Implemented PCRE2_ALT_VERBNAMES
This commit is contained in:
parent
fd08e11c1e
commit
d2e87a75af
2
132html
2
132html
|
@ -148,7 +148,7 @@ while (<STDIN>)
|
||||||
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
|
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
|
||||||
$ref, $ref);
|
$ref, $ref);
|
||||||
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
|
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
|
||||||
$ref, $ref);
|
$ref);
|
||||||
$ref++;
|
$ref++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -167,6 +167,8 @@ test (there are now 20 in total).
|
||||||
47. Modifier lists in pcre2test were splitting at spaces without the required
|
47. Modifier lists in pcre2test were splitting at spaces without the required
|
||||||
commas.
|
commas.
|
||||||
|
|
||||||
|
48. Implemented PCRE2_ALT_VERBNAMES.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -97,6 +97,7 @@ can skip ahead to the CMake section.
|
||||||
pcre2_context.c
|
pcre2_context.c
|
||||||
pcre2_dfa_match.c
|
pcre2_dfa_match.c
|
||||||
pcre2_error.c
|
pcre2_error.c
|
||||||
|
pcre2_find_bracket.c
|
||||||
pcre2_jit_compile.c
|
pcre2_jit_compile.c
|
||||||
pcre2_maketables.c
|
pcre2_maketables.c
|
||||||
pcre2_match.c
|
pcre2_match.c
|
||||||
|
@ -388,4 +389,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
|
||||||
recommended download site.
|
recommended download site.
|
||||||
|
|
||||||
=============================
|
=============================
|
||||||
Last Updated: 15 June 2015
|
Last Updated: 16 July 2015
|
||||||
|
|
|
@ -724,6 +724,7 @@ The distribution should contain the files listed below.
|
||||||
src/pcre2_context.c )
|
src/pcre2_context.c )
|
||||||
src/pcre2_dfa_match.c )
|
src/pcre2_dfa_match.c )
|
||||||
src/pcre2_error.c )
|
src/pcre2_error.c )
|
||||||
|
src/pcre2_find_bracket.c )
|
||||||
src/pcre2_jit_compile.c )
|
src/pcre2_jit_compile.c )
|
||||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||||
|
@ -832,4 +833,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 24 April 2015
|
Last updated: 16 July 2015
|
||||||
|
|
|
@ -19,7 +19,7 @@ SYNOPSIS
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
|
|
@ -19,7 +19,7 @@ SYNOPSIS
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
|
|
@ -19,8 +19,8 @@ SYNOPSIS
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create_from_pattern(</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> const pcre2_code *<i>code</i>, pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
|
|
@ -70,15 +70,15 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b> pcre2_compile_context *<i>ccontext</i>);</b>
|
<b> pcre2_compile_context *<i>ccontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create_from_pattern(</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> const pcre2_code *<i>code</i>, pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
|
@ -936,7 +936,7 @@ The <i>where</i> argument should point to a buffer that is at least 24 code
|
||||||
units long. (The exact length required can be found by calling
|
units long. (The exact length required can be found by calling
|
||||||
<b>pcre2_config()</b> with <b>where</b> set to NULL.) If PCRE2 has been compiled
|
<b>pcre2_config()</b> with <b>where</b> set to NULL.) If PCRE2 has been compiled
|
||||||
without Unicode support, the buffer is filled with the text "Unicode not
|
without Unicode support, the buffer is filled with the text "Unicode not
|
||||||
supported". Otherwise, the Unicode version string (for example, "7.0.0") is
|
supported". Otherwise, the Unicode version string (for example, "8.0.0") is
|
||||||
inserted. The number of code units used is returned. This is the length of the
|
inserted. The number of code units used is returned. This is the length of the
|
||||||
string plus one unit for the terminating zero.
|
string plus one unit for the terminating zero.
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -961,7 +961,7 @@ zero.
|
||||||
<b> pcre2_compile_context *<i>ccontext</i>);</b>
|
<b> pcre2_compile_context *<i>ccontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
|
The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
|
||||||
|
@ -1083,6 +1083,15 @@ after any internal newline. However, it does not match after a newline at the
|
||||||
end of the subject, for compatibility with Perl. If you want a multiline
|
end of the subject, for compatibility with Perl. If you want a multiline
|
||||||
circumflex also to match after a terminating newline, you must set
|
circumflex also to match after a terminating newline, you must set
|
||||||
PCRE2_ALT_CIRCUMFLEX.
|
PCRE2_ALT_CIRCUMFLEX.
|
||||||
|
<pre>
|
||||||
|
PCRE2_ALT_VERBNAMES
|
||||||
|
</pre>
|
||||||
|
By default, for compatibility with Perl, the name in any verb sequence such as
|
||||||
|
(*MARK:NAME) is any sequence of characters that does not include a closing
|
||||||
|
parenthesis. The name is not processed in any way, and it is not possible to
|
||||||
|
include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
|
||||||
|
option is set, normal backslash processing is applied to verb names and only an
|
||||||
|
unescaped closing parenthesis terminates the name.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_AUTO_CALLOUT
|
PCRE2_AUTO_CALLOUT
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1778,12 +1787,12 @@ documentation.
|
||||||
<a name="matchdatablock"></a></P>
|
<a name="matchdatablock"></a></P>
|
||||||
<br><a name="SEC25" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
<br><a name="SEC25" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create_from_pattern(</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> const pcre2_code *<i>code</i>, pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
|
@ -2010,12 +2019,20 @@ If the pattern is anchored, such a match can occur only if the pattern contains
|
||||||
</pre>
|
</pre>
|
||||||
When PCRE2_UTF is set at compile time, the validity of the subject as a UTF
|
When PCRE2_UTF is set at compile time, the validity of the subject as a UTF
|
||||||
string is checked by default when <b>pcre2_match()</b> is subsequently called.
|
string is checked by default when <b>pcre2_match()</b> is subsequently called.
|
||||||
The entire string is checked before any other processing takes place, and a
|
If a non-zero starting offset is given, the check is applied only to that part
|
||||||
|
of the subject that could be inspected during matching, and there is a check
|
||||||
|
that the starting offset points to the first code unit of a character or to the
|
||||||
|
end of the subject. If there are no lookbehind assertions in the pattern, the
|
||||||
|
check starts at the starting offset. Otherwise, it starts at the length of the
|
||||||
|
longest lookbehind before the starting offset, or at the start of the subject
|
||||||
|
if there are not that many characters before the starting offset. Note that the
|
||||||
|
sequences \b and \B are one-character lookbehinds.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The check is carried out before any other processing takes place, and a
|
||||||
negative error code is returned if the check fails. There are several UTF error
|
negative error code is returned if the check fails. There are several UTF error
|
||||||
codes for each code unit width, corresponding to different problems with the
|
codes for each code unit width, corresponding to different problems with the
|
||||||
code unit sequence. The value of <i>startoffset</i> is also checked, to ensure
|
code unit sequence. There are discussions about the validity of
|
||||||
that it points to the start of a character or to the end of the subject. There
|
|
||||||
are discussions about the validity of
|
|
||||||
<a href="pcre2unicode.html#utf8strings">UTF-8 strings,</a>
|
<a href="pcre2unicode.html#utf8strings">UTF-8 strings,</a>
|
||||||
<a href="pcre2unicode.html#utf16strings">UTF-16 strings,</a>
|
<a href="pcre2unicode.html#utf16strings">UTF-16 strings,</a>
|
||||||
and
|
and
|
||||||
|
@ -2564,12 +2581,12 @@ be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||||
dollar character is an escape character that can specify the insertion of
|
dollar character is an escape character that can specify the insertion of
|
||||||
characters from capturing groups in the pattern. The following forms are
|
characters from capturing groups or (*MARK) items in the pattern. The following
|
||||||
recognized:
|
forms are recognized:
|
||||||
<pre>
|
<pre>
|
||||||
$$ insert a dollar character
|
$$ insert a dollar character
|
||||||
$<n> insert the contents of group <n>
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
${<n>} insert the contents of group <n>
|
$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
|
||||||
</pre>
|
</pre>
|
||||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||||
required only if the following character would be interpreted as part of the
|
required only if the following character would be interpreted as part of the
|
||||||
|
@ -2580,6 +2597,15 @@ calling <b>pcre2_copy_byname()</b> or <b>pcre2_copy_bynumber()</b> as
|
||||||
appropriate.
|
appropriate.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
The facility for inserting a (*MARK) name can be used to perform simple
|
||||||
|
simultaneous substitutions, as this <b>pcre2test</b> example shows:
|
||||||
|
<pre>
|
||||||
|
/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
|
||||||
|
apple lemon
|
||||||
|
2: pear orange
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
||||||
<b>pcre2_match()</b>, except that the partial matching options are not
|
<b>pcre2_match()</b>, except that the partial matching options are not
|
||||||
permitted, and <i>match_data</i> may be passed as NULL, in which case a match
|
permitted, and <i>match_data</i> may be passed as NULL, in which case a match
|
||||||
|
@ -2883,7 +2909,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 22 April 2015
|
Last updated: 30 August 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -224,8 +224,14 @@ whether a match operation was executed by JIT or by the interpreter.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
You may safely use the same JIT stack for more than one pattern (either by
|
You may safely use the same JIT stack for more than one pattern (either by
|
||||||
assigning directly or by callback), as long as the patterns are all matched
|
assigning directly or by callback), as long as the patterns are matched
|
||||||
sequentially in the same thread. In a multithread application, if you do not
|
sequentially in the same thread. Currently, the only way to set up
|
||||||
|
non-sequential matches in one thread is to use callouts: if a callout function
|
||||||
|
starts another match, that match must use a different JIT stack to the one used
|
||||||
|
for the currently suspended match(es).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In a multithread application, if you do not
|
||||||
specify a JIT stack, or if you assign or pass back NULL from a callback, that
|
specify a JIT stack, or if you assign or pass back NULL from a callback, that
|
||||||
is thread-safe, because each thread has its own machine stack. However, if you
|
is thread-safe, because each thread has its own machine stack. However, if you
|
||||||
assign or pass back a non-NULL JIT stack, this must be a different stack for
|
assign or pass back a non-NULL JIT stack, this must be a different stack for
|
||||||
|
@ -419,9 +425,9 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 27 November 2014
|
Last updated: 28 July 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -736,6 +736,8 @@ Those that are not part of an identified script are lumped together as
|
||||||
"Common". The current list of scripts is:
|
"Common". The current list of scripts is:
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
Ahom,
|
||||||
|
Anatolian_Hieroglyphs,
|
||||||
Arabic,
|
Arabic,
|
||||||
Armenian,
|
Armenian,
|
||||||
Avestan,
|
Avestan,
|
||||||
|
@ -776,6 +778,7 @@ Gurmukhi,
|
||||||
Han,
|
Han,
|
||||||
Hangul,
|
Hangul,
|
||||||
Hanunoo,
|
Hanunoo,
|
||||||
|
Hatran,
|
||||||
Hebrew,
|
Hebrew,
|
||||||
Hiragana,
|
Hiragana,
|
||||||
Imperial_Aramaic,
|
Imperial_Aramaic,
|
||||||
|
@ -812,12 +815,14 @@ Miao,
|
||||||
Modi,
|
Modi,
|
||||||
Mongolian,
|
Mongolian,
|
||||||
Mro,
|
Mro,
|
||||||
|
Multani,
|
||||||
Myanmar,
|
Myanmar,
|
||||||
Nabataean,
|
Nabataean,
|
||||||
New_Tai_Lue,
|
New_Tai_Lue,
|
||||||
Nko,
|
Nko,
|
||||||
Ogham,
|
Ogham,
|
||||||
Ol_Chiki,
|
Ol_Chiki,
|
||||||
|
Old_Hungarian,
|
||||||
Old_Italic,
|
Old_Italic,
|
||||||
Old_North_Arabian,
|
Old_North_Arabian,
|
||||||
Old_Permic,
|
Old_Permic,
|
||||||
|
@ -839,6 +844,7 @@ Saurashtra,
|
||||||
Sharada,
|
Sharada,
|
||||||
Shavian,
|
Shavian,
|
||||||
Siddham,
|
Siddham,
|
||||||
|
SignWriting,
|
||||||
Sinhala,
|
Sinhala,
|
||||||
Sora_Sompeng,
|
Sora_Sompeng,
|
||||||
Sundanese,
|
Sundanese,
|
||||||
|
@ -1322,9 +1328,19 @@ where a range ending character is expected. For example, [z-\xff] is valid,
|
||||||
but [A-\d] and [A-[:digit:]] are not.
|
but [A-\d] and [A-[:digit:]] are not.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Ranges operate in the collating sequence of character values. They can also be
|
Ranges normally include all code points between the start and end characters,
|
||||||
used for characters specified numerically, for example [\000-\037]. Ranges
|
inclusive. They can also be used for code points specified numerically, for
|
||||||
can include any characters that are valid for the current mode.
|
example [\000-\037]. Ranges can include any characters that are valid for the
|
||||||
|
current mode.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a special case in EBCDIC environments for ranges whose end points are
|
||||||
|
both specified as literal letters in the same case. For compatibility with
|
||||||
|
Perl, EBCDIC code points within the range that are not letters are omitted. For
|
||||||
|
example, [h-k] matches only four characters, even though the codes for h and k
|
||||||
|
are 0x88 and 0x92, a range of 11 code points. However, if the range is
|
||||||
|
specified numerically, for example, [\x88-\x92] or [h-\x92], all code points
|
||||||
|
are included.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If a range that includes letters is used when caseless matching is set, it
|
If a range that includes letters is used when caseless matching is set, it
|
||||||
|
@ -2899,14 +2915,23 @@ remarks apply to the PCRE2 features described in this section.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The new verbs make use of what was previously invalid syntax: an opening
|
The new verbs make use of what was previously invalid syntax: an opening
|
||||||
parenthesis followed by an asterisk. They are generally of the form
|
parenthesis followed by an asterisk. They are generally of the form (*VERB) or
|
||||||
(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving
|
(*VERB:NAME). Some verbs take either form, possibly behaving differently
|
||||||
differently depending on whether or not a name is present. A name is any
|
depending on whether or not a name is present.
|
||||||
sequence of characters that does not include a closing parenthesis. The maximum
|
</P>
|
||||||
length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit
|
<P>
|
||||||
libraries. If the name is empty, that is, if the closing parenthesis
|
By default, for compatibility with Perl, a name is any sequence of characters
|
||||||
immediately follows the colon, the effect is as if the colon were not there.
|
that does not include a closing parenthesis. The name is not processed in
|
||||||
Any number of these verbs may occur in a pattern.
|
any way, and it is not possible to include a closing parenthesis in the name.
|
||||||
|
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing
|
||||||
|
is applied to verb names and only an unescaped closing parenthesis terminates
|
||||||
|
the name.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
||||||
|
16-bit and 32-bit libraries. If the name is empty, that is, if the closing
|
||||||
|
parenthesis immediately follows the colon, the effect is as if the colon were
|
||||||
|
not there. Any number of these verbs may occur in a pattern.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Since these verbs are specifically related to backtracking, most of them can be
|
Since these verbs are specifically related to backtracking, most of them can be
|
||||||
|
@ -3323,7 +3348,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 13 June 2015
|
Last updated: 30 August 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -187,6 +187,8 @@ at release 5.18.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
Ahom,
|
||||||
|
Anatolian_Hieroglyphs,
|
||||||
Arabic,
|
Arabic,
|
||||||
Armenian,
|
Armenian,
|
||||||
Avestan,
|
Avestan,
|
||||||
|
@ -227,6 +229,7 @@ Gurmukhi,
|
||||||
Han,
|
Han,
|
||||||
Hangul,
|
Hangul,
|
||||||
Hanunoo,
|
Hanunoo,
|
||||||
|
Hatran,
|
||||||
Hebrew,
|
Hebrew,
|
||||||
Hiragana,
|
Hiragana,
|
||||||
Imperial_Aramaic,
|
Imperial_Aramaic,
|
||||||
|
@ -263,12 +266,14 @@ Miao,
|
||||||
Modi,
|
Modi,
|
||||||
Mongolian,
|
Mongolian,
|
||||||
Mro,
|
Mro,
|
||||||
|
Multani,
|
||||||
Myanmar,
|
Myanmar,
|
||||||
Nabataean,
|
Nabataean,
|
||||||
New_Tai_Lue,
|
New_Tai_Lue,
|
||||||
Nko,
|
Nko,
|
||||||
Ogham,
|
Ogham,
|
||||||
Ol_Chiki,
|
Ol_Chiki,
|
||||||
|
Old_Hungarian,
|
||||||
Old_Italic,
|
Old_Italic,
|
||||||
Old_North_Arabian,
|
Old_North_Arabian,
|
||||||
Old_Permic,
|
Old_Permic,
|
||||||
|
@ -290,6 +295,7 @@ Saurashtra,
|
||||||
Sharada,
|
Sharada,
|
||||||
Shavian,
|
Shavian,
|
||||||
Siddham,
|
Siddham,
|
||||||
|
SignWriting,
|
||||||
Sinhala,
|
Sinhala,
|
||||||
Sora_Sompeng,
|
Sora_Sompeng,
|
||||||
Sundanese,
|
Sundanese,
|
||||||
|
@ -582,7 +588,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 13 June 2015
|
Last updated: 17 July 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -340,12 +340,13 @@ subject lines. Modifiers on a subject line can change these settings.
|
||||||
<br><a name="SEC7" href="#TOC1">MODIFIER SYNTAX</a><br>
|
<br><a name="SEC7" href="#TOC1">MODIFIER SYNTAX</a><br>
|
||||||
<P>
|
<P>
|
||||||
Modifier lists are used with both pattern and subject lines. Items in a list
|
Modifier lists are used with both pattern and subject lines. Items in a list
|
||||||
are separated by commas and optional white space. Some modifiers may be given
|
are separated by commas followed by optional white space. Trailing white space
|
||||||
for both patterns and subject lines, whereas others are valid for one or the
|
in a modifier list is ignored. Some modifiers may be given for both patterns
|
||||||
other only. Each modifier has a long name, for example "anchored", and some of
|
and subject lines, whereas others are valid only for one or the other. Each
|
||||||
them must be followed by an equals sign and a value, for example, "offset=12".
|
modifier has a long name, for example "anchored", and some of them must be
|
||||||
Modifiers that do not take values may be preceded by a minus sign to turn off a
|
followed by an equals sign and a value, for example, "offset=12". Values cannot
|
||||||
previous setting.
|
contain comma characters, but may contain spaces. Modifiers that do not take
|
||||||
|
values may be preceded by a minus sign to turn off a previous setting.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A few of the more common modifiers can also be specified as single letters, for
|
A few of the more common modifiers can also be specified as single letters, for
|
||||||
|
@ -479,6 +480,7 @@ for a description of their effects.
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
alt_bsux set PCRE2_ALT_BSUX
|
alt_bsux set PCRE2_ALT_BSUX
|
||||||
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
||||||
|
alt_verbnames set PCRE2_ALT_VERBNAMES
|
||||||
anchored set PCRE2_ANCHORED
|
anchored set PCRE2_ANCHORED
|
||||||
auto_callout set PCRE2_AUTO_CALLOUT
|
auto_callout set PCRE2_AUTO_CALLOUT
|
||||||
/i caseless set PCRE2_CASELESS
|
/i caseless set PCRE2_CASELESS
|
||||||
|
@ -1469,7 +1471,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 20 May 2015
|
Last updated: 30 August 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -126,11 +126,22 @@ as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
||||||
strings to be in host byte order.
|
strings to be in host byte order.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The entire string is checked before any other processing takes place. In
|
A UTF string is checked before any other processing takes place. In the case of
|
||||||
addition to checking the format of the string, there is a check to ensure that
|
<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b> calls with a non-zero starting
|
||||||
all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
|
offset, the check is applied only to that part of the subject that could be
|
||||||
The so-called "non-character" code points are not excluded because Unicode
|
inspected during matching, and there is a check that the starting offset points
|
||||||
corrigendum #9 makes it clear that they should not be.
|
to the first code unit of a character or to the end of the subject. If there
|
||||||
|
are no lookbehind assertions in the pattern, the check starts at the starting
|
||||||
|
offset. Otherwise, it starts at the length of the longest lookbehind before the
|
||||||
|
starting offset, or at the start of the subject if there are not that many
|
||||||
|
characters before the starting offset. Note that the sequences \b and \B are
|
||||||
|
one-character lookbehinds.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In addition to checking the format of the string, there is a check to ensure
|
||||||
|
that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate
|
||||||
|
area. The so-called "non-character" code points are not excluded because
|
||||||
|
Unicode corrigendum #9 makes it clear that they should not be.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
||||||
|
@ -264,9 +275,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 November 2014
|
Last updated: 18 August 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
104
doc/pcre2.txt
104
doc/pcre2.txt
|
@ -190,13 +190,13 @@ PCRE2 NATIVE API BASIC FUNCTIONS
|
||||||
uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
|
uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
|
||||||
pcre2_compile_context *ccontext);
|
pcre2_compile_context *ccontext);
|
||||||
|
|
||||||
pcre2_code_free(pcre2_code *code);
|
void pcre2_code_free(pcre2_code *code);
|
||||||
|
|
||||||
pcre2_match_data_create(uint32_t ovecsize,
|
pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
|
||||||
pcre2_general_context *gcontext);
|
pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_match_data_create_from_pattern(const pcre2_code *code,
|
pcre2_match_data *pcre2_match_data_create_from_pattern(
|
||||||
pcre2_general_context *gcontext);
|
const pcre2_code *code, pcre2_general_context *gcontext);
|
||||||
|
|
||||||
int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
|
int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
||||||
|
@ -989,7 +989,7 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
pcre2_config() with where set to NULL.) If PCRE2 has been compiled
|
pcre2_config() with where set to NULL.) If PCRE2 has been compiled
|
||||||
without Unicode support, the buffer is filled with the text "Unicode
|
without Unicode support, the buffer is filled with the text "Unicode
|
||||||
not supported". Otherwise, the Unicode version string (for example,
|
not supported". Otherwise, the Unicode version string (for example,
|
||||||
"7.0.0") is inserted. The number of code units used is returned. This
|
"8.0.0") is inserted. The number of code units used is returned. This
|
||||||
is the length of the string plus one unit for the terminating zero.
|
is the length of the string plus one unit for the terminating zero.
|
||||||
|
|
||||||
PCRE2_CONFIG_UNICODE
|
PCRE2_CONFIG_UNICODE
|
||||||
|
@ -1014,7 +1014,7 @@ COMPILING A PATTERN
|
||||||
uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
|
uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
|
||||||
pcre2_compile_context *ccontext);
|
pcre2_compile_context *ccontext);
|
||||||
|
|
||||||
pcre2_code_free(pcre2_code *code);
|
void pcre2_code_free(pcre2_code *code);
|
||||||
|
|
||||||
The pcre2_compile() function compiles a pattern into an internal form.
|
The pcre2_compile() function compiles a pattern into an internal form.
|
||||||
The pattern is defined by a pointer to a string of code units and a
|
The pattern is defined by a pointer to a string of code units and a
|
||||||
|
@ -1128,6 +1128,16 @@ COMPILING A PATTERN
|
||||||
Perl. If you want a multiline circumflex also to match after a termi-
|
Perl. If you want a multiline circumflex also to match after a termi-
|
||||||
nating newline, you must set PCRE2_ALT_CIRCUMFLEX.
|
nating newline, you must set PCRE2_ALT_CIRCUMFLEX.
|
||||||
|
|
||||||
|
PCRE2_ALT_VERBNAMES
|
||||||
|
|
||||||
|
By default, for compatibility with Perl, the name in any verb sequence
|
||||||
|
such as (*MARK:NAME) is any sequence of characters that does not
|
||||||
|
include a closing parenthesis. The name is not processed in any way,
|
||||||
|
and it is not possible to include a closing parenthesis in the name.
|
||||||
|
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash
|
||||||
|
processing is applied to verb names and only an unescaped closing
|
||||||
|
parenthesis terminates the name.
|
||||||
|
|
||||||
PCRE2_AUTO_CALLOUT
|
PCRE2_AUTO_CALLOUT
|
||||||
|
|
||||||
If this bit is set, pcre2_compile() automatically inserts callout
|
If this bit is set, pcre2_compile() automatically inserts callout
|
||||||
|
@ -1809,11 +1819,11 @@ SERIALIZATION AND PRECOMPILING
|
||||||
|
|
||||||
THE MATCH DATA BLOCK
|
THE MATCH DATA BLOCK
|
||||||
|
|
||||||
pcre2_match_data_create(uint32_t ovecsize,
|
pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
|
||||||
pcre2_general_context *gcontext);
|
pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_match_data_create_from_pattern(const pcre2_code *code,
|
pcre2_match_data *pcre2_match_data_create_from_pattern(
|
||||||
pcre2_general_context *gcontext);
|
const pcre2_code *code, pcre2_general_context *gcontext);
|
||||||
|
|
||||||
void pcre2_match_data_free(pcre2_match_data *match_data);
|
void pcre2_match_data_free(pcre2_match_data *match_data);
|
||||||
|
|
||||||
|
@ -2022,12 +2032,20 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
||||||
|
|
||||||
When PCRE2_UTF is set at compile time, the validity of the subject as a
|
When PCRE2_UTF is set at compile time, the validity of the subject as a
|
||||||
UTF string is checked by default when pcre2_match() is subsequently
|
UTF string is checked by default when pcre2_match() is subsequently
|
||||||
called. The entire string is checked before any other processing takes
|
called. If a non-zero starting offset is given, the check is applied
|
||||||
place, and a negative error code is returned if the check fails. There
|
only to that part of the subject that could be inspected during match-
|
||||||
are several UTF error codes for each code unit width, corresponding to
|
ing, and there is a check that the starting offset points to the first
|
||||||
different problems with the code unit sequence. The value of startoff-
|
code unit of a character or to the end of the subject. If there are no
|
||||||
set is also checked, to ensure that it points to the start of a charac-
|
lookbehind assertions in the pattern, the check starts at the starting
|
||||||
ter or to the end of the subject. There are discussions about the
|
offset. Otherwise, it starts at the length of the longest lookbehind
|
||||||
|
before the starting offset, or at the start of the subject if there are
|
||||||
|
not that many characters before the starting offset. Note that the
|
||||||
|
sequences \b and \B are one-character lookbehinds.
|
||||||
|
|
||||||
|
The check is carried out before any other processing takes place, and a
|
||||||
|
negative error code is returned if the check fails. There are several
|
||||||
|
UTF error codes for each code unit width, corresponding to different
|
||||||
|
problems with the code unit sequence. There are discussions about the
|
||||||
validity of UTF-8 strings, UTF-16 strings, and UTF-32 strings in the
|
validity of UTF-8 strings, UTF-16 strings, and UTF-32 strings in the
|
||||||
pcre2unicode page.
|
pcre2unicode page.
|
||||||
|
|
||||||
|
@ -2525,12 +2543,12 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
In the replacement string, which is interpreted as a UTF string in UTF
|
In the replacement string, which is interpreted as a UTF string in UTF
|
||||||
mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK
|
mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK
|
||||||
option is set, a dollar character is an escape character that can spec-
|
option is set, a dollar character is an escape character that can spec-
|
||||||
ify the insertion of characters from capturing groups in the pattern.
|
ify the insertion of characters from capturing groups or (*MARK) items
|
||||||
The following forms are recognized:
|
in the pattern. The following forms are recognized:
|
||||||
|
|
||||||
$$ insert a dollar character
|
$$ insert a dollar character
|
||||||
$<n> insert the contents of group <n>
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
${<n>} insert the contents of group <n>
|
$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
|
||||||
|
|
||||||
Either a group number or a group name can be given for <n>. Curly
|
Either a group number or a group name can be given for <n>. Curly
|
||||||
brackets are required only if the following character would be inter-
|
brackets are required only if the following character would be inter-
|
||||||
|
@ -2540,6 +2558,13 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
is "=+babcb+=". Group insertion is done by calling pcre2_copy_byname()
|
is "=+babcb+=". Group insertion is done by calling pcre2_copy_byname()
|
||||||
or pcre2_copy_bynumber() as appropriate.
|
or pcre2_copy_bynumber() as appropriate.
|
||||||
|
|
||||||
|
The facility for inserting a (*MARK) name can be used to perform simple
|
||||||
|
simultaneous substitutions, as this pcre2test example shows:
|
||||||
|
|
||||||
|
/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
|
||||||
|
apple lemon
|
||||||
|
2: pear orange
|
||||||
|
|
||||||
The first seven arguments of pcre2_substitute() are the same as for
|
The first seven arguments of pcre2_substitute() are the same as for
|
||||||
pcre2_match(), except that the partial matching options are not permit-
|
pcre2_match(), except that the partial matching options are not permit-
|
||||||
ted, and match_data may be passed as NULL, in which case a match data
|
ted, and match_data may be passed as NULL, in which case a match data
|
||||||
|
@ -2826,7 +2851,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 22 April 2015
|
Last updated: 30 August 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -4051,13 +4076,17 @@ CONTROLLING THE JIT STACK
|
||||||
interpreter.
|
interpreter.
|
||||||
|
|
||||||
You may safely use the same JIT stack for more than one pattern (either
|
You may safely use the same JIT stack for more than one pattern (either
|
||||||
by assigning directly or by callback), as long as the patterns are all
|
by assigning directly or by callback), as long as the patterns are
|
||||||
matched sequentially in the same thread. In a multithread application,
|
matched sequentially in the same thread. Currently, the only way to set
|
||||||
if you do not specify a JIT stack, or if you assign or pass back NULL
|
up non-sequential matches in one thread is to use callouts: if a call-
|
||||||
from a callback, that is thread-safe, because each thread has its own
|
out function starts another match, that match must use a different JIT
|
||||||
machine stack. However, if you assign or pass back a non-NULL JIT
|
stack to the one used for currently suspended match(es).
|
||||||
stack, this must be a different stack for each thread so that the
|
|
||||||
application is thread-safe.
|
In a multithread application, if you do not specify a JIT stack, or if
|
||||||
|
you assign or pass back NULL from a callback, that is thread-safe,
|
||||||
|
because each thread has its own machine stack. However, if you assign
|
||||||
|
or pass back a non-NULL JIT stack, this must be a different stack for
|
||||||
|
each thread so that the application is thread-safe.
|
||||||
|
|
||||||
Strictly speaking, even more is allowed. You can assign the same non-
|
Strictly speaking, even more is allowed. You can assign the same non-
|
||||||
NULL stack to a match context that is used by any number of patterns,
|
NULL stack to a match context that is used by any number of patterns,
|
||||||
|
@ -4234,8 +4263,8 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 27 November 2014
|
Last updated: 28 July 2015
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@ -5069,7 +5098,18 @@ VALIDITY OF UTF STRINGS
|
||||||
known as a byte-order mark (BOM). The PCRE2 functions do not handle
|
known as a byte-order mark (BOM). The PCRE2 functions do not handle
|
||||||
this, expecting strings to be in host byte order.
|
this, expecting strings to be in host byte order.
|
||||||
|
|
||||||
The entire string is checked before any other processing takes place.
|
A UTF string is checked before any other processing takes place. In the
|
||||||
|
case of pcre2_match() and pcre2_dfa_match() calls with a non-zero
|
||||||
|
starting offset, the check is applied only to that part of the subject
|
||||||
|
that could be inspected during matching, and there is a check that the
|
||||||
|
starting offset points to the first code unit of a character or to the
|
||||||
|
end of the subject. If there are no lookbehind assertions in the pat-
|
||||||
|
tern, the check starts at the starting offset. Otherwise, it starts at
|
||||||
|
the length of the longest lookbehind before the starting offset, or at
|
||||||
|
the start of the subject if there are not that many characters before
|
||||||
|
the starting offset. Note that the sequences \b and \B are one-charac-
|
||||||
|
ter lookbehinds.
|
||||||
|
|
||||||
In addition to checking the format of the string, there is a check to
|
In addition to checking the format of the string, there is a check to
|
||||||
ensure that all code points lie in the range U+0 to U+10FFFF, excluding
|
ensure that all code points lie in the range U+0 to U+10FFFF, excluding
|
||||||
the surrogate area. The so-called "non-character" code points are not
|
the surrogate area. The so-called "non-character" code points are not
|
||||||
|
@ -5192,8 +5232,8 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 23 November 2014
|
Last updated: 18 August 2015
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "29 August 2015" "PCRE2 10.21"
|
.TH PCRE2API 3 "30 August 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -1052,6 +1052,15 @@ after any internal newline. However, it does not match after a newline at the
|
||||||
end of the subject, for compatibility with Perl. If you want a multiline
|
end of the subject, for compatibility with Perl. If you want a multiline
|
||||||
circumflex also to match after a terminating newline, you must set
|
circumflex also to match after a terminating newline, you must set
|
||||||
PCRE2_ALT_CIRCUMFLEX.
|
PCRE2_ALT_CIRCUMFLEX.
|
||||||
|
.sp
|
||||||
|
PCRE2_ALT_VERBNAMES
|
||||||
|
.sp
|
||||||
|
By default, for compatibility with Perl, the name in any verb sequence such as
|
||||||
|
(*MARK:NAME) is any sequence of characters that does not include a closing
|
||||||
|
parenthesis. The name is not processed in any way, and it is not possible to
|
||||||
|
include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
|
||||||
|
option is set, normal backslash processing is applied to verb names and only an
|
||||||
|
unescaped closing parenthesis terminates the name.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_AUTO_CALLOUT
|
PCRE2_AUTO_CALLOUT
|
||||||
.sp
|
.sp
|
||||||
|
@ -2953,6 +2962,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 29 August 2015
|
Last updated: 30 August 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2PATTERN 3 "24 July 2015" "PCRE2 10.21"
|
.TH PCRE2PATTERN 3 "30 August 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||||
|
@ -1334,7 +1334,7 @@ both specified as literal letters in the same case. For compatibility with
|
||||||
Perl, EBCDIC code points within the range that are not letters are omitted. For
|
Perl, EBCDIC code points within the range that are not letters are omitted. For
|
||||||
example, [h-k] matches only four characters, even though the codes for h and k
|
example, [h-k] matches only four characters, even though the codes for h and k
|
||||||
are 0x88 and 0x92, a range of 11 code points. However, if the range is
|
are 0x88 and 0x92, a range of 11 code points. However, if the range is
|
||||||
specified numerically, for example, [\ex88-\ex92] or [h-\x92], all code points
|
specified numerically, for example, [\ex88-\ex92] or [h-\ex92], all code points
|
||||||
are included.
|
are included.
|
||||||
.P
|
.P
|
||||||
If a range that includes letters is used when caseless matching is set, it
|
If a range that includes letters is used when caseless matching is set, it
|
||||||
|
@ -2944,14 +2944,21 @@ in production code should be noted to avoid problems during upgrades." The same
|
||||||
remarks apply to the PCRE2 features described in this section.
|
remarks apply to the PCRE2 features described in this section.
|
||||||
.P
|
.P
|
||||||
The new verbs make use of what was previously invalid syntax: an opening
|
The new verbs make use of what was previously invalid syntax: an opening
|
||||||
parenthesis followed by an asterisk. They are generally of the form
|
parenthesis followed by an asterisk. They are generally of the form (*VERB) or
|
||||||
(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving
|
(*VERB:NAME). Some verbs take either form, possibly behaving differently
|
||||||
differently depending on whether or not a name is present. A name is any
|
depending on whether or not a name is present.
|
||||||
sequence of characters that does not include a closing parenthesis. The maximum
|
.P
|
||||||
length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit
|
By default, for compatibility with Perl, a name is any sequence of characters
|
||||||
libraries. If the name is empty, that is, if the closing parenthesis
|
that does not include a closing parenthesis. The name is not processed in
|
||||||
immediately follows the colon, the effect is as if the colon were not there.
|
any way, and it is not possible to include a closing parenthesis in the name.
|
||||||
Any number of these verbs may occur in a pattern.
|
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing
|
||||||
|
is applied to verb names and only an unescaped closing parenthesis terminates
|
||||||
|
the name.
|
||||||
|
.P
|
||||||
|
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
||||||
|
16-bit and 32-bit libraries. If the name is empty, that is, if the closing
|
||||||
|
parenthesis immediately follows the colon, the effect is as if the colon were
|
||||||
|
not there. Any number of these verbs may occur in a pattern.
|
||||||
.P
|
.P
|
||||||
Since these verbs are specifically related to backtracking, most of them can be
|
Since these verbs are specifically related to backtracking, most of them can be
|
||||||
used only when the pattern is to be matched using the traditional matching
|
used only when the pattern is to be matched using the traditional matching
|
||||||
|
@ -3376,6 +3383,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 24 July 2015
|
Last updated: 30 August 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -445,6 +445,7 @@ for a description of their effects.
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
alt_bsux set PCRE2_ALT_BSUX
|
alt_bsux set PCRE2_ALT_BSUX
|
||||||
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
||||||
|
alt_verbnames set PCRE2_ALT_VERBNAMES
|
||||||
anchored set PCRE2_ANCHORED
|
anchored set PCRE2_ANCHORED
|
||||||
auto_callout set PCRE2_AUTO_CALLOUT
|
auto_callout set PCRE2_AUTO_CALLOUT
|
||||||
/i caseless set PCRE2_CASELESS
|
/i caseless set PCRE2_CASELESS
|
||||||
|
|
|
@ -285,12 +285,14 @@ COMMAND LINES
|
||||||
MODIFIER SYNTAX
|
MODIFIER SYNTAX
|
||||||
|
|
||||||
Modifier lists are used with both pattern and subject lines. Items in a
|
Modifier lists are used with both pattern and subject lines. Items in a
|
||||||
list are separated by commas and optional white space. Some modifiers
|
list are separated by commas followed by optional white space. Trailing
|
||||||
may be given for both patterns and subject lines, whereas others are
|
white space in a modifier list is ignored. Some modifiers may be given
|
||||||
valid for one or the other only. Each modifier has a long name, for
|
for both patterns and subject lines, whereas others are valid only for
|
||||||
example "anchored", and some of them must be followed by an equals sign
|
one or the other. Each modifier has a long name, for example
|
||||||
and a value, for example, "offset=12". Modifiers that do not take val-
|
"anchored", and some of them must be followed by an equals sign and a
|
||||||
ues may be preceded by a minus sign to turn off a previous setting.
|
value, for example, "offset=12". Values cannot contain comma charac-
|
||||||
|
ters, but may contain spaces. Modifiers that do not take values may be
|
||||||
|
preceded by a minus sign to turn off a previous setting.
|
||||||
|
|
||||||
A few of the more common modifiers can also be specified as single let-
|
A few of the more common modifiers can also be specified as single let-
|
||||||
ters, for example "i" for "caseless". In documentation, following the
|
ters, for example "i" for "caseless". In documentation, following the
|
||||||
|
@ -424,6 +426,7 @@ PATTERN MODIFIERS
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
alt_bsux set PCRE2_ALT_BSUX
|
alt_bsux set PCRE2_ALT_BSUX
|
||||||
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
||||||
|
alt_verbnames set PCRE2_ALT_VERBNAMES
|
||||||
anchored set PCRE2_ANCHORED
|
anchored set PCRE2_ANCHORED
|
||||||
auto_callout set PCRE2_AUTO_CALLOUT
|
auto_callout set PCRE2_AUTO_CALLOUT
|
||||||
/i caseless set PCRE2_CASELESS
|
/i caseless set PCRE2_CASELESS
|
||||||
|
@ -1330,5 +1333,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 20 May 2015
|
Last updated: 30 August 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
|
|
|
@ -120,6 +120,7 @@ D is inspected during pcre2_dfa_match() execution
|
||||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||||
|
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||||
|
|
||||||
/* These are for pcre2_jit_compile(). */
|
/* These are for pcre2_jit_compile(). */
|
||||||
|
|
||||||
|
|
|
@ -561,12 +561,12 @@ static PCRE2_SPTR posix_substitutes[] = {
|
||||||
|
|
||||||
#define PUBLIC_COMPILE_OPTIONS \
|
#define PUBLIC_COMPILE_OPTIONS \
|
||||||
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
||||||
PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY|PCRE2_DOTALL| \
|
PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
||||||
PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE|PCRE2_MATCH_UNSET_BACKREF| \
|
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
|
||||||
PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C|PCRE2_NEVER_UCP| \
|
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
||||||
PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE|PCRE2_NO_AUTO_POSSESS| \
|
PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \
|
||||||
PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE|PCRE2_NO_UTF_CHECK| \
|
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
||||||
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF)
|
PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF)
|
||||||
|
|
||||||
/* Compile time error code numbers. They are given names so that they can more
|
/* Compile time error code numbers. They are given names so that they can more
|
||||||
easily be tracked. When a new number is added, the tables called eint1 and
|
easily be tracked. When a new number is added, the tables called eint1 and
|
||||||
|
@ -5382,13 +5382,52 @@ for (;; ptr++)
|
||||||
|
|
||||||
/* It appears that Perl allows any characters whatsoever, other than
|
/* It appears that Perl allows any characters whatsoever, other than
|
||||||
a closing parenthesis, to appear in arguments, so we no longer insist on
|
a closing parenthesis, to appear in arguments, so we no longer insist on
|
||||||
letters, digits, and underscores. */
|
letters, digits, and underscores. Perl does not, however, do any
|
||||||
|
interpretation within arguments, and has no means of including a closing
|
||||||
|
parenthesis. PCRE supports escape processing but only when it is
|
||||||
|
requested by an option. Note that check_escape() will not return values
|
||||||
|
greater than the code unit maximum when not in UTF mode. */
|
||||||
|
|
||||||
if (*ptr == CHAR_COLON)
|
if (*ptr == CHAR_COLON)
|
||||||
{
|
{
|
||||||
arg = ++ptr;
|
arg = ++ptr;
|
||||||
|
|
||||||
|
if ((options & PCRE2_ALT_VERBNAMES) == 0)
|
||||||
|
{
|
||||||
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
|
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
|
||||||
arglen = (int)(ptr - arg);
|
arglen = (int)(ptr - arg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
arglen = 0;
|
||||||
|
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS)
|
||||||
|
{
|
||||||
|
if (*ptr == '\\')
|
||||||
|
{
|
||||||
|
uint32_t x;
|
||||||
|
*errorcodeptr = 0;
|
||||||
|
i = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
|
||||||
|
if (*errorcodeptr != 0) goto FAILED;
|
||||||
|
if (i != 0)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR40;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||||
|
if ((int)x <= PRIV(utf8_table1)[i]) break;
|
||||||
|
arglen += i;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if (x > 0xffff) arglen++;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
arglen++;
|
||||||
|
ptr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ((unsigned int)arglen > MAX_MARK)
|
if ((unsigned int)arglen > MAX_MARK)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR76;
|
*errorcodeptr = ERR76;
|
||||||
|
@ -5456,8 +5495,42 @@ for (;; ptr++)
|
||||||
}
|
}
|
||||||
setverb = *code++ = verbs[i].op_arg;
|
setverb = *code++ = verbs[i].op_arg;
|
||||||
*code++ = arglen;
|
*code++ = arglen;
|
||||||
|
|
||||||
|
/* If we are processing the argument for escapes, we don't need
|
||||||
|
to apply checks here because it was all checked above when
|
||||||
|
computing the length. */
|
||||||
|
|
||||||
|
if ((options & PCRE2_ALT_VERBNAMES) != 0)
|
||||||
|
{
|
||||||
|
for (; arg != ptr; arg++)
|
||||||
|
{
|
||||||
|
if (*arg == '\\')
|
||||||
|
{
|
||||||
|
uint32_t x;
|
||||||
|
*errorcodeptr = 0;
|
||||||
|
(void)check_escape(&arg, &x, errorcodeptr, options, FALSE,
|
||||||
|
cb);
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR cbuff[8];
|
||||||
|
x = PRIV(ord2utf)(x, cbuff);
|
||||||
|
memcpy(code, cbuff, CU2BYTES(x));
|
||||||
|
code += x;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
*code++ = x;
|
||||||
|
}
|
||||||
|
else *code++ = *arg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else /* No argument processing */
|
||||||
|
{
|
||||||
memcpy(code, arg, CU2BYTES(arglen));
|
memcpy(code, arg, CU2BYTES(arglen));
|
||||||
code += arglen;
|
code += arglen;
|
||||||
|
}
|
||||||
|
|
||||||
*code++ = 0;
|
*code++ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -111,7 +111,7 @@ static const char compile_error_texts[] =
|
||||||
"number after (?C is greater than 255\0"
|
"number after (?C is greater than 255\0"
|
||||||
"closing parenthesis for (?C expected\0"
|
"closing parenthesis for (?C expected\0"
|
||||||
/* 40 */
|
/* 40 */
|
||||||
"SPARE ERROR\0"
|
"invalid escape sequence in (*VERB) name\0"
|
||||||
"unrecognized character after (?P\0"
|
"unrecognized character after (?P\0"
|
||||||
"syntax error in subpattern name (missing terminator)\0"
|
"syntax error in subpattern name (missing terminator)\0"
|
||||||
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||||
|
|
|
@ -496,6 +496,7 @@ static modstruct modlist[] = {
|
||||||
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
|
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
|
||||||
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
|
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
|
||||||
{ "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
|
{ "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
|
||||||
|
{ "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
|
||||||
{ "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
|
{ "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
|
||||||
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
|
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
|
||||||
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
|
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
|
||||||
|
@ -3467,10 +3468,11 @@ static void
|
||||||
show_compile_options(uint32_t options, const char *before, const char *after)
|
show_compile_options(uint32_t options, const char *before, const char *after)
|
||||||
{
|
{
|
||||||
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
|
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
|
||||||
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
before,
|
before,
|
||||||
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
|
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
|
||||||
((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
|
((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
|
||||||
|
((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
|
||||||
((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
|
((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
|
||||||
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||||
((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
|
((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
|
||||||
|
|
|
@ -4442,4 +4442,11 @@ a random value. /Ix
|
||||||
/((*MARK:A))++a(*SKIP:B)b/
|
/((*MARK:A))++a(*SKIP:B)b/
|
||||||
aacb
|
aacb
|
||||||
|
|
||||||
|
/(*MARK:a\zb)z/alt_verbnames
|
||||||
|
|
||||||
|
/(*:ab\t(d\)c)xxx/
|
||||||
|
|
||||||
|
/(*:ab\t(d\)c)xxx/alt_verbnames,mark
|
||||||
|
cxxxz
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -1662,4 +1662,9 @@
|
||||||
/[\pS#moq]/
|
/[\pS#moq]/
|
||||||
=
|
=
|
||||||
|
|
||||||
|
# UTF tests
|
||||||
|
|
||||||
|
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
|
||||||
|
cxxxz
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
|
@ -251,4 +251,6 @@
|
||||||
|
|
||||||
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||||
|
|
||||||
|
/(*MARK:a\x{100}b)z/alt_verbnames
|
||||||
|
|
||||||
# End of testinput9
|
# End of testinput9
|
||||||
|
|
|
@ -14713,4 +14713,15 @@ No match
|
||||||
aacb
|
aacb
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/(*MARK:a\zb)z/alt_verbnames
|
||||||
|
Failed: error 140 at offset 9: invalid escape sequence in (*VERB) name
|
||||||
|
|
||||||
|
/(*:ab\t(d\)c)xxx/
|
||||||
|
Failed: error 122 at offset 12: unmatched closing parenthesis
|
||||||
|
|
||||||
|
/(*:ab\t(d\)c)xxx/alt_verbnames,mark
|
||||||
|
cxxxz
|
||||||
|
0: xxx
|
||||||
|
MK: ab\x09(d)c
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -4064,4 +4064,11 @@ No match
|
||||||
=
|
=
|
||||||
0: =
|
0: =
|
||||||
|
|
||||||
|
# UTF tests
|
||||||
|
|
||||||
|
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
|
||||||
|
cxxxz
|
||||||
|
0: xxx
|
||||||
|
MK: a\x{12345}b\x{09}(d)c
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
|
@ -356,4 +356,7 @@ Failed: error 177 at offset 6: character code point value in \u.... sequence is
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(*MARK:a\x{100}b)z/alt_verbnames
|
||||||
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
# End of testinput9
|
# End of testinput9
|
||||||
|
|
Loading…
Reference in New Issue