Document experimental pattern conversion functions and remove unimplemented
features.
This commit is contained in:
parent
4f7a608d56
commit
a23715d7b1
|
@ -216,6 +216,9 @@ unit". Previously only non-anchored patterns did this.
|
|||
49. Update extended grapheme breaking rules to the latest set that are in
|
||||
Unicode Standard Annex #29.
|
||||
|
||||
50. Added experimental foreign pattern conversion facilities
|
||||
(pcre2_pattern_convert() and friends).
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
------------------------------
|
||||
|
|
16
Makefile.am
16
Makefile.am
|
@ -36,6 +36,10 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_compile_context_create.html \
|
||||
doc/html/pcre2_compile_context_free.html \
|
||||
doc/html/pcre2_config.html \
|
||||
doc/html/pcre2_convert_context_copy.html \
|
||||
doc/html/pcre2_convert_context_create.html \
|
||||
doc/html/pcre2_convert_context_free.html \
|
||||
doc/html/pcre2_converted_pattern_free.html \
|
||||
doc/html/pcre2_dfa_match.html \
|
||||
doc/html/pcre2_general_context_copy.html \
|
||||
doc/html/pcre2_general_context_create.html \
|
||||
|
@ -59,6 +63,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_match_data_create.html \
|
||||
doc/html/pcre2_match_data_create_from_pattern.html \
|
||||
doc/html/pcre2_match_data_free.html \
|
||||
doc/html/pcre2_pattern_convert.html \
|
||||
doc/html/pcre2_pattern_info.html \
|
||||
doc/html/pcre2_serialize_decode.html \
|
||||
doc/html/pcre2_serialize_encode.html \
|
||||
|
@ -70,6 +75,8 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_set_compile_extra_options.html \
|
||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||
doc/html/pcre2_set_depth_limit.html \
|
||||
doc/html/pcre2_set_glob_escape.html \
|
||||
doc/html/pcre2_set_glob_separator.html \
|
||||
doc/html/pcre2_set_heap_limit.html \
|
||||
doc/html/pcre2_set_match_limit.html \
|
||||
doc/html/pcre2_set_max_pattern_length.html \
|
||||
|
@ -94,6 +101,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2build.html \
|
||||
doc/html/pcre2callout.html \
|
||||
doc/html/pcre2compat.html \
|
||||
doc/html/pcre2convert.html \
|
||||
doc/html/pcre2demo.html \
|
||||
doc/html/pcre2grep.html \
|
||||
doc/html/pcre2jit.html \
|
||||
|
@ -121,6 +129,10 @@ dist_man_MANS = \
|
|||
doc/pcre2_compile_context_create.3 \
|
||||
doc/pcre2_compile_context_free.3 \
|
||||
doc/pcre2_config.3 \
|
||||
doc/pcre2_convert_context_copy.3 \
|
||||
doc/pcre2_convert_context_create.3 \
|
||||
doc/pcre2_convert_context_free.3 \
|
||||
doc/pcre2_converted_pattern_free.3 \
|
||||
doc/pcre2_dfa_match.3 \
|
||||
doc/pcre2_general_context_copy.3 \
|
||||
doc/pcre2_general_context_create.3 \
|
||||
|
@ -144,6 +156,7 @@ dist_man_MANS = \
|
|||
doc/pcre2_match_data_create.3 \
|
||||
doc/pcre2_match_data_create_from_pattern.3 \
|
||||
doc/pcre2_match_data_free.3 \
|
||||
doc/pcre2_pattern_convert.3 \
|
||||
doc/pcre2_pattern_info.3 \
|
||||
doc/pcre2_serialize_decode.3 \
|
||||
doc/pcre2_serialize_encode.3 \
|
||||
|
@ -155,6 +168,8 @@ dist_man_MANS = \
|
|||
doc/pcre2_set_compile_extra_options.3 \
|
||||
doc/pcre2_set_compile_recursion_guard.3 \
|
||||
doc/pcre2_set_depth_limit.3 \
|
||||
doc/pcre2_set_glob_escape.3 \
|
||||
doc/pcre2_set_glob_separator.3 \
|
||||
doc/pcre2_set_heap_limit.3 \
|
||||
doc/pcre2_set_match_limit.3 \
|
||||
doc/pcre2_set_max_pattern_length.3 \
|
||||
|
@ -179,6 +194,7 @@ dist_man_MANS = \
|
|||
doc/pcre2build.3 \
|
||||
doc/pcre2callout.3 \
|
||||
doc/pcre2compat.3 \
|
||||
doc/pcre2convert.3 \
|
||||
doc/pcre2demo.3 \
|
||||
doc/pcre2grep.1 \
|
||||
doc/pcre2jit.3 \
|
||||
|
|
|
@ -35,6 +35,9 @@ first.
|
|||
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
|
||||
<td> Compatibility with Perl</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2convert.html">pcre2convert</a></td>
|
||||
<td> Experimental foreign pattern conversion functions</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
|
||||
<td> A demonstration C program that uses the PCRE2 library</td></tr>
|
||||
|
||||
|
@ -112,6 +115,18 @@ in the library.
|
|||
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
|
||||
<td> Show build-time configuration options</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_convert_context_copy.html">pcre2_convert_context_copy</a></td>
|
||||
<td> Copy a convert context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_convert_context_create.html">pcre2_convert_context_create</a></td>
|
||||
<td> Create a convert context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_convert_context_free.html">pcre2_convert_context_free</a></td>
|
||||
<td> Free a convert context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_converted_pattern_free.html">pcre2_converted_pattern_free</a></td>
|
||||
<td> Free converted foreign pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||
|
@ -183,6 +198,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
|
||||
<td> Free a match data block</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_pattern_convert.html">pcre2_pattern_convert</a></td>
|
||||
<td> Experimental foreign pattern converter</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
|
||||
<td> Extract information about a pattern</td></tr>
|
||||
|
||||
|
@ -216,6 +234,12 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||
<td> Set the match backtracking depth limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_glob_escape.html">pcre2_set_glob_escape</a></td>
|
||||
<td> Set glob escape character</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_glob_separator.html">pcre2_set_glob_separator</a></td>
|
||||
<td> Set glob separator character</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||
<td> Set the match backtracking heap limit</td></tr>
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_convert_context_copy specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_convert_context_copy man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
|
||||
<b> pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It makes a new copy of a convert context, using the memory allocation function
|
||||
that was used for the original context. The result is NULL if the memory cannot
|
||||
be obtained.
|
||||
</P>
|
||||
<P>
|
||||
The pattern conversion functions are described in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -0,0 +1,41 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_convert_context_create specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_convert_context_create man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_convert_context *pcre2_convert_context_create(</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It creates and initializes a new convert context. If its argument is
|
||||
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
|
||||
allocation function within the general context is used. The result is NULL if
|
||||
the memory could not be obtained.
|
||||
</P>
|
||||
<P>
|
||||
The pattern conversion functions are described in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -0,0 +1,39 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_convert_context_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_convert_context_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a convert context, using the memory
|
||||
freeing function from the general context with which it was created, or
|
||||
<b>free()</b> if that was not set.
|
||||
</P>
|
||||
<P>
|
||||
The pattern conversion functions are described in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -0,0 +1,39 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_converted_pattern_free specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_converted_pattern_free man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a converted pattern that was obtained by
|
||||
calling <b>pcre2_pattern_convert()</b> with arguments that caused it to place
|
||||
the converted pattern into newly obtained heap memory.
|
||||
</P>
|
||||
<P>
|
||||
The pattern conversion functions are described in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -0,0 +1,70 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_pattern_convert specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_pattern_convert man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
||||
<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It converts a foreign pattern (for example, a glob) into a PCRE2 regular
|
||||
expression pattern. Its arguments are:
|
||||
<pre>
|
||||
<i>pattern</i> The foreign pattern
|
||||
<i>length</i> The length of the input pattern or PCRE2_ZERO_TERMINATED
|
||||
<i>options</i> Option bits
|
||||
<i>buffer</i> Pointer to pointer to output buffer, or NULL
|
||||
<i>blength</i> Pointer to output length field
|
||||
<i>cvcontext</i> Pointer to a convert context or NULL
|
||||
</pre>
|
||||
The length of the converted pattern (excluding the terminating zero) is
|
||||
returned via <i>blength</i>. If <i>buffer</i> is NULL, the function just returns
|
||||
the output length. If <i>buffer</i> points to a NULL pointer, heap memory is
|
||||
obtained for the converted pattern, using the allocator in the context if
|
||||
present (or else <b>malloc()</b>), and the field pointed to by <i>buffer</i> is
|
||||
updated. If <i>buffer</i> points to a non-NULL field, that must point to a
|
||||
buffer whose size is in the variable pointed to by <i>blength</i>. This value is
|
||||
updated to contain the length of the converted pattern.
|
||||
</P>
|
||||
<P>
|
||||
The option bits are:
|
||||
<pre>
|
||||
PCRE2_CONVERT_UTF Input is UTF
|
||||
PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity
|
||||
PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern
|
||||
PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern
|
||||
PCRE2_CONVERT_GLOB ) Convert
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types
|
||||
PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob
|
||||
</pre>
|
||||
The return value from <b>pcre2_pattern_convert()</b> is zero on success or a
|
||||
non-zero PCRE2 error code.
|
||||
</P>
|
||||
<P>
|
||||
The pattern conversion functions are described in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -0,0 +1,43 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_glob_escape specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_glob_escape man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
|
||||
<b> uint32_t <i>escape_char</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It sets the escape character that is used when converting globs. The second
|
||||
argument must either be zero (meaning there is no escape character) or a
|
||||
punctuation character whose code point is less than 256. The default is grave
|
||||
accent if running under Windows, otherwise backslash. The result of the
|
||||
function is zero for success or PCRE2_ERROR_BADDATA if the second argument is
|
||||
invalid.
|
||||
</P>
|
||||
<P>
|
||||
The pattern conversion functions are described in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -0,0 +1,42 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_glob_separator specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_glob_separator man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
|
||||
<b> uint32_t <i>separator_char</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It sets the component separator character that is used when converting globs.
|
||||
The second argument must be one of the characters forward slash, backslash, or
|
||||
dot. The default is backslash when running under Windows, otherwise forward
|
||||
slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
|
||||
the second argument is invalid.
|
||||
</P>
|
||||
<P>
|
||||
The pattern conversion functions are described in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -24,37 +24,38 @@ please consult the man page, in case the conversion went wrong.
|
|||
<li><a name="TOC9" href="#SEC9">PCRE2 NATIVE API SERIALIZATION FUNCTIONS</a>
|
||||
<li><a name="TOC10" href="#SEC10">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
|
||||
<li><a name="TOC11" href="#SEC11">PCRE2 NATIVE API OBSOLETE FUNCTIONS</a>
|
||||
<li><a name="TOC12" href="#SEC12">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
|
||||
<li><a name="TOC13" href="#SEC13">PCRE2 API OVERVIEW</a>
|
||||
<li><a name="TOC14" href="#SEC14">STRING LENGTHS AND OFFSETS</a>
|
||||
<li><a name="TOC15" href="#SEC15">NEWLINES</a>
|
||||
<li><a name="TOC16" href="#SEC16">MULTITHREADING</a>
|
||||
<li><a name="TOC17" href="#SEC17">PCRE2 CONTEXTS</a>
|
||||
<li><a name="TOC18" href="#SEC18">CHECKING BUILD-TIME OPTIONS</a>
|
||||
<li><a name="TOC19" href="#SEC19">COMPILING A PATTERN</a>
|
||||
<li><a name="TOC20" href="#SEC20">COMPILATION ERROR CODES</a>
|
||||
<li><a name="TOC21" href="#SEC21">JUST-IN-TIME (JIT) COMPILATION</a>
|
||||
<li><a name="TOC22" href="#SEC22">LOCALE SUPPORT</a>
|
||||
<li><a name="TOC23" href="#SEC23">INFORMATION ABOUT A COMPILED PATTERN</a>
|
||||
<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
|
||||
<li><a name="TOC25" href="#SEC25">SERIALIZATION AND PRECOMPILING</a>
|
||||
<li><a name="TOC26" href="#SEC26">THE MATCH DATA BLOCK</a>
|
||||
<li><a name="TOC27" href="#SEC27">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
||||
<li><a name="TOC28" href="#SEC28">NEWLINE HANDLING WHEN MATCHING</a>
|
||||
<li><a name="TOC29" href="#SEC29">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC30" href="#SEC30">OTHER INFORMATION ABOUT A MATCH</a>
|
||||
<li><a name="TOC31" href="#SEC31">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
||||
<li><a name="TOC32" href="#SEC32">OBTAINING A TEXTUAL ERROR MESSAGE</a>
|
||||
<li><a name="TOC33" href="#SEC33">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||
<li><a name="TOC34" href="#SEC34">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC35" href="#SEC35">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||
<li><a name="TOC36" href="#SEC36">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||
<li><a name="TOC37" href="#SEC37">DUPLICATE SUBPATTERN NAMES</a>
|
||||
<li><a name="TOC38" href="#SEC38">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||
<li><a name="TOC39" href="#SEC39">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||
<li><a name="TOC40" href="#SEC40">SEE ALSO</a>
|
||||
<li><a name="TOC41" href="#SEC41">AUTHOR</a>
|
||||
<li><a name="TOC42" href="#SEC42">REVISION</a>
|
||||
<li><a name="TOC12" href="#SEC12">PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a>
|
||||
<li><a name="TOC13" href="#SEC13">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
|
||||
<li><a name="TOC14" href="#SEC14">PCRE2 API OVERVIEW</a>
|
||||
<li><a name="TOC15" href="#SEC15">STRING LENGTHS AND OFFSETS</a>
|
||||
<li><a name="TOC16" href="#SEC16">NEWLINES</a>
|
||||
<li><a name="TOC17" href="#SEC17">MULTITHREADING</a>
|
||||
<li><a name="TOC18" href="#SEC18">PCRE2 CONTEXTS</a>
|
||||
<li><a name="TOC19" href="#SEC19">CHECKING BUILD-TIME OPTIONS</a>
|
||||
<li><a name="TOC20" href="#SEC20">COMPILING A PATTERN</a>
|
||||
<li><a name="TOC21" href="#SEC21">COMPILATION ERROR CODES</a>
|
||||
<li><a name="TOC22" href="#SEC22">JUST-IN-TIME (JIT) COMPILATION</a>
|
||||
<li><a name="TOC23" href="#SEC23">LOCALE SUPPORT</a>
|
||||
<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A COMPILED PATTERN</a>
|
||||
<li><a name="TOC25" href="#SEC25">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
|
||||
<li><a name="TOC26" href="#SEC26">SERIALIZATION AND PRECOMPILING</a>
|
||||
<li><a name="TOC27" href="#SEC27">THE MATCH DATA BLOCK</a>
|
||||
<li><a name="TOC28" href="#SEC28">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
||||
<li><a name="TOC29" href="#SEC29">NEWLINE HANDLING WHEN MATCHING</a>
|
||||
<li><a name="TOC30" href="#SEC30">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC31" href="#SEC31">OTHER INFORMATION ABOUT A MATCH</a>
|
||||
<li><a name="TOC32" href="#SEC32">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
||||
<li><a name="TOC33" href="#SEC33">OBTAINING A TEXTUAL ERROR MESSAGE</a>
|
||||
<li><a name="TOC34" href="#SEC34">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||
<li><a name="TOC35" href="#SEC35">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC36" href="#SEC36">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||
<li><a name="TOC37" href="#SEC37">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||
<li><a name="TOC38" href="#SEC38">DUPLICATE SUBPATTERN NAMES</a>
|
||||
<li><a name="TOC39" href="#SEC39">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||
<li><a name="TOC40" href="#SEC40">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||
<li><a name="TOC41" href="#SEC41">SEE ALSO</a>
|
||||
<li><a name="TOC42" href="#SEC42">AUTHOR</a>
|
||||
<li><a name="TOC43" href="#SEC43">REVISION</a>
|
||||
</ul>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
|
@ -334,7 +335,43 @@ backward compatibility. They should not be used in new code. The first is
|
|||
replaced by <b>pcre2_set_depth_limit()</b>; the second is no longer needed and
|
||||
has no effect (it always returns zero).
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||
<br><a name="SEC12" href="#TOC1">PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre2_convert_context *pcre2_convert_context_create(</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
|
||||
<b> pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
|
||||
<b> uint32_t <i>escape_char</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
|
||||
<b> uint32_t <i>separator_char</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
||||
<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
These functions provide a way of converting non-PCRE2 patterns into
|
||||
patterns that can be processed by <b>pcre2_compile()</b>. This facility is
|
||||
experimental and may be changed in future releases. At present, "globs" and
|
||||
POSIX basic and extended patterns can be converted. Details are given in the
|
||||
<a href="pcre2convert.html"><b>pcre2convert</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||
<P>
|
||||
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code
|
||||
units, respectively. However, there is just one header file, <b>pcre2.h</b>.
|
||||
|
@ -395,7 +432,7 @@ In the function summaries above, and in the rest of this document and other
|
|||
PCRE2 documents, functions and data types are described using their generic
|
||||
names, without the _8, _16, or _32 suffix.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">PCRE2 API OVERVIEW</a><br>
|
||||
<br><a name="SEC14" href="#TOC1">PCRE2 API OVERVIEW</a><br>
|
||||
<P>
|
||||
PCRE2 has its own native API, which is described in this document. There are
|
||||
also some wrapper functions for the 8-bit library that correspond to the
|
||||
|
@ -503,7 +540,7 @@ Functions with names ending with <b>_free()</b> are used for freeing memory
|
|||
blocks of various sorts. In all cases, if one of these functions is called with
|
||||
a NULL argument, it does nothing.
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">STRING LENGTHS AND OFFSETS</a><br>
|
||||
<br><a name="SEC15" href="#TOC1">STRING LENGTHS AND OFFSETS</a><br>
|
||||
<P>
|
||||
The PCRE2 API uses string lengths and offsets into strings of code units in
|
||||
several places. These values are always of type PCRE2_SIZE, which is an
|
||||
|
@ -513,7 +550,7 @@ as a special indicator for zero-terminated strings and unset offsets.
|
|||
Therefore, the longest string that can be handled is one less than this
|
||||
maximum.
|
||||
<a name="newlines"></a></P>
|
||||
<br><a name="SEC15" href="#TOC1">NEWLINES</a><br>
|
||||
<br><a name="SEC16" href="#TOC1">NEWLINES</a><br>
|
||||
<P>
|
||||
PCRE2 supports five different conventions for indicating line breaks in
|
||||
strings: a single CR (carriage return) character, a single LF (linefeed)
|
||||
|
@ -548,7 +585,7 @@ The choice of newline convention does not affect the interpretation of
|
|||
the \n or \r escape sequences, nor does it affect what \R matches; this has
|
||||
its own separate convention.
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">MULTITHREADING</a><br>
|
||||
<br><a name="SEC17" href="#TOC1">MULTITHREADING</a><br>
|
||||
<P>
|
||||
In a multithreaded application it is important to keep thread-specific data
|
||||
separate from data that can be shared between threads. The PCRE2 library code
|
||||
|
@ -628,7 +665,7 @@ match. This includes details of what was matched, as well as additional
|
|||
information such as the name of a (*MARK) setting. Each thread must provide its
|
||||
own copy of this memory.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">PCRE2 CONTEXTS</a><br>
|
||||
<br><a name="SEC18" href="#TOC1">PCRE2 CONTEXTS</a><br>
|
||||
<P>
|
||||
Some PCRE2 functions have a lot of parameters, many of which are used only by
|
||||
specialist applications, for example, those that use custom memory management
|
||||
|
@ -1013,7 +1050,7 @@ where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
|||
less than the limit set by the caller of <b>pcre2_match()</b> or
|
||||
<b>pcre2_dfa_match()</b> or, if no such limit is set, less than the default.
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
|
||||
<br><a name="SEC19" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
|
@ -1150,7 +1187,7 @@ the PCRE2 version string, zero-terminated. The number of code units used is
|
|||
returned. This is the length of the string plus one unit for the terminating
|
||||
zero.
|
||||
<a name="compiling"></a></P>
|
||||
<br><a name="SEC19" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||
<br><a name="SEC20" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||
<P>
|
||||
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
||||
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
|
||||
|
@ -1741,7 +1778,7 @@ dangerous option. Use with care.
|
|||
PCRE2_EXTRA_MATCH_LINE
|
||||
</pre>
|
||||
This option is provided for use by the <b>-x</b> option of <b>pcre2grep</b>. It
|
||||
causes the pattern only to match complete lines. This is achieved by
|
||||
causes the pattern only to match complete lines. This is achieved by
|
||||
automatically inserting the code for "^(?:" at the start of the compiled
|
||||
pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched
|
||||
line may be in the middle of the subject string. This option can be used with
|
||||
|
@ -1756,7 +1793,7 @@ at the start of the compiled pattern and ")\b" at the end. The option may be
|
|||
used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is
|
||||
also set.
|
||||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||
<br><a name="SEC21" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||
<P>
|
||||
There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
|
||||
(via <i>errorcode</i>) if it finds an error in the pattern. There are also some
|
||||
|
@ -1769,7 +1806,7 @@ error message"
|
|||
<a href="#geterrormessage">below)</a>
|
||||
can be called to obtain a textual error message from any error code.
|
||||
<a name="jitcompiling"></a></P>
|
||||
<br><a name="SEC21" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||
<br><a name="SEC22" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
||||
<br>
|
||||
|
@ -1807,7 +1844,7 @@ patterns to be analyzed, and for one-off matches and simple patterns the
|
|||
benefit of faster execution might be offset by a much slower compilation time.
|
||||
Most (but not all) patterns can be optimized by the JIT compiler.
|
||||
<a name="localesupport"></a></P>
|
||||
<br><a name="SEC22" href="#TOC1">LOCALE SUPPORT</a><br>
|
||||
<br><a name="SEC23" href="#TOC1">LOCALE SUPPORT</a><br>
|
||||
<P>
|
||||
PCRE2 handles caseless matching, and determines whether characters are letters,
|
||||
digits, or whatever, by reference to a set of tables, indexed by character code
|
||||
|
@ -1863,7 +1900,7 @@ is saved with the compiled pattern, and the same tables are used by
|
|||
compilation and matching both happen in the same locale, but different patterns
|
||||
can be processed in different locales.
|
||||
<a name="infoaboutpattern"></a></P>
|
||||
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||
<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||
<P>
|
||||
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
|
@ -2188,7 +2225,7 @@ value returned by this option, because there are cases where the code that
|
|||
calculates the size has to over-estimate. Processing a pattern with the JIT
|
||||
compiler does not alter the value returned by this option.
|
||||
<a name="infoaboutcallouts"></a></P>
|
||||
<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
|
||||
<br><a name="SEC25" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||
|
@ -2207,7 +2244,7 @@ contents of the callout enumeration block are described in the
|
|||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||
documentation, which also gives further details about callouts.
|
||||
</P>
|
||||
<br><a name="SEC25" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
|
||||
<br><a name="SEC26" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
|
||||
<P>
|
||||
It is possible to save compiled patterns on disc or elsewhere, and reload them
|
||||
later, subject to a number of restrictions. The functions whose names begin
|
||||
|
@ -2216,7 +2253,7 @@ the
|
|||
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
|
||||
documentation.
|
||||
<a name="matchdatablock"></a></P>
|
||||
<br><a name="SEC26" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||
<br><a name="SEC27" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||
<P>
|
||||
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
|
@ -2287,7 +2324,7 @@ match data block (for that match) have taken place.
|
|||
When a match data block itself is no longer needed, it should be freed by
|
||||
calling <b>pcre2_match_data_free()</b>.
|
||||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
<br><a name="SEC28" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -2525,7 +2562,7 @@ examples, in the
|
|||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC28" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||
<br><a name="SEC29" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||
<P>
|
||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||
standard convention for the operating system. The default can be overridden in
|
||||
|
@ -2565,7 +2602,7 @@ does \s, even though it includes CR and LF in the characters that it matches.
|
|||
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
||||
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
||||
<a name="matchedstrings"></a></P>
|
||||
<br><a name="SEC29" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
||||
<br><a name="SEC30" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
||||
<P>
|
||||
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
||||
<br>
|
||||
|
@ -2664,7 +2701,7 @@ parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
|
|||
<b>pcre2_match()</b>. The other elements retain whatever values they previously
|
||||
had.
|
||||
<a name="matchotherdata"></a></P>
|
||||
<br><a name="SEC30" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
||||
<br><a name="SEC31" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
||||
<P>
|
||||
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
|
||||
<br>
|
||||
|
@ -2714,7 +2751,7 @@ the code unit offset of the invalid UTF character. Details are given in the
|
|||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page.
|
||||
<a name="errorlist"></a></P>
|
||||
<br><a name="SEC31" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||
<br><a name="SEC32" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||
<P>
|
||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||
converted to a text string by calling the <b>pcre2_get_error_message()</b>
|
||||
|
@ -2820,7 +2857,7 @@ faulted at compile time, but more complicated cases, in particular mutual
|
|||
recursions between two different subpatterns, cannot be detected until matching
|
||||
is attempted.
|
||||
<a name="geterrormessage"></a></P>
|
||||
<br><a name="SEC32" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
|
||||
<br><a name="SEC33" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
|
||||
<P>
|
||||
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
||||
|
@ -2841,7 +2878,7 @@ returned. If the buffer is too small, the message is truncated (but still with
|
|||
a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned.
|
||||
None of the messages are very long; a buffer size of 120 code units is ample.
|
||||
<a name="extractbynumber"></a></P>
|
||||
<br><a name="SEC33" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||
<br><a name="SEC34" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||
|
@ -2938,7 +2975,7 @@ The substring did not participate in the match. For example, if the pattern is
|
|||
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
||||
capturing slots, substring number 1 is unset.
|
||||
</P>
|
||||
<br><a name="SEC34" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<br><a name="SEC35" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||
|
@ -2977,7 +3014,7 @@ can be distinguished from a genuine zero-length substring by inspecting the
|
|||
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
|
||||
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
||||
<a name="extractbyname"></a></P>
|
||||
<br><a name="SEC35" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<br><a name="SEC36" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>);</b>
|
||||
|
@ -3037,7 +3074,7 @@ names are not included in the compiled code. The matching process uses only
|
|||
numbers. For this reason, the use of different names for subpatterns of the
|
||||
same number causes an error at compile time.
|
||||
</P>
|
||||
<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||
<br><a name="SEC37" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -3244,7 +3281,7 @@ obtained by calling the <b>pcre2_get_error_message()</b> function (see
|
|||
"Obtaining a textual error message"
|
||||
<a href="#geterrormessage">above).</a>
|
||||
</P>
|
||||
<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<br><a name="SEC38" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
||||
|
@ -3289,7 +3326,7 @@ in the section entitled <i>Information about a pattern</i>. Given all the
|
|||
relevant entries for the name, you can extract each of their numbers, and hence
|
||||
the captured data.
|
||||
</P>
|
||||
<br><a name="SEC38" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||
<br><a name="SEC39" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||
<P>
|
||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||
when it finds the first match at a given point in the subject. If you want to
|
||||
|
@ -3307,7 +3344,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
|
|||
other alternatives. Ultimately, when it runs out of matches,
|
||||
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
||||
<a name="dfamatch"></a></P>
|
||||
<br><a name="SEC39" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||
<br><a name="SEC40" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -3503,13 +3540,13 @@ some plausibility checks are made on the contents of the workspace, which
|
|||
should contain data about the previous partial match. If any of these checks
|
||||
fail, this error is given.
|
||||
</P>
|
||||
<br><a name="SEC40" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC41" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo</b>(3),
|
||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||
<b>pcre2sample</b>(3), <b>pcre2unicode</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC41" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC42" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
|
@ -3518,9 +3555,9 @@ University Computing Service
|
|||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC43" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 16 June 2017
|
||||
Last updated: 10 July 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -0,0 +1,190 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2convert specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2convert man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a>
|
||||
<li><a name="TOC2" href="#SEC2">THE CONVERT CONTEXT</a>
|
||||
<li><a name="TOC3" href="#SEC3">THE CONVERSION FUNCTION</a>
|
||||
<li><a name="TOC4" href="#SEC4">CONVERTING GLOBS</a>
|
||||
<li><a name="TOC5" href="#SEC5">CONVERTING POSIX PATTERNS</a>
|
||||
<li><a name="TOC6" href="#SEC6">AUTHOR</a>
|
||||
<li><a name="TOC7" href="#SEC7">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a><br>
|
||||
<P>
|
||||
This document describes a set of functions that can be used to convert
|
||||
"foreign" patterns into PCRE2 regular expressions. This facility is currently
|
||||
experimental, and may be changed in future releases. Two kinds of pattern,
|
||||
globs and POSIX patterns, are supported.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">THE CONVERT CONTEXT</a><br>
|
||||
<P>
|
||||
<b>pcre2_convert_context *pcre2_convert_context_create(</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
|
||||
<b> pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
|
||||
<b> uint32_t <i>escape_char</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
|
||||
<b> uint32_t <i>separator_char</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
A convert context is used to hold parameters that affect the way that pattern
|
||||
conversion works. Like all PCRE2 contexts, you need to use a context only if
|
||||
you want to override the defaults. There are the usual create, copy, and free
|
||||
functions. If custom memory management functions are set in a general context
|
||||
that is passed to <b>pcre2_convert_context_create()</b>, they are used for all
|
||||
memory management within the conversion functions.
|
||||
</P>
|
||||
<P>
|
||||
There are only two parameters in the convert context at present. Both apply
|
||||
only to glob conversions. The escape character defaults to grave accent under
|
||||
Windows, otherwise backslash. It can be set to zero, meaning no escape
|
||||
character, or to any punctuation character with a code point less than 256.
|
||||
The separator character defaults to backslash under Windows, otherwise forward
|
||||
slash. It can be set to forward slash, backslash, or dot.
|
||||
</P>
|
||||
<P>
|
||||
The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if
|
||||
their second argument is invalid.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">THE CONVERSION FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
||||
<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The first two arguments of <b>pcre2_pattern_convert()</b> define the foreign
|
||||
pattern that is to be converted. The length may be given as
|
||||
PCRE2_ZERO_TERMINATED. The <b>options</b> argument defines how the pattern is to
|
||||
be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set.
|
||||
PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid.
|
||||
One or more of the glob options, or one of the following POSIX options must be
|
||||
set to define the type of conversion that is required:
|
||||
<pre>
|
||||
PCRE2_CONVERT_GLOB
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
|
||||
PCRE2_CONVERT_GLOB_NO_STARSTAR
|
||||
PCRE2_CONVERT_POSIX_BASIC
|
||||
PCRE2_CONVERT_POSIX_EXTENDED
|
||||
</pre>
|
||||
Details of the conversions are given below. The <b>buffer</b> and <b>blength</b>
|
||||
arguments define how the output is handled:
|
||||
</P>
|
||||
<P>
|
||||
If <b>buffer</b> is NULL, the function just returns the length of the converted
|
||||
pattern via <b>blength</b>. This is one less than the length of buffer needed,
|
||||
because a terminating zero is always added to the output.
|
||||
</P>
|
||||
<P>
|
||||
If <b>buffer</b> points to a NULL pointer, an output buffer is obtained using
|
||||
the allocator in the context or <b>malloc()</b> if no context is supplied. A
|
||||
pointer to this buffer is placed in the variable to which <b>buffer</b> points.
|
||||
When no longer needed the output buffer must be freed by calling
|
||||
<b>pcre2_converted_pattern_free()</b>.
|
||||
</P>
|
||||
<P>
|
||||
If <b>buffer</b> points to a non-NULL pointer, <b>blength</b> must be set to the
|
||||
actual length of the buffer provided (in code units).
|
||||
</P>
|
||||
<P>
|
||||
In all cases, after successful conversion, the variable pointed to by
|
||||
<b>blength</b> is updated to the length actually used (in code units), excluding
|
||||
the terminating zero that is always added.
|
||||
</P>
|
||||
<P>
|
||||
If an error occurs, the length (via <b>blength</b>) is set to the offset
|
||||
within the input pattern where the error was detected. Only gross syntax errors
|
||||
are caught; there are plenty of errors that will get passed on for
|
||||
<b>pcre2_compile()</b> to discover.
|
||||
</P>
|
||||
<P>
|
||||
The return from <b>pcre2_pattern_convert()</b> is zero on success or a non-zero
|
||||
PCRE2 error code. Note that PCRE2 error codes may be positive or negative:
|
||||
<b>pcre2_compile()</b> uses mostly positive codes and <b>pcre2_match()</b>
|
||||
negative ones; <b>pcre2_pattern_convert()</b> uses existing codes of both kinds. A
|
||||
textual error message can be obtained by calling
|
||||
<b>pcre2_get_error_message()</b>.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">CONVERTING GLOBS</a><br>
|
||||
<P>
|
||||
Globs are used to match file names, and consequently have the concept of a
|
||||
"path separator", which defaults to backslash under Windows and forward slash
|
||||
otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not
|
||||
permitted to match separator characters, but the double-star (**) feature
|
||||
(which does match separators) is supported.
|
||||
</P>
|
||||
<P>
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
|
||||
match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with the
|
||||
double-star feature disabled. These options may be given together.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">CONVERTING POSIX PATTERNS</a><br>
|
||||
<P>
|
||||
POSIX defines two kinds of regular expression pattern: basic and extended.
|
||||
These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or
|
||||
PCRE2_CONVERT_POSIX_EXTENDED, respectively.
|
||||
</P>
|
||||
<P>
|
||||
In POSIX patterns, backslash is not special in a character class. Unmatched
|
||||
closing parentheses are treated as literals.
|
||||
</P>
|
||||
<P>
|
||||
In basic patterns, ? + | {} and () must be escaped to be recognized
|
||||
as metacharacters outside a character class. If the first character in the
|
||||
pattern is * it is treated as a literal. ^ is a metacharacter only at the start
|
||||
of a branch.
|
||||
</P>
|
||||
<P>
|
||||
In extended patterns, a backslash not in a character class always
|
||||
makes the next character literal, whatever it is. There are no backreferences.
|
||||
</P>
|
||||
<P>
|
||||
Note: POSIX mandates that the longest possible match at the first matching
|
||||
position must be found. This is not what <b>pcre2_match()</b> does; it yields
|
||||
the first match that is found. An application can use <b>pcre2_dfa_match()</b>
|
||||
to find the longest match, but that does not support backreferences (but then
|
||||
neither do POSIX extended patterns).
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 12 July 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -630,6 +630,10 @@ heavily used in the test files.
|
|||
bsr=[anycrlf|unicode] specify \R handling
|
||||
/B bincode show binary code without lengths
|
||||
callout_info show callout information
|
||||
convert=<options> request foreign pattern conversion
|
||||
convert_glob_escape=c set glob escape character
|
||||
convert_glob_separator=c set glob separator character
|
||||
convert_length set convert buffer length
|
||||
debug same as info,fullbincode
|
||||
framesize show matching frame size
|
||||
fullbincode show binary code with lengths
|
||||
|
@ -1065,6 +1069,41 @@ are ignored (for the stacked copy), with a warning message, except for
|
|||
<b>replace</b>, which causes an error. Note that <b>jitverify</b>, which is
|
||||
allowed, does not carry through to any subsequent matching that uses a stacked
|
||||
pattern.
|
||||
</P>
|
||||
<br><b>
|
||||
Testing foreign pattern conversion
|
||||
</b><br>
|
||||
<P>
|
||||
The experimental foreign pattern conversion functions in PCRE2 can be tested by
|
||||
setting the <b>convert</b> modifier. Its argument is a colon-separated list of
|
||||
options, which set the equivalent option for the <b>pcre2_pattern_convert()</b>
|
||||
function:
|
||||
<pre>
|
||||
glob PCRE2_CONVERT_GLOB
|
||||
glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
|
||||
glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
|
||||
posix_basic PCRE2_CONVERT_POSIX_BASIC
|
||||
posix_extended PCRE2_CONVERT_POSIX_EXTENDED
|
||||
unset Unset all options
|
||||
</pre>
|
||||
The "unset" value is useful for turning off a default that has been set by a
|
||||
<b>#pattern</b> command. When one of these options is set, the input pattern is
|
||||
passed to <b>pcre2_pattern_convert()</b>. If the conversion is successful, the
|
||||
result is reflected in the output and then passed to <b>pcre2_compile()</b>. The
|
||||
normal <b>utf</b> and <b>no_utf_check</b> options, if set, cause the
|
||||
PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to
|
||||
<b>pcre2_pattern_convert()</b>.
|
||||
</P>
|
||||
<P>
|
||||
By default, the conversion function is allowed to allocate a buffer for its
|
||||
output. However, if the <b>convert_length</b> modifier is set to a value greater
|
||||
than zero, <b>pcre2test</b> passes a buffer of the given length. This makes it
|
||||
possible to test the length check.
|
||||
</P>
|
||||
<P>
|
||||
The <b>convert_glob_escape</b> and <b>convert_glob_separator</b> modifiers can be
|
||||
used to specify the escape and separator characters for glob processing,
|
||||
overriding the defaults, which are operating-system dependent.
|
||||
<a name="subjectmodifiers"></a></P>
|
||||
<br><a name="SEC11" href="#TOC1">SUBJECT MODIFIERS</a><br>
|
||||
<P>
|
||||
|
@ -1866,7 +1905,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 02 July 2017
|
||||
Last updated: 12 July 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -35,6 +35,9 @@ first.
|
|||
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
|
||||
<td> Compatibility with Perl</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2convert.html">pcre2convert</a></td>
|
||||
<td> Experimental foreign pattern conversion functions</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
|
||||
<td> A demonstration C program that uses the PCRE2 library</td></tr>
|
||||
|
||||
|
@ -112,6 +115,18 @@ in the library.
|
|||
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
|
||||
<td> Show build-time configuration options</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_convert_context_copy.html">pcre2_convert_context_copy</a></td>
|
||||
<td> Copy a convert context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_convert_context_create.html">pcre2_convert_context_create</a></td>
|
||||
<td> Create a convert context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_convert_context_free.html">pcre2_convert_context_free</a></td>
|
||||
<td> Free a convert context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_converted_pattern_free.html">pcre2_converted_pattern_free</a></td>
|
||||
<td> Free converted foreign pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||
|
@ -183,6 +198,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
|
||||
<td> Free a match data block</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_pattern_convert.html">pcre2_pattern_convert</a></td>
|
||||
<td> Experimental foreign pattern converter</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
|
||||
<td> Extract information about a pattern</td></tr>
|
||||
|
||||
|
@ -216,6 +234,12 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||
<td> Set the match backtracking depth limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_glob_escape.html">pcre2_set_glob_escape</a></td>
|
||||
<td> Set glob escape character</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_glob_separator.html">pcre2_set_glob_separator</a></td>
|
||||
<td> Set glob separator character</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||
<td> Set the match backtracking heap limit</td></tr>
|
||||
|
||||
|
|
|
@ -413,6 +413,35 @@ PCRE2 NATIVE API OBSOLETE FUNCTIONS
|
|||
needed and has no effect (it always returns zero).
|
||||
|
||||
|
||||
PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS
|
||||
|
||||
pcre2_convert_context *pcre2_convert_context_create(
|
||||
pcre2_general_context *gcontext);
|
||||
|
||||
pcre2_convert_context *pcre2_convert_context_copy(
|
||||
pcre2_convert_context *cvcontext);
|
||||
|
||||
void pcre2_convert_context_free(pcre2_convert_context *cvcontext);
|
||||
|
||||
int pcre2_set_glob_escape(pcre2_convert_context *cvcontext,
|
||||
uint32_t escape_char);
|
||||
|
||||
int pcre2_set_glob_separator(pcre2_convert_context *cvcontext,
|
||||
uint32_t separator_char);
|
||||
|
||||
int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length,
|
||||
uint32_t options, PCRE2_UCHAR **buffer,
|
||||
PCRE2_SIZE *blength, pcre2_convert_context *cvcontext);
|
||||
|
||||
void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern);
|
||||
|
||||
These functions provide a way of converting non-PCRE2 patterns into
|
||||
patterns that can be processed by pcre2_compile(). This facility is
|
||||
experimental and may be changed in future releases. At present, "globs"
|
||||
and POSIX basic and extended patterns can be converted. Details are
|
||||
given in the pcre2convert documentation.
|
||||
|
||||
|
||||
PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
|
||||
|
||||
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit
|
||||
|
@ -3400,7 +3429,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 16 June 2017
|
||||
Last updated: 10 July 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
.TH PCRE2_CONVERT_CONTEXT_COPY 3 "10 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B pcre2_convert_context *pcre2_convert_context_copy(
|
||||
.B " pcre2_convert_context *\fIcvcontext\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It makes a new copy of a convert context, using the memory allocation function
|
||||
that was used for the original context. The result is NULL if the memory cannot
|
||||
be obtained.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
|
@ -0,0 +1,27 @@
|
|||
.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "10 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B pcre2_convert_context *pcre2_convert_context_create(
|
||||
.B " pcre2_general_context *\fIgcontext\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It creates and initializes a new convert context. If its argument is
|
||||
NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory
|
||||
allocation function within the general context is used. The result is NULL if
|
||||
the memory could not be obtained.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
|
@ -0,0 +1,25 @@
|
|||
.TH PCRE2_CONVERT_CONTEXT_FREE 3 "10 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a convert context, using the memory
|
||||
freeing function from the general context with which it was created, or
|
||||
\fBfree()\fP if that was not set.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
|
@ -0,0 +1,25 @@
|
|||
.TH PCRE2_CONVERTED_PATTERN_FREE 3 "11 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a converted pattern that was obtained by
|
||||
calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place
|
||||
the converted pattern into newly obtained heap memory.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
|
@ -0,0 +1,55 @@
|
|||
.TH PCRE2_PATTERN_CONVERT 3 "11 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
|
||||
.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
|
||||
.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It converts a foreign pattern (for example, a glob) into a PCRE2 regular
|
||||
expression pattern. Its arguments are:
|
||||
.sp
|
||||
\fIpattern\fP The foreign pattern
|
||||
\fIlength\fP The length of the input pattern or PCRE2_ZERO_TERMINATED
|
||||
\fIoptions\fP Option bits
|
||||
\fIbuffer\fP Pointer to pointer to output buffer, or NULL
|
||||
\fIblength\fP Pointer to output length field
|
||||
\fIcvcontext\fP Pointer to a convert context or NULL
|
||||
.sp
|
||||
The length of the converted pattern (excluding the terminating zero) is
|
||||
returned via \fIblength\fP. If \fIbuffer\fP is NULL, the function just returns
|
||||
the output length. If \fIbuffer\fP points to a NULL pointer, heap memory is
|
||||
obtained for the converted pattern, using the allocator in the context if
|
||||
present (or else \fBmalloc()\fP), and the field pointed to by \fIbuffer\fP is
|
||||
updated. If \fIbuffer\fP points to a non-NULL field, that must point to a
|
||||
buffer whose size is in the variable pointed to by \fIblength\fP. This value is
|
||||
updated.
|
||||
.P
|
||||
The option bits are:
|
||||
.sp
|
||||
PCRE2_CONVERT_UTF Input is UTF
|
||||
PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity
|
||||
PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern
|
||||
PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern
|
||||
PCRE2_CONVERT_GLOB ) Convert
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types
|
||||
PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob
|
||||
.sp
|
||||
The return value from \fBpcre2_pattern_convert()\fP is zero on success or a
|
||||
non-zero PCRE2 error code.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
|
@ -0,0 +1,29 @@
|
|||
.TH PCRE2_SET_GLOB_ESCAPE 3 "11 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
|
||||
.B " uint32_t \fIescape_char\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It sets the escape character that is used when converting globs. The second
|
||||
argument must either be zero (meaning there is no escape character) or a
|
||||
punctuation character whose code point is less than 256. The default is grave
|
||||
accent if running under Windows, otherwise backslash. The result of the
|
||||
function is zero for success or PCRE2_ERROR_BADDATA if the second argument is
|
||||
invalid.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
|
@ -0,0 +1,28 @@
|
|||
.TH PCRE2_SET_GLOB_SEPARATOR 3 "11 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
|
||||
.B " uint32_t \fIseparator_char\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function is part of an experimental set of pattern conversion functions.
|
||||
It sets the component separator character that is used when converting globs.
|
||||
The second argument must be one of the characters forward slash, backslash, or
|
||||
dot. The default is backslash when running under Windows, otherwise forward
|
||||
slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
|
||||
the second argument is invalid.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "16 June 2017" "PCRE2 10.30"
|
||||
.TH PCRE2API 3 "10 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -272,6 +272,41 @@ replaced by \fBpcre2_set_depth_limit()\fP; the second is no longer needed and
|
|||
has no effect (it always returns zero).
|
||||
.
|
||||
.
|
||||
.SH "PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre2_convert_context *pcre2_convert_context_create(
|
||||
.B " pcre2_general_context *\fIgcontext\fP);"
|
||||
.sp
|
||||
.B pcre2_convert_context *pcre2_convert_context_copy(
|
||||
.B " pcre2_convert_context *\fIcvcontext\fP);"
|
||||
.sp
|
||||
.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
|
||||
.sp
|
||||
.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
|
||||
.B " uint32_t \fIescape_char\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
|
||||
.B " uint32_t \fIseparator_char\fP);"
|
||||
.sp
|
||||
.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
|
||||
.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
|
||||
.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
|
||||
.sp
|
||||
.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
|
||||
.fi
|
||||
.sp
|
||||
These functions provide a way of converting non-PCRE2 patterns into
|
||||
patterns that can be processed by \fBpcre2_compile()\fP. This facility is
|
||||
experimental and may be changed in future releases. At present, "globs" and
|
||||
POSIX basic and extended patterns can be converted. Details are given in the
|
||||
.\" HREF
|
||||
\fBpcre2convert\fP
|
||||
.\"
|
||||
documentation.
|
||||
.
|
||||
.
|
||||
.SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -1695,7 +1730,7 @@ dangerous option. Use with care.
|
|||
PCRE2_EXTRA_MATCH_LINE
|
||||
.sp
|
||||
This option is provided for use by the \fB-x\fP option of \fBpcre2grep\fP. It
|
||||
causes the pattern only to match complete lines. This is achieved by
|
||||
causes the pattern only to match complete lines. This is achieved by
|
||||
automatically inserting the code for "^(?:" at the start of the compiled
|
||||
pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched
|
||||
line may be in the middle of the subject string. This option can be used with
|
||||
|
@ -3539,6 +3574,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 16 June 2017
|
||||
Last updated: 10 July 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
.TH PCRE2CONVERT 3 "12 July 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "EXPERIMENTAL PATTERN CONVERSION FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
This document describes a set of functions that can be used to convert
|
||||
"foreign" patterns into PCRE2 regular expressions. This facility is currently
|
||||
experimental, and may be changed in future releases. Two kinds of pattern,
|
||||
globs and POSIX patterns, are supported.
|
||||
.
|
||||
.
|
||||
.SH "THE CONVERT CONTEXT"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre2_convert_context *pcre2_convert_context_create(
|
||||
.B " pcre2_general_context *\fIgcontext\fP);"
|
||||
.sp
|
||||
.B pcre2_convert_context *pcre2_convert_context_copy(
|
||||
.B " pcre2_convert_context *\fIcvcontext\fP);"
|
||||
.sp
|
||||
.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
|
||||
.sp
|
||||
.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
|
||||
.B " uint32_t \fIescape_char\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
|
||||
.B " uint32_t \fIseparator_char\fP);"
|
||||
.fi
|
||||
.sp
|
||||
A convert context is used to hold parameters that affect the way that pattern
|
||||
conversion works. Like all PCRE2 contexts, you need to use a context only if
|
||||
you want to override the defaults. There are the usual create, copy, and free
|
||||
functions. If custom memory management functions are set in a general context
|
||||
that is passed to \fBpcre2_convert_context_create()\fP, they are used for all
|
||||
memory management within the conversion functions.
|
||||
.P
|
||||
There are only two parameters in the convert context at present. Both apply
|
||||
only to glob conversions. The escape character defaults to grave accent under
|
||||
Windows, otherwise backslash. It can be set to zero, meaning no escape
|
||||
character, or to any punctuation character with a code point less than 256.
|
||||
The separator character defaults to backslash under Windows, otherwise forward
|
||||
slash. It can be set to forward slash, backslash, or dot.
|
||||
.P
|
||||
The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if
|
||||
their second argument is invalid.
|
||||
.
|
||||
.
|
||||
.SH "THE CONVERSION FUNCTION"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
|
||||
.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
|
||||
.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
|
||||
.sp
|
||||
.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
|
||||
.fi
|
||||
.sp
|
||||
The first two arguments of \fBpcre2_pattern_convert()\fP define the foreign
|
||||
pattern that is to be converted. The length may be given as
|
||||
PCRE2_ZERO_TERMINATED. The \fBoptions\fP argument defines how the pattern is to
|
||||
be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set.
|
||||
PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid.
|
||||
One or more of the glob options, or one of the following POSIX options must be
|
||||
set to define the type of conversion that is required:
|
||||
.sp
|
||||
PCRE2_CONVERT_GLOB
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
|
||||
PCRE2_CONVERT_GLOB_NO_STARSTAR
|
||||
PCRE2_CONVERT_POSIX_BASIC
|
||||
PCRE2_CONVERT_POSIX_EXTENDED
|
||||
.sp
|
||||
Details of the conversions are given below. The \fBbuffer\fP and \fBblength\fP
|
||||
arguments define how the output is handled:
|
||||
.P
|
||||
If \fBbuffer\fP is NULL, the function just returns the length of the converted
|
||||
pattern via \fBblength\fP. This is one less than the length of buffer needed,
|
||||
because a terminating zero is always added to the output.
|
||||
.P
|
||||
If \fBbuffer\fP points to a NULL pointer, an output buffer is obtained using
|
||||
the allocator in the context or \fBmalloc()\fP if no context is supplied. A
|
||||
pointer to this buffer is placed in the variable to which \fBbuffer\fP points.
|
||||
When no longer needed the output buffer must be freed by calling
|
||||
\fBpcre2_converted_pattern_free()\fP.
|
||||
.P
|
||||
If \fBbuffer\fP points to a non-NULL pointer, \fBblength\fP must be set to the
|
||||
actual length of the buffer provided (in code units).
|
||||
.P
|
||||
In all cases, after successful conversion, the variable pointed to by
|
||||
\fBblength\fP is updated to the length actually used (in code units), excluding
|
||||
the terminating zero that is always added.
|
||||
.P
|
||||
If an error occurs, the length (via \fBblength\fP) is set to the offset
|
||||
within the input pattern where the error was detected. Only gross syntax errors
|
||||
are caught; there are plenty of errors that will get passed on for
|
||||
\fBpcre2_compile()\fP to discover.
|
||||
.P
|
||||
The return from \fBpcre2_pattern_convert()\fP is zero on success or a non-zero
|
||||
PCRE2 error code. Note that PCRE2 error codes may be positive or negative:
|
||||
\fBpcre2_compile()\fP uses mostly positive codes and \fBpcre2_match()\fP
|
||||
negative ones; \fBpcre2_pattern_convert()\fP uses existing codes of both kinds. A
|
||||
textual error message can be obtained by calling
|
||||
\fBpcre2_get_error_message()\fP.
|
||||
.
|
||||
.
|
||||
.SH "CONVERTING GLOBS"
|
||||
.rs
|
||||
.sp
|
||||
Globs are used to match file names, and consequently have the concept of a
|
||||
"path separator", which defaults to backslash under Windows and forward slash
|
||||
otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not
|
||||
permitted to match separator characters, but the double-star (**) feature
|
||||
(which does match separators) is supported.
|
||||
.P
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
|
||||
match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with the
|
||||
double-star feature disabled. These options may be given together.
|
||||
.
|
||||
.
|
||||
.SH "CONVERTING POSIX PATTERNS"
|
||||
.rs
|
||||
.sp
|
||||
POSIX defines two kinds of regular expression pattern: basic and extended.
|
||||
These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or
|
||||
PCRE2_CONVERT_POSIX_EXTENDED, respectively.
|
||||
.P
|
||||
In POSIX patterns, backslash is not special in a character class. Unmatched
|
||||
closing parentheses are treated as literals.
|
||||
.P
|
||||
In basic patterns, ? + | {} and () must be escaped to be recognized
|
||||
as metacharacters outside a character class. If the first character in the
|
||||
pattern is * it is treated as a literal. ^ is a metacharacter only at the start
|
||||
of a branch.
|
||||
.P
|
||||
In extended patterns, a backslash not in a character class always
|
||||
makes the next character literal, whatever it is. There are no backreferences.
|
||||
.P
|
||||
Note: POSIX mandates that the longest possible match at the first matching
|
||||
position must be found. This is not what \fBpcre2_match()\fP does; it yields
|
||||
the first match that is found. An application can use \fBpcre2_dfa_match()\fP
|
||||
to find the longest match, but that does not support backreferences (but then
|
||||
neither do POSIX extended patterns).
|
||||
.
|
||||
.
|
||||
.SH AUTHOR
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Philip Hazel
|
||||
University Computing Service
|
||||
Cambridge, England.
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH REVISION
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 12 July 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "02 July 2017" "PCRE 10.30"
|
||||
.TH PCRE2TEST 1 "12 July 2017" "PCRE 10.30"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -592,6 +592,10 @@ heavily used in the test files.
|
|||
bsr=[anycrlf|unicode] specify \eR handling
|
||||
/B bincode show binary code without lengths
|
||||
callout_info show callout information
|
||||
convert=<options> request foreign pattern conversion
|
||||
convert_glob_escape=c set glob escape character
|
||||
convert_glob_separator=c set glob separator character
|
||||
convert_length set convert buffer length
|
||||
debug same as info,fullbincode
|
||||
framesize show matching frame size
|
||||
fullbincode show binary code with lengths
|
||||
|
@ -1035,6 +1039,39 @@ allowed, does not carry through to any subsequent matching that uses a stacked
|
|||
pattern.
|
||||
.
|
||||
.
|
||||
.SS "Testing foreign pattern conversion"
|
||||
.rs
|
||||
.sp
|
||||
The experimental foreign pattern conversion functions in PCRE2 can be tested by
|
||||
setting the \fBconvert\fP modifier. Its argument is a colon-separated list of
|
||||
options, which set the equivalent option for the \fBpcre2_pattern_convert()\fP
|
||||
function:
|
||||
.sp
|
||||
glob PCRE2_CONVERT_GLOB
|
||||
glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
|
||||
glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
|
||||
posix_basic PCRE2_CONVERT_POSIX_BASIC
|
||||
posix_extended PCRE2_CONVERT_POSIX_EXTENDED
|
||||
unset Unset all options
|
||||
.sp
|
||||
The "unset" value is useful for turning off a default that has been set by a
|
||||
\fB#pattern\fP command. When one of these options is set, the input pattern is
|
||||
passed to \fBpcre2_pattern_convert()\fP. If the conversion is successful, the
|
||||
result is reflected in the output and then passed to \fBpcre2_compile()\fP. The
|
||||
normal \fButf\fP and \fBno_utf_check\fP options, if set, cause the
|
||||
PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to
|
||||
\fBpcre2_pattern_convert()\fP.
|
||||
.P
|
||||
By default, the conversion function is allowed to allocate a buffer for its
|
||||
output. However, if the \fBconvert_length\fP modifier is set to a value greater
|
||||
than zero, \fBpcre2test\fP passes a buffer of the given length. This makes it
|
||||
possible to test the length check.
|
||||
.P
|
||||
The \fBconvert_glob_escape\fP and \fBconvert_glob_separator\fP modifiers can be
|
||||
used to specify the escape and separator characters for glob processing,
|
||||
overriding the defaults, which are operating-system dependent.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="subjectmodifiers"></a>
|
||||
.SH "SUBJECT MODIFIERS"
|
||||
.rs
|
||||
|
@ -1850,6 +1887,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 02 July 2017
|
||||
Last updated: 12 July 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -570,6 +570,10 @@ PATTERN MODIFIERS
|
|||
bsr=[anycrlf|unicode] specify \R handling
|
||||
/B bincode show binary code without lengths
|
||||
callout_info show callout information
|
||||
convert=<options> request foreign pattern conversion
|
||||
convert_glob_escape=c set glob escape character
|
||||
convert_glob_separator=c set glob separator character
|
||||
convert_length set convert buffer length
|
||||
debug same as info,fullbincode
|
||||
framesize show matching frame size
|
||||
fullbincode show binary code with lengths
|
||||
|
@ -953,6 +957,37 @@ PATTERN MODIFIERS
|
|||
that jitverify, which is allowed, does not carry through to any subse-
|
||||
quent matching that uses a stacked pattern.
|
||||
|
||||
Testing foreign pattern conversion
|
||||
|
||||
The experimental foreign pattern conversion functions in PCRE2 can be
|
||||
tested by setting the convert modifier. Its argument is a colon-sepa-
|
||||
rated list of options, which set the equivalent option for the
|
||||
pcre2_pattern_convert() function:
|
||||
|
||||
glob PCRE2_CONVERT_GLOB
|
||||
glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
|
||||
glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
|
||||
posix_basic PCRE2_CONVERT_POSIX_BASIC
|
||||
posix_extended PCRE2_CONVERT_POSIX_EXTENDED
|
||||
unset Unset all options
|
||||
|
||||
The "unset" value is useful for turning off a default that has been set
|
||||
by a #pattern command. When one of these options is set, the input pat-
|
||||
tern is passed to pcre2_pattern_convert(). If the conversion is suc-
|
||||
cessful, the result is reflected in the output and then passed to
|
||||
pcre2_compile(). The normal utf and no_utf_check options, if set, cause
|
||||
the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be
|
||||
passed to pcre2_pattern_convert().
|
||||
|
||||
By default, the conversion function is allowed to allocate a buffer for
|
||||
its output. However, if the convert_length modifier is set to a value
|
||||
greater than zero, pcre2test passes a buffer of the given length. This
|
||||
makes it possible to test the length check.
|
||||
|
||||
The convert_glob_escape and convert_glob_separator modifiers can be
|
||||
used to specify the escape and separator characters for glob process-
|
||||
ing, overriding the defaults, which are operating-system dependent.
|
||||
|
||||
|
||||
SUBJECT MODIFIERS
|
||||
|
||||
|
@ -1692,5 +1727,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 02 July 2017
|
||||
Last updated: 12 July 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
|
@ -193,8 +193,6 @@ ignored for pcre2_jit_match(). */
|
|||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
|
||||
#define PCRE2_CONVERT_GLOB_BASIC 0x00000070u
|
||||
#define PCRE2_CONVERT_GLOB_IGNORE_DOT_START 0x00000080u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
|
|
|
@ -193,8 +193,6 @@ ignored for pcre2_jit_match(). */
|
|||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
|
||||
#define PCRE2_CONVERT_GLOB_BASIC 0x00000070u
|
||||
#define PCRE2_CONVERT_GLOB_IGNORE_DOT_START 0x00000080u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
|
|
|
@ -49,7 +49,6 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
|
||||
|
||||
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
|
||||
PCRE2_CONVERT_GLOB_IGNORE_DOT_START| \
|
||||
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
|
||||
PCRE2_CONVERT_GLOB_NO_STARSTAR| \
|
||||
TYPE_OPTIONS)
|
||||
|
|
|
@ -401,8 +401,6 @@ typedef struct convertstruct {
|
|||
|
||||
static convertstruct convertlist[] = {
|
||||
{ "glob", PCRE2_CONVERT_GLOB },
|
||||
{ "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
|
||||
{ "glob_ignore_dot_start", PCRE2_CONVERT_GLOB_IGNORE_DOT_START },
|
||||
{ "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
|
||||
{ "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
|
||||
{ "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
# Set the glob separator explicitly so that different OS defaults are not a
|
||||
# problem. Then test various errors.
|
||||
|
||||
#pattern convert=glob_basic,convert_glob_escape=\,convert_glob_separator=/
|
||||
#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/
|
||||
|
||||
# The fact that this one works in 13 bytes in the 8-bit library shows that the
|
||||
# output is in UTF-8, though pcre2test shows the character as an escape.
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
# Set the glob separator explicitly so that different OS defaults are not a
|
||||
# problem. Then test various errors.
|
||||
|
||||
#pattern convert=glob_basic,convert_glob_escape=\,convert_glob_separator=/
|
||||
#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/
|
||||
|
||||
# The fact that this one works in 13 bytes in the 8-bit library shows that the
|
||||
# output is in UTF-8, though pcre2test shows the character as an escape.
|
||||
|
|
Loading…
Reference in New Issue