Document experimental pattern conversion functions and remove unimplemented features.
This commit is contained in:
Philip.Hazel 2017-07-12 16:34:49 +00:00
parent 4f7a608d56
commit a23715d7b1
32 changed files with 1235 additions and 81 deletions

View File

@ -216,6 +216,9 @@ unit". Previously only non-anchored patterns did this.
49. Update extended grapheme breaking rules to the latest set that are in
Unicode Standard Annex #29.
50. Added experimental foreign pattern conversion facilities
(pcre2_pattern_convert() and friends).
Version 10.23 14-February-2017
------------------------------

View File

@ -36,6 +36,10 @@ dist_html_DATA = \
doc/html/pcre2_compile_context_create.html \
doc/html/pcre2_compile_context_free.html \
doc/html/pcre2_config.html \
doc/html/pcre2_convert_context_copy.html \
doc/html/pcre2_convert_context_create.html \
doc/html/pcre2_convert_context_free.html \
doc/html/pcre2_converted_pattern_free.html \
doc/html/pcre2_dfa_match.html \
doc/html/pcre2_general_context_copy.html \
doc/html/pcre2_general_context_create.html \
@ -59,6 +63,7 @@ dist_html_DATA = \
doc/html/pcre2_match_data_create.html \
doc/html/pcre2_match_data_create_from_pattern.html \
doc/html/pcre2_match_data_free.html \
doc/html/pcre2_pattern_convert.html \
doc/html/pcre2_pattern_info.html \
doc/html/pcre2_serialize_decode.html \
doc/html/pcre2_serialize_encode.html \
@ -70,6 +75,8 @@ dist_html_DATA = \
doc/html/pcre2_set_compile_extra_options.html \
doc/html/pcre2_set_compile_recursion_guard.html \
doc/html/pcre2_set_depth_limit.html \
doc/html/pcre2_set_glob_escape.html \
doc/html/pcre2_set_glob_separator.html \
doc/html/pcre2_set_heap_limit.html \
doc/html/pcre2_set_match_limit.html \
doc/html/pcre2_set_max_pattern_length.html \
@ -94,6 +101,7 @@ dist_html_DATA = \
doc/html/pcre2build.html \
doc/html/pcre2callout.html \
doc/html/pcre2compat.html \
doc/html/pcre2convert.html \
doc/html/pcre2demo.html \
doc/html/pcre2grep.html \
doc/html/pcre2jit.html \
@ -121,6 +129,10 @@ dist_man_MANS = \
doc/pcre2_compile_context_create.3 \
doc/pcre2_compile_context_free.3 \
doc/pcre2_config.3 \
doc/pcre2_convert_context_copy.3 \
doc/pcre2_convert_context_create.3 \
doc/pcre2_convert_context_free.3 \
doc/pcre2_converted_pattern_free.3 \
doc/pcre2_dfa_match.3 \
doc/pcre2_general_context_copy.3 \
doc/pcre2_general_context_create.3 \
@ -144,6 +156,7 @@ dist_man_MANS = \
doc/pcre2_match_data_create.3 \
doc/pcre2_match_data_create_from_pattern.3 \
doc/pcre2_match_data_free.3 \
doc/pcre2_pattern_convert.3 \
doc/pcre2_pattern_info.3 \
doc/pcre2_serialize_decode.3 \
doc/pcre2_serialize_encode.3 \
@ -155,6 +168,8 @@ dist_man_MANS = \
doc/pcre2_set_compile_extra_options.3 \
doc/pcre2_set_compile_recursion_guard.3 \
doc/pcre2_set_depth_limit.3 \
doc/pcre2_set_glob_escape.3 \
doc/pcre2_set_glob_separator.3 \
doc/pcre2_set_heap_limit.3 \
doc/pcre2_set_match_limit.3 \
doc/pcre2_set_max_pattern_length.3 \
@ -179,6 +194,7 @@ dist_man_MANS = \
doc/pcre2build.3 \
doc/pcre2callout.3 \
doc/pcre2compat.3 \
doc/pcre2convert.3 \
doc/pcre2demo.3 \
doc/pcre2grep.1 \
doc/pcre2jit.3 \

View File

@ -35,6 +35,9 @@ first.
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
<td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
<tr><td><a href="pcre2convert.html">pcre2convert</a></td>
<td>&nbsp;&nbsp;Experimental foreign pattern conversion functions</td></tr>
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
<td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
@ -112,6 +115,18 @@ in the library.
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
<td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
<tr><td><a href="pcre2_convert_context_copy.html">pcre2_convert_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a convert context</td></tr>
<tr><td><a href="pcre2_convert_context_create.html">pcre2_convert_context_create</a></td>
<td>&nbsp;&nbsp;Create a convert context</td></tr>
<tr><td><a href="pcre2_convert_context_free.html">pcre2_convert_context_free</a></td>
<td>&nbsp;&nbsp;Free a convert context</td></tr>
<tr><td><a href="pcre2_converted_pattern_free.html">pcre2_converted_pattern_free</a></td>
<td>&nbsp;&nbsp;Free converted foreign pattern</td></tr>
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
@ -183,6 +198,9 @@ in the library.
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
<td>&nbsp;&nbsp;Free a match data block</td></tr>
<tr><td><a href="pcre2_pattern_convert.html">pcre2_pattern_convert</a></td>
<td>&nbsp;&nbsp;Experimental foreign pattern converter</td></tr>
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
<td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
@ -216,6 +234,12 @@ in the library.
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr>
<tr><td><a href="pcre2_set_glob_escape.html">pcre2_set_glob_escape</a></td>
<td>&nbsp;&nbsp;Set glob escape character</td></tr>
<tr><td><a href="pcre2_set_glob_separator.html">pcre2_set_glob_separator</a></td>
<td>&nbsp;&nbsp;Set glob separator character</td></tr>
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking heap limit</td></tr>

View File

@ -0,0 +1,40 @@
<html>
<head>
<title>pcre2_convert_context_copy specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_convert_context_copy man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
<b> pcre2_convert_context *<i>cvcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It makes a new copy of a convert context, using the memory allocation function
that was used for the original context. The result is NULL if the memory cannot
be obtained.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,41 @@
<html>
<head>
<title>pcre2_convert_context_create specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_convert_context_create man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>pcre2_convert_context *pcre2_convert_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It creates and initializes a new convert context. If its argument is
NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
allocation function within the general context is used. The result is NULL if
the memory could not be obtained.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,39 @@
<html>
<head>
<title>pcre2_convert_context_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_convert_context_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a convert context, using the memory
freeing function from the general context with which it was created, or
<b>free()</b> if that was not set.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,39 @@
<html>
<head>
<title>pcre2_converted_pattern_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_converted_pattern_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a converted pattern that was obtained by
calling <b>pcre2_pattern_convert()</b> with arguments that caused it to place
the converted pattern into newly obtained heap memory.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,70 @@
<html>
<head>
<title>pcre2_pattern_convert specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_pattern_convert man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It converts a foreign pattern (for example, a glob) into a PCRE2 regular
expression pattern. Its arguments are:
<pre>
<i>pattern</i> The foreign pattern
<i>length</i> The length of the input pattern or PCRE2_ZERO_TERMINATED
<i>options</i> Option bits
<i>buffer</i> Pointer to pointer to output buffer, or NULL
<i>blength</i> Pointer to output length field
<i>cvcontext</i> Pointer to a convert context or NULL
</pre>
The length of the converted pattern (excluding the terminating zero) is
returned via <i>blength</i>. If <i>buffer</i> is NULL, the function just returns
the output length. If <i>buffer</i> points to a NULL pointer, heap memory is
obtained for the converted pattern, using the allocator in the context if
present (or else <b>malloc()</b>), and the field pointed to by <i>buffer</i> is
updated. If <i>buffer</i> points to a non-NULL field, that must point to a
buffer whose size is in the variable pointed to by <i>blength</i>. This value is
updated.
</P>
<P>
The option bits are:
<pre>
PCRE2_CONVERT_UTF Input is UTF
PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity
PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern
PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern
PCRE2_CONVERT_GLOB ) Convert
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types
PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob
</pre>
The return value from <b>pcre2_pattern_convert()</b> is zero on success or a
non-zero PCRE2 error code.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,43 @@
<html>
<head>
<title>pcre2_set_glob_escape specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_glob_escape man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>escape_char</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It sets the escape character that is used when converting globs. The second
argument must either be zero (meaning there is no escape character) or a
punctuation character whose code point is less than 256. The default is grave
accent if running under Windows, otherwise backslash. The result of the
function is zero for success or PCRE2_ERROR_BADDATA if the second argument is
invalid.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -0,0 +1,42 @@
<html>
<head>
<title>pcre2_set_glob_separator specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_set_glob_separator man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>separator_char</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is part of an experimental set of pattern conversion functions.
It sets the component separator character that is used when converting globs.
The second argument must be one of the characters forward slash, backslash, or
dot. The default is backslash when running under Windows, otherwise forward
slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
the second argument is invalid.
</P>
<P>
The pattern conversion functions are described in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -24,37 +24,38 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC9" href="#SEC9">PCRE2 NATIVE API SERIALIZATION FUNCTIONS</a>
<li><a name="TOC10" href="#SEC10">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
<li><a name="TOC11" href="#SEC11">PCRE2 NATIVE API OBSOLETE FUNCTIONS</a>
<li><a name="TOC12" href="#SEC12">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
<li><a name="TOC13" href="#SEC13">PCRE2 API OVERVIEW</a>
<li><a name="TOC14" href="#SEC14">STRING LENGTHS AND OFFSETS</a>
<li><a name="TOC15" href="#SEC15">NEWLINES</a>
<li><a name="TOC16" href="#SEC16">MULTITHREADING</a>
<li><a name="TOC17" href="#SEC17">PCRE2 CONTEXTS</a>
<li><a name="TOC18" href="#SEC18">CHECKING BUILD-TIME OPTIONS</a>
<li><a name="TOC19" href="#SEC19">COMPILING A PATTERN</a>
<li><a name="TOC20" href="#SEC20">COMPILATION ERROR CODES</a>
<li><a name="TOC21" href="#SEC21">JUST-IN-TIME (JIT) COMPILATION</a>
<li><a name="TOC22" href="#SEC22">LOCALE SUPPORT</a>
<li><a name="TOC23" href="#SEC23">INFORMATION ABOUT A COMPILED PATTERN</a>
<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
<li><a name="TOC25" href="#SEC25">SERIALIZATION AND PRECOMPILING</a>
<li><a name="TOC26" href="#SEC26">THE MATCH DATA BLOCK</a>
<li><a name="TOC27" href="#SEC27">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
<li><a name="TOC28" href="#SEC28">NEWLINE HANDLING WHEN MATCHING</a>
<li><a name="TOC29" href="#SEC29">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
<li><a name="TOC30" href="#SEC30">OTHER INFORMATION ABOUT A MATCH</a>
<li><a name="TOC31" href="#SEC31">ERROR RETURNS FROM <b>pcre2_match()</b></a>
<li><a name="TOC32" href="#SEC32">OBTAINING A TEXTUAL ERROR MESSAGE</a>
<li><a name="TOC33" href="#SEC33">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
<li><a name="TOC34" href="#SEC34">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
<li><a name="TOC35" href="#SEC35">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
<li><a name="TOC36" href="#SEC36">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
<li><a name="TOC37" href="#SEC37">DUPLICATE SUBPATTERN NAMES</a>
<li><a name="TOC38" href="#SEC38">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
<li><a name="TOC39" href="#SEC39">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
<li><a name="TOC40" href="#SEC40">SEE ALSO</a>
<li><a name="TOC41" href="#SEC41">AUTHOR</a>
<li><a name="TOC42" href="#SEC42">REVISION</a>
<li><a name="TOC12" href="#SEC12">PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a>
<li><a name="TOC13" href="#SEC13">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
<li><a name="TOC14" href="#SEC14">PCRE2 API OVERVIEW</a>
<li><a name="TOC15" href="#SEC15">STRING LENGTHS AND OFFSETS</a>
<li><a name="TOC16" href="#SEC16">NEWLINES</a>
<li><a name="TOC17" href="#SEC17">MULTITHREADING</a>
<li><a name="TOC18" href="#SEC18">PCRE2 CONTEXTS</a>
<li><a name="TOC19" href="#SEC19">CHECKING BUILD-TIME OPTIONS</a>
<li><a name="TOC20" href="#SEC20">COMPILING A PATTERN</a>
<li><a name="TOC21" href="#SEC21">COMPILATION ERROR CODES</a>
<li><a name="TOC22" href="#SEC22">JUST-IN-TIME (JIT) COMPILATION</a>
<li><a name="TOC23" href="#SEC23">LOCALE SUPPORT</a>
<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A COMPILED PATTERN</a>
<li><a name="TOC25" href="#SEC25">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
<li><a name="TOC26" href="#SEC26">SERIALIZATION AND PRECOMPILING</a>
<li><a name="TOC27" href="#SEC27">THE MATCH DATA BLOCK</a>
<li><a name="TOC28" href="#SEC28">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
<li><a name="TOC29" href="#SEC29">NEWLINE HANDLING WHEN MATCHING</a>
<li><a name="TOC30" href="#SEC30">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
<li><a name="TOC31" href="#SEC31">OTHER INFORMATION ABOUT A MATCH</a>
<li><a name="TOC32" href="#SEC32">ERROR RETURNS FROM <b>pcre2_match()</b></a>
<li><a name="TOC33" href="#SEC33">OBTAINING A TEXTUAL ERROR MESSAGE</a>
<li><a name="TOC34" href="#SEC34">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
<li><a name="TOC35" href="#SEC35">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
<li><a name="TOC36" href="#SEC36">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
<li><a name="TOC37" href="#SEC37">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
<li><a name="TOC38" href="#SEC38">DUPLICATE SUBPATTERN NAMES</a>
<li><a name="TOC39" href="#SEC39">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
<li><a name="TOC40" href="#SEC40">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
<li><a name="TOC41" href="#SEC41">SEE ALSO</a>
<li><a name="TOC42" href="#SEC42">AUTHOR</a>
<li><a name="TOC43" href="#SEC43">REVISION</a>
</ul>
<P>
<b>#include &#60;pcre2.h&#62;</b>
@ -334,7 +335,43 @@ backward compatibility. They should not be used in new code. The first is
replaced by <b>pcre2_set_depth_limit()</b>; the second is no longer needed and
has no effect (it always returns zero).
</P>
<br><a name="SEC12" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
<br><a name="SEC12" href="#TOC1">PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a><br>
<P>
<b>pcre2_convert_context *pcre2_convert_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
<br>
<br>
<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
<b> pcre2_convert_context *<i>cvcontext</i>);</b>
<br>
<br>
<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
<br>
<br>
<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>escape_char</i>);</b>
<br>
<br>
<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>separator_char</i>);</b>
<br>
<br>
<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
<br>
<br>
<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
<br>
<br>
These functions provide a way of converting non-PCRE2 patterns into
patterns that can be processed by <b>pcre2_compile()</b>. This facility is
experimental and may be changed in future releases. At present, "globs" and
POSIX basic and extended patterns can be converted. Details are given in the
<a href="pcre2convert.html"><b>pcre2convert</b></a>
documentation.
</P>
<br><a name="SEC13" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
<P>
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code
units, respectively. However, there is just one header file, <b>pcre2.h</b>.
@ -395,7 +432,7 @@ In the function summaries above, and in the rest of this document and other
PCRE2 documents, functions and data types are described using their generic
names, without the _8, _16, or _32 suffix.
</P>
<br><a name="SEC13" href="#TOC1">PCRE2 API OVERVIEW</a><br>
<br><a name="SEC14" href="#TOC1">PCRE2 API OVERVIEW</a><br>
<P>
PCRE2 has its own native API, which is described in this document. There are
also some wrapper functions for the 8-bit library that correspond to the
@ -503,7 +540,7 @@ Functions with names ending with <b>_free()</b> are used for freeing memory
blocks of various sorts. In all cases, if one of these functions is called with
a NULL argument, it does nothing.
</P>
<br><a name="SEC14" href="#TOC1">STRING LENGTHS AND OFFSETS</a><br>
<br><a name="SEC15" href="#TOC1">STRING LENGTHS AND OFFSETS</a><br>
<P>
The PCRE2 API uses string lengths and offsets into strings of code units in
several places. These values are always of type PCRE2_SIZE, which is an
@ -513,7 +550,7 @@ as a special indicator for zero-terminated strings and unset offsets.
Therefore, the longest string that can be handled is one less than this
maximum.
<a name="newlines"></a></P>
<br><a name="SEC15" href="#TOC1">NEWLINES</a><br>
<br><a name="SEC16" href="#TOC1">NEWLINES</a><br>
<P>
PCRE2 supports five different conventions for indicating line breaks in
strings: a single CR (carriage return) character, a single LF (linefeed)
@ -548,7 +585,7 @@ The choice of newline convention does not affect the interpretation of
the \n or \r escape sequences, nor does it affect what \R matches; this has
its own separate convention.
</P>
<br><a name="SEC16" href="#TOC1">MULTITHREADING</a><br>
<br><a name="SEC17" href="#TOC1">MULTITHREADING</a><br>
<P>
In a multithreaded application it is important to keep thread-specific data
separate from data that can be shared between threads. The PCRE2 library code
@ -628,7 +665,7 @@ match. This includes details of what was matched, as well as additional
information such as the name of a (*MARK) setting. Each thread must provide its
own copy of this memory.
</P>
<br><a name="SEC17" href="#TOC1">PCRE2 CONTEXTS</a><br>
<br><a name="SEC18" href="#TOC1">PCRE2 CONTEXTS</a><br>
<P>
Some PCRE2 functions have a lot of parameters, many of which are used only by
specialist applications, for example, those that use custom memory management
@ -1013,7 +1050,7 @@ where ddd is a decimal number. However, such a setting is ignored unless ddd is
less than the limit set by the caller of <b>pcre2_match()</b> or
<b>pcre2_dfa_match()</b> or, if no such limit is set, less than the default.
</P>
<br><a name="SEC18" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
<br><a name="SEC19" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
<P>
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
@ -1150,7 +1187,7 @@ the PCRE2 version string, zero-terminated. The number of code units used is
returned. This is the length of the string plus one unit for the terminating
zero.
<a name="compiling"></a></P>
<br><a name="SEC19" href="#TOC1">COMPILING A PATTERN</a><br>
<br><a name="SEC20" href="#TOC1">COMPILING A PATTERN</a><br>
<P>
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
@ -1741,7 +1778,7 @@ dangerous option. Use with care.
PCRE2_EXTRA_MATCH_LINE
</pre>
This option is provided for use by the <b>-x</b> option of <b>pcre2grep</b>. It
causes the pattern only to match complete lines. This is achieved by
causes the pattern only to match complete lines. This is achieved by
automatically inserting the code for "^(?:" at the start of the compiled
pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched
line may be in the middle of the subject string. This option can be used with
@ -1756,7 +1793,7 @@ at the start of the compiled pattern and ")\b" at the end. The option may be
used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is
also set.
</P>
<br><a name="SEC20" href="#TOC1">COMPILATION ERROR CODES</a><br>
<br><a name="SEC21" href="#TOC1">COMPILATION ERROR CODES</a><br>
<P>
There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
(via <i>errorcode</i>) if it finds an error in the pattern. There are also some
@ -1769,7 +1806,7 @@ error message"
<a href="#geterrormessage">below)</a>
can be called to obtain a textual error message from any error code.
<a name="jitcompiling"></a></P>
<br><a name="SEC21" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
<br><a name="SEC22" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
<P>
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
<br>
@ -1807,7 +1844,7 @@ patterns to be analyzed, and for one-off matches and simple patterns the
benefit of faster execution might be offset by a much slower compilation time.
Most (but not all) patterns can be optimized by the JIT compiler.
<a name="localesupport"></a></P>
<br><a name="SEC22" href="#TOC1">LOCALE SUPPORT</a><br>
<br><a name="SEC23" href="#TOC1">LOCALE SUPPORT</a><br>
<P>
PCRE2 handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character code
@ -1863,7 +1900,7 @@ is saved with the compiled pattern, and the same tables are used by
compilation and matching both happen in the same locale, but different patterns
can be processed in different locales.
<a name="infoaboutpattern"></a></P>
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
<P>
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
@ -2188,7 +2225,7 @@ value returned by this option, because there are cases where the code that
calculates the size has to over-estimate. Processing a pattern with the JIT
compiler does not alter the value returned by this option.
<a name="infoaboutcallouts"></a></P>
<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
<br><a name="SEC25" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
<P>
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
@ -2207,7 +2244,7 @@ contents of the callout enumeration block are described in the
<a href="pcre2callout.html"><b>pcre2callout</b></a>
documentation, which also gives further details about callouts.
</P>
<br><a name="SEC25" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
<br><a name="SEC26" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
<P>
It is possible to save compiled patterns on disc or elsewhere, and reload them
later, subject to a number of restrictions. The functions whose names begin
@ -2216,7 +2253,7 @@ the
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
documentation.
<a name="matchdatablock"></a></P>
<br><a name="SEC26" href="#TOC1">THE MATCH DATA BLOCK</a><br>
<br><a name="SEC27" href="#TOC1">THE MATCH DATA BLOCK</a><br>
<P>
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
@ -2287,7 +2324,7 @@ match data block (for that match) have taken place.
When a match data block itself is no longer needed, it should be freed by
calling <b>pcre2_match_data_free()</b>.
</P>
<br><a name="SEC27" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
<br><a name="SEC28" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
<P>
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@ -2525,7 +2562,7 @@ examples, in the
<a href="pcre2partial.html"><b>pcre2partial</b></a>
documentation.
</P>
<br><a name="SEC28" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
<br><a name="SEC29" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
<P>
When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in
@ -2565,7 +2602,7 @@ does \s, even though it includes CR and LF in the characters that it matches.
Notwithstanding the above, anomalous effects may still occur when CRLF is a
valid newline sequence and explicit \r or \n escapes appear in the pattern.
<a name="matchedstrings"></a></P>
<br><a name="SEC29" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
<br><a name="SEC30" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
<P>
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
<br>
@ -2664,7 +2701,7 @@ parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
<b>pcre2_match()</b>. The other elements retain whatever values they previously
had.
<a name="matchotherdata"></a></P>
<br><a name="SEC30" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
<br><a name="SEC31" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
<P>
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
<br>
@ -2714,7 +2751,7 @@ the code unit offset of the invalid UTF character. Details are given in the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
page.
<a name="errorlist"></a></P>
<br><a name="SEC31" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
<br><a name="SEC32" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
<P>
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
converted to a text string by calling the <b>pcre2_get_error_message()</b>
@ -2820,7 +2857,7 @@ faulted at compile time, but more complicated cases, in particular mutual
recursions between two different subpatterns, cannot be detected until matching
is attempted.
<a name="geterrormessage"></a></P>
<br><a name="SEC32" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
<br><a name="SEC33" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
<P>
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
<b> PCRE2_SIZE <i>bufflen</i>);</b>
@ -2841,7 +2878,7 @@ returned. If the buffer is too small, the message is truncated (but still with
a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned.
None of the messages are very long; a buffer size of 120 code units is ample.
<a name="extractbynumber"></a></P>
<br><a name="SEC33" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
<br><a name="SEC34" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
<P>
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
@ -2938,7 +2975,7 @@ The substring did not participate in the match. For example, if the pattern is
(abc)|(def) and the subject is "def", and the ovector contains at least two
capturing slots, substring number 1 is unset.
</P>
<br><a name="SEC34" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
<br><a name="SEC35" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
<P>
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
@ -2977,7 +3014,7 @@ can be distinguished from a genuine zero-length substring by inspecting the
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
<a name="extractbyname"></a></P>
<br><a name="SEC35" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
<br><a name="SEC36" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
<P>
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
<b> PCRE2_SPTR <i>name</i>);</b>
@ -3037,7 +3074,7 @@ names are not included in the compiled code. The matching process uses only
numbers. For this reason, the use of different names for subpatterns of the
same number causes an error at compile time.
</P>
<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
<br><a name="SEC37" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
<P>
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@ -3244,7 +3281,7 @@ obtained by calling the <b>pcre2_get_error_message()</b> function (see
"Obtaining a textual error message"
<a href="#geterrormessage">above).</a>
</P>
<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
<br><a name="SEC38" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
<P>
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
@ -3289,7 +3326,7 @@ in the section entitled <i>Information about a pattern</i>. Given all the
relevant entries for the name, you can extract each of their numbers, and hence
the captured data.
</P>
<br><a name="SEC38" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
<br><a name="SEC39" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
<P>
The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match at a given point in the subject. If you want to
@ -3307,7 +3344,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
other alternatives. Ultimately, when it runs out of matches,
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
<a name="dfamatch"></a></P>
<br><a name="SEC39" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
<br><a name="SEC40" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
<P>
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@ -3503,13 +3540,13 @@ some plausibility checks are made on the contents of the workspace, which
should contain data about the previous partial match. If any of these checks
fail, this error is given.
</P>
<br><a name="SEC40" href="#TOC1">SEE ALSO</a><br>
<br><a name="SEC41" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo</b>(3),
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
<b>pcre2sample</b>(3), <b>pcre2unicode</b>(3).
</P>
<br><a name="SEC41" href="#TOC1">AUTHOR</a><br>
<br><a name="SEC42" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@ -3518,9 +3555,9 @@ University Computing Service
Cambridge, England.
<br>
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<br><a name="SEC43" href="#TOC1">REVISION</a><br>
<P>
Last updated: 16 June 2017
Last updated: 10 July 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

190
doc/html/pcre2convert.html Normal file
View File

@ -0,0 +1,190 @@
<html>
<head>
<title>pcre2convert specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2convert man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a>
<li><a name="TOC2" href="#SEC2">THE CONVERT CONTEXT</a>
<li><a name="TOC3" href="#SEC3">THE CONVERSION FUNCTION</a>
<li><a name="TOC4" href="#SEC4">CONVERTING GLOBS</a>
<li><a name="TOC5" href="#SEC5">CONVERTING POSIX PATTERNS</a>
<li><a name="TOC6" href="#SEC6">AUTHOR</a>
<li><a name="TOC7" href="#SEC7">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a><br>
<P>
This document describes a set of functions that can be used to convert
"foreign" patterns into PCRE2 regular expressions. This facility is currently
experimental, and may be changed in future releases. Two kinds of pattern,
globs and POSIX patterns, are supported.
</P>
<br><a name="SEC2" href="#TOC1">THE CONVERT CONTEXT</a><br>
<P>
<b>pcre2_convert_context *pcre2_convert_context_create(</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
<br>
<br>
<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
<b> pcre2_convert_context *<i>cvcontext</i>);</b>
<br>
<br>
<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
<br>
<br>
<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>escape_char</i>);</b>
<br>
<br>
<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
<b> uint32_t <i>separator_char</i>);</b>
<br>
<br>
A convert context is used to hold parameters that affect the way that pattern
conversion works. Like all PCRE2 contexts, you need to use a context only if
you want to override the defaults. There are the usual create, copy, and free
functions. If custom memory management functions are set in a general context
that is passed to <b>pcre2_convert_context_create()</b>, they are used for all
memory management within the conversion functions.
</P>
<P>
There are only two parameters in the convert context at present. Both apply
only to glob conversions. The escape character defaults to grave accent under
Windows, otherwise backslash. It can be set to zero, meaning no escape
character, or to any punctuation character with a code point less than 256.
The separator character defaults to backslash under Windows, otherwise forward
slash. It can be set to forward slash, backslash, or dot.
</P>
<P>
The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if
their second argument is invalid.
</P>
<br><a name="SEC3" href="#TOC1">THE CONVERSION FUNCTION</a><br>
<P>
<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
<br>
<br>
<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
<br>
<br>
The first two arguments of <b>pcre2_pattern_convert()</b> define the foreign
pattern that is to be converted. The length may be given as
PCRE2_ZERO_TERMINATED. The <b>options</b> argument defines how the pattern is to
be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set.
PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid.
One or more of the glob options, or one of the following POSIX options must be
set to define the type of conversion that is required:
<pre>
PCRE2_CONVERT_GLOB
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
PCRE2_CONVERT_GLOB_NO_STARSTAR
PCRE2_CONVERT_POSIX_BASIC
PCRE2_CONVERT_POSIX_EXTENDED
</pre>
Details of the conversions are given below. The <b>buffer</b> and <b>blength</b>
arguments define how the output is handled:
</P>
<P>
If <b>buffer</b> is NULL, the function just returns the length of the converted
pattern via <b>blength</b>. This is one less than the length of buffer needed,
because a terminating zero is always added to the output.
</P>
<P>
If <b>buffer</b> points to a NULL pointer, an output buffer is obtained using
the allocator in the context or <b>malloc()</b> if no context is supplied. A
pointer to this buffer is placed in the variable to which <b>buffer</b> points.
When no longer needed the output buffer must be freed by calling
<b>pcre2_converted_pattern_free()</b>.
</P>
<P>
If <b>buffer</b> points to a non-NULL pointer, <b>blength</b> must be set to the
actual length of the buffer provided (in code units).
</P>
<P>
In all cases, after successful conversion, the variable pointed to by
<b>blength</b> is updated to the length actually used (in code units), excluding
the terminating zero that is always added.
</P>
<P>
If an error occurs, the length (via <b>blength</b>) is set to the offset
within the input pattern where the error was detected. Only gross syntax errors
are caught; there are plenty of errors that will get passed on for
<b>pcre2_compile()</b> to discover.
</P>
<P>
The return from <b>pcre2_pattern_convert()</b> is zero on success or a non-zero
PCRE2 error code. Note that PCRE2 error codes may be positive or negative:
<b>pcre2_compile()</b> uses mostly positive codes and <b>pcre2_match()</b>
negative ones; <b>pcre2_pattern_convert()</b> uses existing codes of both kinds. A
textual error message can be obtained by calling
<b>pcre2_get_error_message()</b>.
</P>
<br><a name="SEC4" href="#TOC1">CONVERTING GLOBS</a><br>
<P>
Globs are used to match file names, and consequently have the concept of a
"path separator", which defaults to backslash under Windows and forward slash
otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not
permitted to match separator characters, but the double-star (**) feature
(which does match separators) is supported.
</P>
<P>
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with the
double-star feature disabled. These options may be given together.
</P>
<br><a name="SEC5" href="#TOC1">CONVERTING POSIX PATTERNS</a><br>
<P>
POSIX defines two kinds of regular expression pattern: basic and extended.
These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or
PCRE2_CONVERT_POSIX_EXTENDED, respectively.
</P>
<P>
In POSIX patterns, backslash is not special in a character class. Unmatched
closing parentheses are treated as literals.
</P>
<P>
In basic patterns, ? + | {} and () must be escaped to be recognized
as metacharacters outside a character class. If the first character in the
pattern is * it is treated as a literal. ^ is a metacharacter only at the start
of a branch.
</P>
<P>
In extended patterns, a backslash not in a character class always
makes the next character literal, whatever it is. There are no backreferences.
</P>
<P>
Note: POSIX mandates that the longest possible match at the first matching
position must be found. This is not what <b>pcre2_match()</b> does; it yields
the first match that is found. An application can use <b>pcre2_dfa_match()</b>
to find the longest match, but that does not support backreferences (but then
neither do POSIX extended patterns).
</P>
<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge, England.
<br>
</P>
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
<P>
Last updated: 12 July 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -630,6 +630,10 @@ heavily used in the test files.
bsr=[anycrlf|unicode] specify \R handling
/B bincode show binary code without lengths
callout_info show callout information
convert=&#60;options&#62; request foreign pattern conversion
convert_glob_escape=c set glob escape character
convert_glob_separator=c set glob separator character
convert_length set convert buffer length
debug same as info,fullbincode
framesize show matching frame size
fullbincode show binary code with lengths
@ -1065,6 +1069,41 @@ are ignored (for the stacked copy), with a warning message, except for
<b>replace</b>, which causes an error. Note that <b>jitverify</b>, which is
allowed, does not carry through to any subsequent matching that uses a stacked
pattern.
</P>
<br><b>
Testing foreign pattern conversion
</b><br>
<P>
The experimental foreign pattern conversion functions in PCRE2 can be tested by
setting the <b>convert</b> modifier. Its argument is a colon-separated list of
options, which set the equivalent option for the <b>pcre2_pattern_convert()</b>
function:
<pre>
glob PCRE2_CONVERT_GLOB
glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
posix_basic PCRE2_CONVERT_POSIX_BASIC
posix_extended PCRE2_CONVERT_POSIX_EXTENDED
unset Unset all options
</pre>
The "unset" value is useful for turning off a default that has been set by a
<b>#pattern</b> command. When one of these options is set, the input pattern is
passed to <b>pcre2_pattern_convert()</b>. If the conversion is successful, the
result is reflected in the output and then passed to <b>pcre2_compile()</b>. The
normal <b>utf</b> and <b>no_utf_check</b> options, if set, cause the
PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to
<b>pcre2_pattern_convert()</b>.
</P>
<P>
By default, the conversion function is allowed to allocate a buffer for its
output. However, if the <b>convert_length</b> modifier is set to a value greater
than zero, <b>pcre2test</b> passes a buffer of the given length. This makes it
possible to test the length check.
</P>
<P>
The <b>convert_glob_escape</b> and <b>convert_glob_separator</b> modifiers can be
used to specify the escape and separator characters for glob processing,
overriding the defaults, which are operating-system dependent.
<a name="subjectmodifiers"></a></P>
<br><a name="SEC11" href="#TOC1">SUBJECT MODIFIERS</a><br>
<P>
@ -1866,7 +1905,7 @@ Cambridge, England.
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
Last updated: 02 July 2017
Last updated: 12 July 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -35,6 +35,9 @@ first.
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
<td>&nbsp;&nbsp;Compatibility with Perl</td></tr>
<tr><td><a href="pcre2convert.html">pcre2convert</a></td>
<td>&nbsp;&nbsp;Experimental foreign pattern conversion functions</td></tr>
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
<td>&nbsp;&nbsp;A demonstration C program that uses the PCRE2 library</td></tr>
@ -112,6 +115,18 @@ in the library.
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
<td>&nbsp;&nbsp;Show build-time configuration options</td></tr>
<tr><td><a href="pcre2_convert_context_copy.html">pcre2_convert_context_copy</a></td>
<td>&nbsp;&nbsp;Copy a convert context</td></tr>
<tr><td><a href="pcre2_convert_context_create.html">pcre2_convert_context_create</a></td>
<td>&nbsp;&nbsp;Create a convert context</td></tr>
<tr><td><a href="pcre2_convert_context_free.html">pcre2_convert_context_free</a></td>
<td>&nbsp;&nbsp;Free a convert context</td></tr>
<tr><td><a href="pcre2_converted_pattern_free.html">pcre2_converted_pattern_free</a></td>
<td>&nbsp;&nbsp;Free converted foreign pattern</td></tr>
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
@ -183,6 +198,9 @@ in the library.
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
<td>&nbsp;&nbsp;Free a match data block</td></tr>
<tr><td><a href="pcre2_pattern_convert.html">pcre2_pattern_convert</a></td>
<td>&nbsp;&nbsp;Experimental foreign pattern converter</td></tr>
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
<td>&nbsp;&nbsp;Extract information about a pattern</td></tr>
@ -216,6 +234,12 @@ in the library.
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr>
<tr><td><a href="pcre2_set_glob_escape.html">pcre2_set_glob_escape</a></td>
<td>&nbsp;&nbsp;Set glob escape character</td></tr>
<tr><td><a href="pcre2_set_glob_separator.html">pcre2_set_glob_separator</a></td>
<td>&nbsp;&nbsp;Set glob separator character</td></tr>
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking heap limit</td></tr>

View File

@ -413,6 +413,35 @@ PCRE2 NATIVE API OBSOLETE FUNCTIONS
needed and has no effect (it always returns zero).
PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS
pcre2_convert_context *pcre2_convert_context_create(
pcre2_general_context *gcontext);
pcre2_convert_context *pcre2_convert_context_copy(
pcre2_convert_context *cvcontext);
void pcre2_convert_context_free(pcre2_convert_context *cvcontext);
int pcre2_set_glob_escape(pcre2_convert_context *cvcontext,
uint32_t escape_char);
int pcre2_set_glob_separator(pcre2_convert_context *cvcontext,
uint32_t separator_char);
int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length,
uint32_t options, PCRE2_UCHAR **buffer,
PCRE2_SIZE *blength, pcre2_convert_context *cvcontext);
void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern);
These functions provide a way of converting non-PCRE2 patterns into
patterns that can be processed by pcre2_compile(). This facility is
experimental and may be changed in future releases. At present, "globs"
and POSIX basic and extended patterns can be converted. Details are
given in the pcre2convert documentation.
PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit
@ -3400,7 +3429,7 @@ AUTHOR
REVISION
Last updated: 16 June 2017
Last updated: 10 July 2017
Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------

View File

@ -0,0 +1,26 @@
.TH PCRE2_CONVERT_CONTEXT_COPY 3 "10 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B pcre2_convert_context *pcre2_convert_context_copy(
.B " pcre2_convert_context *\fIcvcontext\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function is part of an experimental set of pattern conversion functions.
It makes a new copy of a convert context, using the memory allocation function
that was used for the original context. The result is NULL if the memory cannot
be obtained.
.P
The pattern conversion functions are described in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.

View File

@ -0,0 +1,27 @@
.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "10 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B pcre2_convert_context *pcre2_convert_context_create(
.B " pcre2_general_context *\fIgcontext\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function is part of an experimental set of pattern conversion functions.
It creates and initializes a new convert context. If its argument is
NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory
allocation function within the general context is used. The result is NULL if
the memory could not be obtained.
.P
The pattern conversion functions are described in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.

View File

@ -0,0 +1,25 @@
.TH PCRE2_CONVERT_CONTEXT_FREE 3 "10 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
.fi
.
.SH DESCRIPTION
.rs
.sp
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a convert context, using the memory
freeing function from the general context with which it was created, or
\fBfree()\fP if that was not set.
.P
The pattern conversion functions are described in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.

View File

@ -0,0 +1,25 @@
.TH PCRE2_CONVERTED_PATTERN_FREE 3 "11 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
.fi
.
.SH DESCRIPTION
.rs
.sp
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a converted pattern that was obtained by
calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place
the converted pattern into newly obtained heap memory.
.P
The pattern conversion functions are described in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.

View File

@ -0,0 +1,55 @@
.TH PCRE2_PATTERN_CONVERT 3 "11 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function is part of an experimental set of pattern conversion functions.
It converts a foreign pattern (for example, a glob) into a PCRE2 regular
expression pattern. Its arguments are:
.sp
\fIpattern\fP The foreign pattern
\fIlength\fP The length of the input pattern or PCRE2_ZERO_TERMINATED
\fIoptions\fP Option bits
\fIbuffer\fP Pointer to pointer to output buffer, or NULL
\fIblength\fP Pointer to output length field
\fIcvcontext\fP Pointer to a convert context or NULL
.sp
The length of the converted pattern (excluding the terminating zero) is
returned via \fIblength\fP. If \fIbuffer\fP is NULL, the function just returns
the output length. If \fIbuffer\fP points to a NULL pointer, heap memory is
obtained for the converted pattern, using the allocator in the context if
present (or else \fBmalloc()\fP), and the field pointed to by \fIbuffer\fP is
updated. If \fIbuffer\fP points to a non-NULL field, that must point to a
buffer whose size is in the variable pointed to by \fIblength\fP. This value is
updated.
.P
The option bits are:
.sp
PCRE2_CONVERT_UTF Input is UTF
PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity
PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern
PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern
PCRE2_CONVERT_GLOB ) Convert
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types
PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob
.sp
The return value from \fBpcre2_pattern_convert()\fP is zero on success or a
non-zero PCRE2 error code.
.P
The pattern conversion functions are described in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.

View File

@ -0,0 +1,29 @@
.TH PCRE2_SET_GLOB_ESCAPE 3 "11 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
.B " uint32_t \fIescape_char\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function is part of an experimental set of pattern conversion functions.
It sets the escape character that is used when converting globs. The second
argument must either be zero (meaning there is no escape character) or a
punctuation character whose code point is less than 256. The default is grave
accent if running under Windows, otherwise backslash. The result of the
function is zero for success or PCRE2_ERROR_BADDATA if the second argument is
invalid.
.P
The pattern conversion functions are described in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.

View File

@ -0,0 +1,28 @@
.TH PCRE2_SET_GLOB_SEPARATOR 3 "11 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
.B " uint32_t \fIseparator_char\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function is part of an experimental set of pattern conversion functions.
It sets the component separator character that is used when converting globs.
The second argument must be one of the characters forward slash, backslash, or
dot. The default is backslash when running under Windows, otherwise forward
slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
the second argument is invalid.
.P
The pattern conversion functions are described in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "16 June 2017" "PCRE2 10.30"
.TH PCRE2API 3 "10 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -272,6 +272,41 @@ replaced by \fBpcre2_set_depth_limit()\fP; the second is no longer needed and
has no effect (it always returns zero).
.
.
.SH "PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS"
.rs
.sp
.nf
.B pcre2_convert_context *pcre2_convert_context_create(
.B " pcre2_general_context *\fIgcontext\fP);"
.sp
.B pcre2_convert_context *pcre2_convert_context_copy(
.B " pcre2_convert_context *\fIcvcontext\fP);"
.sp
.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
.sp
.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
.B " uint32_t \fIescape_char\fP);"
.sp
.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
.B " uint32_t \fIseparator_char\fP);"
.sp
.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
.sp
.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
.fi
.sp
These functions provide a way of converting non-PCRE2 patterns into
patterns that can be processed by \fBpcre2_compile()\fP. This facility is
experimental and may be changed in future releases. At present, "globs" and
POSIX basic and extended patterns can be converted. Details are given in the
.\" HREF
\fBpcre2convert\fP
.\"
documentation.
.
.
.SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
.rs
.sp
@ -1695,7 +1730,7 @@ dangerous option. Use with care.
PCRE2_EXTRA_MATCH_LINE
.sp
This option is provided for use by the \fB-x\fP option of \fBpcre2grep\fP. It
causes the pattern only to match complete lines. This is achieved by
causes the pattern only to match complete lines. This is achieved by
automatically inserting the code for "^(?:" at the start of the compiled
pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched
line may be in the middle of the subject string. This option can be used with
@ -3539,6 +3574,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 16 June 2017
Last updated: 10 July 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

163
doc/pcre2convert.3 Normal file
View File

@ -0,0 +1,163 @@
.TH PCRE2CONVERT 3 "12 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "EXPERIMENTAL PATTERN CONVERSION FUNCTIONS"
.rs
.sp
This document describes a set of functions that can be used to convert
"foreign" patterns into PCRE2 regular expressions. This facility is currently
experimental, and may be changed in future releases. Two kinds of pattern,
globs and POSIX patterns, are supported.
.
.
.SH "THE CONVERT CONTEXT"
.rs
.sp
.nf
.B pcre2_convert_context *pcre2_convert_context_create(
.B " pcre2_general_context *\fIgcontext\fP);"
.sp
.B pcre2_convert_context *pcre2_convert_context_copy(
.B " pcre2_convert_context *\fIcvcontext\fP);"
.sp
.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
.sp
.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
.B " uint32_t \fIescape_char\fP);"
.sp
.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
.B " uint32_t \fIseparator_char\fP);"
.fi
.sp
A convert context is used to hold parameters that affect the way that pattern
conversion works. Like all PCRE2 contexts, you need to use a context only if
you want to override the defaults. There are the usual create, copy, and free
functions. If custom memory management functions are set in a general context
that is passed to \fBpcre2_convert_context_create()\fP, they are used for all
memory management within the conversion functions.
.P
There are only two parameters in the convert context at present. Both apply
only to glob conversions. The escape character defaults to grave accent under
Windows, otherwise backslash. It can be set to zero, meaning no escape
character, or to any punctuation character with a code point less than 256.
The separator character defaults to backslash under Windows, otherwise forward
slash. It can be set to forward slash, backslash, or dot.
.P
The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if
their second argument is invalid.
.
.
.SH "THE CONVERSION FUNCTION"
.rs
.sp
.nf
.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
.sp
.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
.fi
.sp
The first two arguments of \fBpcre2_pattern_convert()\fP define the foreign
pattern that is to be converted. The length may be given as
PCRE2_ZERO_TERMINATED. The \fBoptions\fP argument defines how the pattern is to
be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set.
PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid.
One or more of the glob options, or one of the following POSIX options must be
set to define the type of conversion that is required:
.sp
PCRE2_CONVERT_GLOB
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
PCRE2_CONVERT_GLOB_NO_STARSTAR
PCRE2_CONVERT_POSIX_BASIC
PCRE2_CONVERT_POSIX_EXTENDED
.sp
Details of the conversions are given below. The \fBbuffer\fP and \fBblength\fP
arguments define how the output is handled:
.P
If \fBbuffer\fP is NULL, the function just returns the length of the converted
pattern via \fBblength\fP. This is one less than the length of buffer needed,
because a terminating zero is always added to the output.
.P
If \fBbuffer\fP points to a NULL pointer, an output buffer is obtained using
the allocator in the context or \fBmalloc()\fP if no context is supplied. A
pointer to this buffer is placed in the variable to which \fBbuffer\fP points.
When no longer needed the output buffer must be freed by calling
\fBpcre2_converted_pattern_free()\fP.
.P
If \fBbuffer\fP points to a non-NULL pointer, \fBblength\fP must be set to the
actual length of the buffer provided (in code units).
.P
In all cases, after successful conversion, the variable pointed to by
\fBblength\fP is updated to the length actually used (in code units), excluding
the terminating zero that is always added.
.P
If an error occurs, the length (via \fBblength\fP) is set to the offset
within the input pattern where the error was detected. Only gross syntax errors
are caught; there are plenty of errors that will get passed on for
\fBpcre2_compile()\fP to discover.
.P
The return from \fBpcre2_pattern_convert()\fP is zero on success or a non-zero
PCRE2 error code. Note that PCRE2 error codes may be positive or negative:
\fBpcre2_compile()\fP uses mostly positive codes and \fBpcre2_match()\fP
negative ones; \fBpcre2_pattern_convert()\fP uses existing codes of both kinds. A
textual error message can be obtained by calling
\fBpcre2_get_error_message()\fP.
.
.
.SH "CONVERTING GLOBS"
.rs
.sp
Globs are used to match file names, and consequently have the concept of a
"path separator", which defaults to backslash under Windows and forward slash
otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not
permitted to match separator characters, but the double-star (**) feature
(which does match separators) is supported.
.P
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with the
double-star feature disabled. These options may be given together.
.
.
.SH "CONVERTING POSIX PATTERNS"
.rs
.sp
POSIX defines two kinds of regular expression pattern: basic and extended.
These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or
PCRE2_CONVERT_POSIX_EXTENDED, respectively.
.P
In POSIX patterns, backslash is not special in a character class. Unmatched
closing parentheses are treated as literals.
.P
In basic patterns, ? + | {} and () must be escaped to be recognized
as metacharacters outside a character class. If the first character in the
pattern is * it is treated as a literal. ^ is a metacharacter only at the start
of a branch.
.P
In extended patterns, a backslash not in a character class always
makes the next character literal, whatever it is. There are no backreferences.
.P
Note: POSIX mandates that the longest possible match at the first matching
position must be found. This is not what \fBpcre2_match()\fP does; it yields
the first match that is found. An application can use \fBpcre2_dfa_match()\fP
to find the longest match, but that does not support backreferences (but then
neither do POSIX extended patterns).
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 12 July 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "02 July 2017" "PCRE 10.30"
.TH PCRE2TEST 1 "12 July 2017" "PCRE 10.30"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -592,6 +592,10 @@ heavily used in the test files.
bsr=[anycrlf|unicode] specify \eR handling
/B bincode show binary code without lengths
callout_info show callout information
convert=<options> request foreign pattern conversion
convert_glob_escape=c set glob escape character
convert_glob_separator=c set glob separator character
convert_length=<n> set convert buffer length
debug same as info,fullbincode
framesize show matching frame size
fullbincode show binary code with lengths
@ -1035,6 +1039,39 @@ allowed, does not carry through to any subsequent matching that uses a stacked
pattern.
.
.
.SS "Testing foreign pattern conversion"
.rs
.sp
The experimental foreign pattern conversion functions in PCRE2 can be tested by
setting the \fBconvert\fP modifier. Its argument is a colon-separated list of
options, which set the equivalent option for the \fBpcre2_pattern_convert()\fP
function:
.sp
glob PCRE2_CONVERT_GLOB
glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
posix_basic PCRE2_CONVERT_POSIX_BASIC
posix_extended PCRE2_CONVERT_POSIX_EXTENDED
unset Unset all options
.sp
The "unset" value is useful for turning off a default that has been set by a
\fB#pattern\fP command. When one of these options is set, the input pattern is
passed to \fBpcre2_pattern_convert()\fP. If the conversion is successful, the
result is reflected in the output and then passed to \fBpcre2_compile()\fP. The
normal \fButf\fP and \fBno_utf_check\fP options, if set, cause the
PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to
\fBpcre2_pattern_convert()\fP.
.P
By default, the conversion function is allowed to allocate a buffer for its
output. However, if the \fBconvert_length\fP modifier is set to a value greater
than zero, \fBpcre2test\fP passes a buffer of the given length. This makes it
possible to test the length check.
.P
The \fBconvert_glob_escape\fP and \fBconvert_glob_separator\fP modifiers can be
used to specify the escape and separator characters for glob processing,
overriding the defaults, which are operating-system dependent.
.
.
.\" HTML <a name="subjectmodifiers"></a>
.SH "SUBJECT MODIFIERS"
.rs
@ -1850,6 +1887,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 02 July 2017
Last updated: 12 July 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -570,6 +570,10 @@ PATTERN MODIFIERS
bsr=[anycrlf|unicode] specify \R handling
/B bincode show binary code without lengths
callout_info show callout information
convert=<options> request foreign pattern conversion
convert_glob_escape=c set glob escape character
convert_glob_separator=c set glob separator character
convert_length=<n> set convert buffer length
debug same as info,fullbincode
framesize show matching frame size
fullbincode show binary code with lengths
@ -953,6 +957,37 @@ PATTERN MODIFIERS
that jitverify, which is allowed, does not carry through to any subse-
quent matching that uses a stacked pattern.
Testing foreign pattern conversion
The experimental foreign pattern conversion functions in PCRE2 can be
tested by setting the convert modifier. Its argument is a colon-sepa-
rated list of options, which set the equivalent option for the
pcre2_pattern_convert() function:
glob PCRE2_CONVERT_GLOB
glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
posix_basic PCRE2_CONVERT_POSIX_BASIC
posix_extended PCRE2_CONVERT_POSIX_EXTENDED
unset Unset all options
The "unset" value is useful for turning off a default that has been set
by a #pattern command. When one of these options is set, the input pat-
tern is passed to pcre2_pattern_convert(). If the conversion is suc-
cessful, the result is reflected in the output and then passed to
pcre2_compile(). The normal utf and no_utf_check options, if set, cause
the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be
passed to pcre2_pattern_convert().
By default, the conversion function is allowed to allocate a buffer for
its output. However, if the convert_length modifier is set to a value
greater than zero, pcre2test passes a buffer of the given length. This
makes it possible to test the length check.
The convert_glob_escape and convert_glob_separator modifiers can be
used to specify the escape and separator characters for glob process-
ing, overriding the defaults, which are operating-system dependent.
SUBJECT MODIFIERS
@ -1692,5 +1727,5 @@ AUTHOR
REVISION
Last updated: 02 July 2017
Last updated: 12 July 2017
Copyright (c) 1997-2017 University of Cambridge.

View File

@ -193,8 +193,6 @@ ignored for pcre2_jit_match(). */
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
#define PCRE2_CONVERT_GLOB_BASIC 0x00000070u
#define PCRE2_CONVERT_GLOB_IGNORE_DOT_START 0x00000080u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be

View File

@ -193,8 +193,6 @@ ignored for pcre2_jit_match(). */
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
#define PCRE2_CONVERT_GLOB_BASIC 0x00000070u
#define PCRE2_CONVERT_GLOB_IGNORE_DOT_START 0x00000080u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be

View File

@ -49,7 +49,6 @@ POSSIBILITY OF SUCH DAMAGE.
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
PCRE2_CONVERT_GLOB_IGNORE_DOT_START| \
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
PCRE2_CONVERT_GLOB_NO_STARSTAR| \
TYPE_OPTIONS)

View File

@ -401,8 +401,6 @@ typedef struct convertstruct {
static convertstruct convertlist[] = {
{ "glob", PCRE2_CONVERT_GLOB },
{ "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
{ "glob_ignore_dot_start", PCRE2_CONVERT_GLOB_IGNORE_DOT_START },
{ "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
{ "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
{ "posix_basic", PCRE2_CONVERT_POSIX_BASIC },

View File

@ -8,7 +8,7 @@
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
#pattern convert=glob_basic,convert_glob_escape=\,convert_glob_separator=/
#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/
# The fact that this one works in 13 bytes in the 8-bit library shows that the
# output is in UTF-8, though pcre2test shows the character as an escape.

View File

@ -8,7 +8,7 @@
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
#pattern convert=glob_basic,convert_glob_escape=\,convert_glob_separator=/
#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/
# The fact that this one works in 13 bytes in the 8-bit library shows that the
# output is in UTF-8, though pcre2test shows the character as an escape.