Add the pcre2_maketables_free() function.

This commit is contained in:
Philip.Hazel 2019-09-03 14:16:07 +00:00
parent 45b219e6bc
commit 0970ae4195
12 changed files with 204 additions and 55 deletions

View File

@ -133,6 +133,8 @@ optimizations were in force.
particular, if a pattern ended with a negative lookahead, characters that were
inspected in that lookahead were not included.
30. Add the pcre2_maketables_free() function.
Version 10.33 16-April-2019
---------------------------

View File

@ -57,6 +57,7 @@ dist_html_DATA = \
doc/html/pcre2_jit_stack_create.html \
doc/html/pcre2_jit_stack_free.html \
doc/html/pcre2_maketables.html \
doc/html/pcre2_maketables_free.html \
doc/html/pcre2_match.html \
doc/html/pcre2_match_context_copy.html \
doc/html/pcre2_match_context_create.html \
@ -152,6 +153,7 @@ dist_man_MANS = \
doc/pcre2_jit_stack_create.3 \
doc/pcre2_jit_stack_free.3 \
doc/pcre2_maketables.3 \
doc/pcre2_maketables_free.3 \
doc/pcre2_match.3 \
doc/pcre2_match_context_copy.3 \
doc/pcre2_match_context_create.3 \
@ -360,7 +362,7 @@ COMMON_SOURCES = \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
src/pcre2_jit_simd_inc.h \
src/pcre2_jit_simd_inc.h \
src/pcre2_maketables.c \
src/pcre2_match.c \
src/pcre2_match_data.c \

View File

@ -179,6 +179,9 @@ in the library.
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre2_maketables_free.html">pcre2_maketables_free</a></td>
<td>&nbsp;&nbsp;Free character tables</td></tr>
<tr><td><a href="pcre2_match.html">pcre2_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr>

View File

@ -0,0 +1,44 @@
<html>
<head>
<title>pcre2_maketables_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre2_maketables_free man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre2.h&#62;</b>
</P>
<P>
<b>void pcre2_maketables_free(pcre2_general_context *<i>gcontext</i>,</b>
<b> const uint8_t *<i>tables</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function discards a set of character tables that were created by a call
to
<a href="pcre2_maketables.html"><b>pcre2_maketables()</b>.</a>
</P>
<P>
The <i>gcontext</i> parameter should match what was used in that call to
account for any custom allocators that might be in use; if it is NULL
the system <b>free()</b> is used.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>

View File

@ -312,6 +312,10 @@ document for an overview of all the PCRE2 documentation.
<b>const uint8_t *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
<br>
<br>
<b>void pcre2_maketables_free(pcre2_general_context *<i>gcontext</i>, </b>
<b> const uint8_t *<i>tables</i>);</b>
<br>
<br>
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>,</b>
<b> void *<i>where</i>);</b>
<br>
@ -794,7 +798,7 @@ interpreted matching functions, <i>pcre2_match()</i> and
<b> const uint8_t *<i>tables</i>);</b>
<br>
<br>
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
The value must be the result of a call to <b>pcre2_maketables()</b>, whose only
argument is a general context. This function builds a set of character tables
in the current locale.
<br>
@ -1982,13 +1986,20 @@ Most (but not all) patterns can be optimized by the JIT compiler.
<a name="localesupport"></a></P>
<br><a name="SEC22" href="#TOC1">LOCALE SUPPORT</a><br>
<P>
<b>const uint8_t *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
<br>
<br>
<b>void pcre2_maketables_free(pcre2_general_context *<i>gcontext</i>, </b>
<b> const uint8_t *<i>tables</i>);</b>
</P>
<P>
PCRE2 handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character code
point. This applies only to characters whose code points are less than 256. By
default, higher-valued code points never match escapes such as \w or \d.
However, if PCRE2 is built with Unicode support, all characters can be tested
with \p and \P, or, alternatively, the PCRE2_UCP option can be set when a
pattern is compiled; this causes \w and friends to use Unicode property
point. However, this applies only to characters whose code points are less than
256. By default, higher-valued code points never match escapes such as \w or
\d. When PCRE2 is built with Unicode support (the default), all characters can
be tested with \p and \P, or, alternatively, the PCRE2_UCP option can be set
when a pattern is compiled; this causes \w and friends to use Unicode property
support instead of the built-in tables.
</P>
<P>
@ -1997,24 +2008,28 @@ with code points greater than 128, you should either use Unicode support, or
use locales, but not try to mix the two.
</P>
<P>
PCRE2 contains an internal set of character tables that are used by default.
PCRE2 contains a built-in set of character tables that are used by default.
These are sufficient for many applications. Normally, the internal tables
recognize only ASCII characters. However, when PCRE2 is built, it is possible
to cause the internal tables to be rebuilt in the default "C" locale of the
local system, which may cause them to be different.
</P>
<P>
The internal tables can be overridden by tables supplied by the application
The built-in tables can be overridden by tables supplied by the application
that calls PCRE2. These may be created in a different locale from the default.
As more and more applications change to using Unicode, the need for this locale
support is expected to die away.
</P>
<P>
External tables are built by calling the <b>pcre2_maketables()</b> function, in
the relevant locale. The result can be passed to <b>pcre2_compile()</b> as often
as necessary, by creating a compile context and calling
<b>pcre2_set_character_tables()</b> to set the tables pointer therein. For
example, to build and use tables that are appropriate for the French locale
the relevant locale. The only argument to this function is a general context,
which can be used to pass a custom memory allocator. If the argument is NULL,
the system <b>malloc()</b> is used. The result can be passed to
<b>pcre2_compile()</b> as often as necessary, by creating a compile context and
calling <b>pcre2_set_character_tables()</b> to set the tables pointer therein.
</P>
<P>
For example, to build and use tables that are appropriate for the French locale
(where accented characters with values greater than 128 are treated as
letters), the following code could be used:
<pre>
@ -2025,9 +2040,7 @@ letters), the following code could be used:
re = pcre2_compile(..., ccontext);
</pre>
The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
are using Windows, the name for the French locale is "french". It is the
caller's responsibility to ensure that the memory containing the tables remains
available for as long as it is needed.
are using Windows, the name for the French locale is "french".
</P>
<P>
The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
@ -2035,6 +2048,13 @@ is saved with the compiled pattern, and the same tables are used by
<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>. Thus, for any single pattern,
compilation and matching both happen in the same locale, but different patterns
can be processed in different locales.
</P>
<P>
It is the caller's responsibility to ensure that the memory containing the
tables remains available while they are still in use. When they are no longer
needed, you can discard them using <b>pcre2_maketables_free()</b>, which should
be passed as its first argument the same general context that was used to
create the tables.
<a name="infoaboutpattern"></a></P>
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
<P>
@ -3851,7 +3871,7 @@ Cambridge, England.
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P>
Last updated: 01 August 2019
Last updated: 02 September 2019
<br>
Copyright &copy; 1997-2019 University of Cambridge.
<br>

View File

@ -179,6 +179,9 @@ in the library.
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre2_maketables_free.html">pcre2_maketables_free</a></td>
<td>&nbsp;&nbsp;Free character tables</td></tr>
<tr><td><a href="pcre2_match.html">pcre2_match</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr>

View File

@ -402,6 +402,9 @@ PCRE2 NATIVE API AUXILIARY FUNCTIONS
const uint8_t *pcre2_maketables(pcre2_general_context *gcontext);
void pcre2_maketables_free(pcre2_general_context *gcontext,
const uint8_t *tables);
int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
void *where);
@ -1941,39 +1944,48 @@ JUST-IN-TIME (JIT) COMPILATION
LOCALE SUPPORT
const uint8_t *pcre2_maketables(pcre2_general_context *gcontext);
void pcre2_maketables_free(pcre2_general_context *gcontext,
const uint8_t *tables);
PCRE2 handles caseless matching, and determines whether characters are
letters, digits, or whatever, by reference to a set of tables, indexed
by character code point. This applies only to characters whose code
points are less than 256. By default, higher-valued code points never
match escapes such as \w or \d. However, if PCRE2 is built with Uni-
code support, all characters can be tested with \p and \P, or, alterna-
tively, the PCRE2_UCP option can be set when a pattern is compiled;
this causes \w and friends to use Unicode property support instead of
the built-in tables.
by character code point. However, this applies only to characters whose
code points are less than 256. By default, higher-valued code points
never match escapes such as \w or \d. When PCRE2 is built with Unicode
support (the default), all characters can be tested with \p and \P, or,
alternatively, the PCRE2_UCP option can be set when a pattern is com-
piled; this causes \w and friends to use Unicode property support in-
stead of the built-in tables.
The use of locales with Unicode is discouraged. If you are handling
characters with code points greater than 128, you should either use
Unicode support, or use locales, but not try to mix the two.
PCRE2 contains an internal set of character tables that are used by de-
PCRE2 contains a built-in set of character tables that are used by de-
fault. These are sufficient for many applications. Normally, the in-
ternal tables recognize only ASCII characters. However, when PCRE2 is
built, it is possible to cause the internal tables to be rebuilt in the
default "C" locale of the local system, which may cause them to be dif-
ferent.
The internal tables can be overridden by tables supplied by the appli-
The built-in tables can be overridden by tables supplied by the appli-
cation that calls PCRE2. These may be created in a different locale
from the default. As more and more applications change to using Uni-
code, the need for this locale support is expected to die away.
External tables are built by calling the pcre2_maketables() function,
in the relevant locale. The result can be passed to pcre2_compile() as
often as necessary, by creating a compile context and calling
pcre2_set_character_tables() to set the tables pointer therein. For ex-
ample, to build and use tables that are appropriate for the French lo-
cale (where accented characters with values greater than 128 are
treated as letters), the following code could be used:
in the relevant locale. The only argument to this function is a general
context, which can be used to pass a custom memory allocator. If the
argument is NULL, the system malloc() is used. The result can be passed
to pcre2_compile() as often as necessary, by creating a compile context
and calling pcre2_set_character_tables() to set the tables pointer
therein.
For example, to build and use tables that are appropriate for the
French locale (where accented characters with values greater than 128
are treated as letters), the following code could be used:
setlocale(LC_CTYPE, "fr_FR");
tables = pcre2_maketables(NULL);
@ -1982,9 +1994,7 @@ LOCALE SUPPORT
re = pcre2_compile(..., ccontext);
The locale name "fr_FR" is used on Linux and other Unix-like systems;
if you are using Windows, the name for the French locale is "french".
It is the caller's responsibility to ensure that the memory containing
the tables remains available for as long as it is needed.
if you are using Windows, the name for the French locale is "french".
The pointer that is passed (via the compile context) to pcre2_compile()
is saved with the compiled pattern, and the same tables are used by
@ -1992,6 +2002,12 @@ LOCALE SUPPORT
pilation and matching both happen in the same locale, but different
patterns can be processed in different locales.
It is the caller's responsibility to ensure that the memory containing
the tables remains available while they are still in use. When they are
no longer needed, you can discard them using pcre2_maketables_free(),
which should be passed as its first argument the same general context
that was used to create the tables.
INFORMATION ABOUT A COMPILED PATTERN
@ -3706,7 +3722,7 @@ AUTHOR
REVISION
Last updated: 01 August 2019
Last updated: 02 September 2019
Copyright (c) 1997-2019 University of Cambridge.
------------------------------------------------------------------------------

View File

@ -0,0 +1,31 @@
.TH PCRE2_MAKETABLES_FREE 3 "02 September 2019" "PCRE2 10.34"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP,
.B " const uint8_t *\fItables\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function discards a set of character tables that were created by a call
to
.\" HREF
\fBpcre2_maketables()\fP.
.\"
.P
The \fIgcontext\fP parameter should match what was used in that call to
account for any custom allocators that might be in use; if it is NULL
the system \fBfree()\fP is used.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
.\"
page.

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "01 August 2019" "PCRE2 10.34"
.TH PCRE2API 3 "02 September 2019" "PCRE2 10.34"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -247,6 +247,9 @@ document for an overview of all the PCRE2 documentation.
.sp
.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
.sp
.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP,
.B " const uint8_t *\fItables\fP);"
.sp
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
.B " void *\fIwhere\fP);"
.sp
@ -728,7 +731,7 @@ interpreted matching functions, \fIpcre2_match()\fP and
.B " const uint8_t *\fItables\fP);"
.fi
.sp
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
The value must be the result of a call to \fBpcre2_maketables()\fP, whose only
argument is a general context. This function builds a set of character tables
in the current locale.
.sp
@ -1943,35 +1946,45 @@ Most (but not all) patterns can be optimized by the JIT compiler.
.SH "LOCALE SUPPORT"
.rs
.sp
.nf
.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
.sp
.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP,
.B " const uint8_t *\fItables\fP);"
.fi
.P
PCRE2 handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character code
point. This applies only to characters whose code points are less than 256. By
default, higher-valued code points never match escapes such as \ew or \ed.
However, if PCRE2 is built with Unicode support, all characters can be tested
with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set when a
pattern is compiled; this causes \ew and friends to use Unicode property
point. However, this applies only to characters whose code points are less than
256. By default, higher-valued code points never match escapes such as \ew or
\ed. When PCRE2 is built with Unicode support (the default), all characters can
be tested with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set
when a pattern is compiled; this causes \ew and friends to use Unicode property
support instead of the built-in tables.
.P
The use of locales with Unicode is discouraged. If you are handling characters
with code points greater than 128, you should either use Unicode support, or
use locales, but not try to mix the two.
.P
PCRE2 contains an internal set of character tables that are used by default.
PCRE2 contains a built-in set of character tables that are used by default.
These are sufficient for many applications. Normally, the internal tables
recognize only ASCII characters. However, when PCRE2 is built, it is possible
to cause the internal tables to be rebuilt in the default "C" locale of the
local system, which may cause them to be different.
.P
The internal tables can be overridden by tables supplied by the application
The built-in tables can be overridden by tables supplied by the application
that calls PCRE2. These may be created in a different locale from the default.
As more and more applications change to using Unicode, the need for this locale
support is expected to die away.
.P
External tables are built by calling the \fBpcre2_maketables()\fP function, in
the relevant locale. The result can be passed to \fBpcre2_compile()\fP as often
as necessary, by creating a compile context and calling
\fBpcre2_set_character_tables()\fP to set the tables pointer therein. For
example, to build and use tables that are appropriate for the French locale
the relevant locale. The only argument to this function is a general context,
which can be used to pass a custom memory allocator. If the argument is NULL,
the system \fBmalloc()\fP is used. The result can be passed to
\fBpcre2_compile()\fP as often as necessary, by creating a compile context and
calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein.
.P
For example, to build and use tables that are appropriate for the French locale
(where accented characters with values greater than 128 are treated as
letters), the following code could be used:
.sp
@ -1982,15 +1995,19 @@ letters), the following code could be used:
re = pcre2_compile(..., ccontext);
.sp
The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
are using Windows, the name for the French locale is "french". It is the
caller's responsibility to ensure that the memory containing the tables remains
available for as long as it is needed.
are using Windows, the name for the French locale is "french".
.P
The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP
is saved with the compiled pattern, and the same tables are used by
\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP. Thus, for any single pattern,
compilation and matching both happen in the same locale, but different patterns
can be processed in different locales.
.P
It is the caller's responsibility to ensure that the memory containing the
tables remains available while they are still in use. When they are no longer
needed, you can discard them using \fBpcre2_maketables_free()\fP, which should
be passed as its first argument the same general context that was used to
create the tables.
.
.
.\" HTML <a name="infoaboutpattern"></a>
@ -3863,6 +3880,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 01 August 2019
Last updated: 02 September 2019
Copyright (c) 1997-2019 University of Cambridge.
.fi

View File

@ -779,7 +779,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
*pcre2_maketables(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
/* Define macros that generate width-specific names from generic versions. The
three-level macro scheme is necessary to get the macros expanded when we want
@ -855,6 +856,7 @@ pcre2_compile are called by application code. */
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)

View File

@ -147,4 +147,13 @@ for (i = 0; i < 256; i++)
return yield;
}
/*************************************************
*     Free tables created by pcre2_maketables    *
*************************************************/

/* This function discards a set of character tables that were obtained from
pcre2_maketables(). The general context should be the one that was given to
pcre2_maketables() so that the matching memory release function is used.

Arguments:
  gcontext   general context whose free function is to be used, or NULL to
               use the system free()
  tables     the tables to discard; if NULL, the function does nothing

Returns:     nothing
*/

PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
{
  /* The system free() ignores NULL, but a caller-supplied free function may
  not, so never hand it a NULL block. */

  if (tables == NULL) return;

  /* The cast discards const: the tables are exposed read-only to callers, but
  the release functions take a plain void pointer. */

  if (gcontext != NULL)
    gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
  else
    free((void *)tables);
}
/* End of pcre2_maketables.c */

View File

@ -4390,7 +4390,7 @@ if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
#endif
free(main_buffer);
free((void *)character_tables);
if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
pcre2_compile_context_free(compile_context);
pcre2_match_context_free(match_context);