From 0970ae4195c5cd3dd7398a01d2d738f5a9c1d1cc Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 3 Sep 2019 14:16:07 +0000 Subject: [PATCH] Add the pcre2_maketables_free() function. --- ChangeLog | 2 ++ Makefile.am | 4 ++- doc/html/index.html | 3 ++ doc/html/pcre2_maketables_free.html | 44 +++++++++++++++++++++++ doc/html/pcre2api.html | 52 ++++++++++++++++++--------- doc/index.html.src | 3 ++ doc/pcre2.txt | 54 +++++++++++++++++++---------- doc/pcre2_maketables_free.3 | 31 +++++++++++++++++ doc/pcre2api.3 | 51 ++++++++++++++++++--------- src/pcre2.h.in | 4 ++- src/pcre2_maketables.c | 9 +++++ src/pcre2grep.c | 2 +- 12 files changed, 204 insertions(+), 55 deletions(-) create mode 100644 doc/html/pcre2_maketables_free.html create mode 100644 doc/pcre2_maketables_free.3 diff --git a/ChangeLog b/ChangeLog index 65385a3..23948bf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -133,6 +133,8 @@ optimizations were in force. particular, if a pattern ended with a negative lookahead, characters that were inspected in that lookahead were not included. +30. Add the pcre2_maketables_free() function. 
+ Version 10.33 16-April-2019 --------------------------- diff --git a/Makefile.am b/Makefile.am index ea5c600..708b953 100644 --- a/Makefile.am +++ b/Makefile.am @@ -57,6 +57,7 @@ dist_html_DATA = \ doc/html/pcre2_jit_stack_create.html \ doc/html/pcre2_jit_stack_free.html \ doc/html/pcre2_maketables.html \ + doc/html/pcre2_maketables_free.html \ doc/html/pcre2_match.html \ doc/html/pcre2_match_context_copy.html \ doc/html/pcre2_match_context_create.html \ @@ -152,6 +153,7 @@ dist_man_MANS = \ doc/pcre2_jit_stack_create.3 \ doc/pcre2_jit_stack_free.3 \ doc/pcre2_maketables.3 \ + doc/pcre2_maketables_free.3 \ doc/pcre2_match.3 \ doc/pcre2_match_context_copy.3 \ doc/pcre2_match_context_create.3 \ @@ -360,7 +362,7 @@ COMMON_SOURCES = \ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ src/pcre2_jit_compile.c \ - src/pcre2_jit_simd_inc.h \ + src/pcre2_jit_simd_inc.h \ src/pcre2_maketables.c \ src/pcre2_match.c \ src/pcre2_match_data.c \ diff --git a/doc/html/index.html b/doc/html/index.html index 556965a..2c7c5fb 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -179,6 +179,9 @@ in the library. pcre2_maketables   Build character tables in current locale +pcre2_maketables_free +   Free character tables + pcre2_match   Match a compiled pattern to a subject string (Perl compatible) diff --git a/doc/html/pcre2_maketables_free.html b/doc/html/pcre2_maketables_free.html new file mode 100644 index 0000000..7316ab2 --- /dev/null +++ b/doc/html/pcre2_maketables_free.html @@ -0,0 +1,44 @@ + + +pcre2_maketables_free specification + + +

pcre2_maketables_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +

+
+DESCRIPTION +
+

+This function discards a set of character tables that were created by a call +to +pcre2_maketables(). +

+

+The gcontext parameter should be the same general context that was used in +that call, so that any custom memory allocator is honoured; if it is NULL, +the system free() is used. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html index dabf852..16ad8bb 100644 --- a/doc/html/pcre2api.html +++ b/doc/html/pcre2api.html @@ -312,6 +312,10 @@ document for an overview of all the PCRE2 documentation. const uint8_t *pcre2_maketables(pcre2_general_context *gcontext);

+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +
+
int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where);
@@ -794,7 +798,7 @@ interpreted matching functions, pcre2_match() and const uint8_t *tables);

-The value must be the result of a call to pcre2_maketables(), whose only +The value must be the result of a call to pcre2_maketables(), whose only argument is a general context. This function builds a set of character tables in the current locale.
@@ -1982,13 +1986,20 @@ Most (but not all) patterns can be optimized by the JIT compiler.


LOCALE SUPPORT

+const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); +
+
+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +

+

PCRE2 handles caseless matching, and determines whether characters are letters, digits, or whatever, by reference to a set of tables, indexed by character code -point. This applies only to characters whose code points are less than 256. By -default, higher-valued code points never match escapes such as \w or \d. -However, if PCRE2 is built with Unicode support, all characters can be tested -with \p and \P, or, alternatively, the PCRE2_UCP option can be set when a -pattern is compiled; this causes \w and friends to use Unicode property +point. However, this applies only to characters whose code points are less than +256. By default, higher-valued code points never match escapes such as \w or +\d. When PCRE2 is built with Unicode support (the default), all characters can +be tested with \p and \P, or, alternatively, the PCRE2_UCP option can be set +when a pattern is compiled; this causes \w and friends to use Unicode property support instead of the built-in tables.

@@ -1997,24 +2008,28 @@ with code points greater than 128, you should either use Unicode support, or use locales, but not try to mix the two.

-PCRE2 contains an internal set of character tables that are used by default. +PCRE2 contains a built-in set of character tables that are used by default. These are sufficient for many applications. Normally, the internal tables recognize only ASCII characters. However, when PCRE2 is built, it is possible to cause the internal tables to be rebuilt in the default "C" locale of the local system, which may cause them to be different.

-The internal tables can be overridden by tables supplied by the application +The built-in tables can be overridden by tables supplied by the application that calls PCRE2. These may be created in a different locale from the default. As more and more applications change to using Unicode, the need for this locale support is expected to die away.

External tables are built by calling the pcre2_maketables() function, in -the relevant locale. The result can be passed to pcre2_compile() as often -as necessary, by creating a compile context and calling -pcre2_set_character_tables() to set the tables pointer therein. For -example, to build and use tables that are appropriate for the French locale +the relevant locale. The only argument to this function is a general context, +which can be used to pass a custom memory allocator. If the argument is NULL, +the system malloc() is used. The result can be passed to +pcre2_compile() as often as necessary, by creating a compile context and +calling pcre2_set_character_tables() to set the tables pointer therein. +

+

+For example, to build and use tables that are appropriate for the French locale (where accented characters with values greater than 128 are treated as letters), the following code could be used:

@@ -2025,9 +2040,7 @@ letters), the following code could be used:
   re = pcre2_compile(..., ccontext);
 
The locale name "fr_FR" is used on Linux and other Unix-like systems; if you -are using Windows, the name for the French locale is "french". It is the -caller's responsibility to ensure that the memory containing the tables remains -available for as long as it is needed. +are using Windows, the name for the French locale is "french".

The pointer that is passed (via the compile context) to pcre2_compile() @@ -2035,6 +2048,13 @@ is saved with the compiled pattern, and the same tables are used by pcre2_match() and pcre_dfa_match(). Thus, for any single pattern, compilation and matching both happen in the same locale, but different patterns can be processed in different locales. +

+

+It is the caller's responsibility to ensure that the memory containing the +tables remains available while they are still in use. When they are no longer +needed, you can discard them using pcre2_maketables_free(), passing as its +first argument the same general context that was used to create the +tables.


INFORMATION ABOUT A COMPILED PATTERN

@@ -3851,7 +3871,7 @@ Cambridge, England.


REVISION

-Last updated: 01 August 2019 +Last updated: 02 September 2019
Copyright © 1997-2019 University of Cambridge.
diff --git a/doc/index.html.src b/doc/index.html.src index 556965a..2c7c5fb 100644 --- a/doc/index.html.src +++ b/doc/index.html.src @@ -179,6 +179,9 @@ in the library. pcre2_maketables   Build character tables in current locale +pcre2_maketables_free +   Free character tables + pcre2_match   Match a compiled pattern to a subject string (Perl compatible) diff --git a/doc/pcre2.txt b/doc/pcre2.txt index a990396..5e138e0 100644 --- a/doc/pcre2.txt +++ b/doc/pcre2.txt @@ -402,6 +402,9 @@ PCRE2 NATIVE API AUXILIARY FUNCTIONS const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); + void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); + int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where); @@ -1941,39 +1944,48 @@ JUST-IN-TIME (JIT) COMPILATION LOCALE SUPPORT + const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); + + void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); + PCRE2 handles caseless matching, and determines whether characters are letters, digits, or whatever, by reference to a set of tables, indexed - by character code point. This applies only to characters whose code - points are less than 256. By default, higher-valued code points never - match escapes such as \w or \d. However, if PCRE2 is built with Uni- - code support, all characters can be tested with \p and \P, or, alterna- - tively, the PCRE2_UCP option can be set when a pattern is compiled; - this causes \w and friends to use Unicode property support instead of - the built-in tables. + by character code point. However, this applies only to characters whose + code points are less than 256. By default, higher-valued code points + never match escapes such as \w or \d. 
When PCRE2 is built with Unicode + support (the default), all characters can be tested with \p and \P, or, + alternatively, the PCRE2_UCP option can be set when a pattern is com- + piled; this causes \w and friends to use Unicode property support in- + stead of the built-in tables. The use of locales with Unicode is discouraged. If you are handling characters with code points greater than 128, you should either use Unicode support, or use locales, but not try to mix the two. - PCRE2 contains an internal set of character tables that are used by de- + PCRE2 contains a built-in set of character tables that are used by de- fault. These are sufficient for many applications. Normally, the in- ternal tables recognize only ASCII characters. However, when PCRE2 is built, it is possible to cause the internal tables to be rebuilt in the default "C" locale of the local system, which may cause them to be dif- ferent. - The internal tables can be overridden by tables supplied by the appli- + The built-in tables can be overridden by tables supplied by the appli- cation that calls PCRE2. These may be created in a different locale from the default. As more and more applications change to using Uni- code, the need for this locale support is expected to die away. External tables are built by calling the pcre2_maketables() function, - in the relevant locale. The result can be passed to pcre2_compile() as - often as necessary, by creating a compile context and calling - pcre2_set_character_tables() to set the tables pointer therein. For ex- - ample, to build and use tables that are appropriate for the French lo- - cale (where accented characters with values greater than 128 are - treated as letters), the following code could be used: + in the relevant locale. The only argument to this function is a general + context, which can be used to pass a custom memory allocator. If the + argument is NULL, the system malloc() is used. 
The result can be passed + to pcre2_compile() as often as necessary, by creating a compile context + and calling pcre2_set_character_tables() to set the tables pointer + therein. + + For example, to build and use tables that are appropriate for the + French locale (where accented characters with values greater than 128 + are treated as letters), the following code could be used: setlocale(LC_CTYPE, "fr_FR"); tables = pcre2_maketables(NULL); @@ -1982,9 +1994,7 @@ LOCALE SUPPORT re = pcre2_compile(..., ccontext); The locale name "fr_FR" is used on Linux and other Unix-like systems; - if you are using Windows, the name for the French locale is "french". - It is the caller's responsibility to ensure that the memory containing - the tables remains available for as long as it is needed. + if you are using Windows, the name for the French locale is "french". The pointer that is passed (via the compile context) to pcre2_compile() is saved with the compiled pattern, and the same tables are used by @@ -1992,6 +2002,12 @@ LOCALE SUPPORT pilation and matching both happen in the same locale, but different patterns can be processed in different locales. + It is the caller's responsibility to ensure that the memory containing + the tables remains available while they are still in use. When they are + no longer needed, you can discard them using pcre2_maketables_free(), + which should pass as its first parameter the same global context that + was used to create the tables. + INFORMATION ABOUT A COMPILED PATTERN @@ -3706,7 +3722,7 @@ AUTHOR REVISION - Last updated: 01 August 2019 + Last updated: 02 September 2019 Copyright (c) 1997-2019 University of Cambridge. 
------------------------------------------------------------------------------ diff --git a/doc/pcre2_maketables_free.3 b/doc/pcre2_maketables_free.3 new file mode 100644 index 0000000..07986b9 --- /dev/null +++ b/doc/pcre2_maketables_free.3 @@ -0,0 +1,31 @@ +.TH PCRE2_MAKETABLES_FREE 3 "02 September 2019" "PCRE2 10.34" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function discards a set of character tables that were created by a call +to +.\" HREF +\fBpcre2_maketables()\fP. +.\" +.P +The \fIgcontext\fP parameter should match what was used in that call to +account for any custom allocators that might be in use; if it is NULL +the system \fBfree()\fP is used. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page. diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 2300a58..dc46369 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "01 August 2019" "PCRE2 10.34" +.TH PCRE2API 3 "02 September 2019" "PCRE2 10.34" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -247,6 +247,9 @@ document for an overview of all the PCRE2 documentation. .sp .B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); .sp +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.sp .B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, .B " void *\fIwhere\fP);" .sp @@ -728,7 +731,7 @@ interpreted matching functions, \fIpcre2_match()\fP and .B " const uint8_t *\fItables\fP);" .fi .sp -The value must be the result of a call to \fIpcre2_maketables()\fP, whose only +The value must be the result of a call to \fBpcre2_maketables()\fP, whose only argument is a general context. 
This function builds a set of character tables in the current locale. .sp @@ -1943,35 +1946,45 @@ Most (but not all) patterns can be optimized by the JIT compiler. .SH "LOCALE SUPPORT" .rs .sp +.nf +.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); +.sp +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +.P PCRE2 handles caseless matching, and determines whether characters are letters, digits, or whatever, by reference to a set of tables, indexed by character code -point. This applies only to characters whose code points are less than 256. By -default, higher-valued code points never match escapes such as \ew or \ed. -However, if PCRE2 is built with Unicode support, all characters can be tested -with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set when a -pattern is compiled; this causes \ew and friends to use Unicode property +point. However, this applies only to characters whose code points are less than +256. By default, higher-valued code points never match escapes such as \ew or +\ed. When PCRE2 is built with Unicode support (the default), all characters can +be tested with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set +when a pattern is compiled; this causes \ew and friends to use Unicode property support instead of the built-in tables. .P The use of locales with Unicode is discouraged. If you are handling characters with code points greater than 128, you should either use Unicode support, or use locales, but not try to mix the two. .P -PCRE2 contains an internal set of character tables that are used by default. +PCRE2 contains a built-in set of character tables that are used by default. These are sufficient for many applications. Normally, the internal tables recognize only ASCII characters. 
However, when PCRE2 is built, it is possible to cause the internal tables to be rebuilt in the default "C" locale of the local system, which may cause them to be different. .P -The internal tables can be overridden by tables supplied by the application +The built-in tables can be overridden by tables supplied by the application that calls PCRE2. These may be created in a different locale from the default. As more and more applications change to using Unicode, the need for this locale support is expected to die away. .P External tables are built by calling the \fBpcre2_maketables()\fP function, in -the relevant locale. The result can be passed to \fBpcre2_compile()\fP as often -as necessary, by creating a compile context and calling -\fBpcre2_set_character_tables()\fP to set the tables pointer therein. For -example, to build and use tables that are appropriate for the French locale +the relevant locale. The only argument to this function is a general context, +which can be used to pass a custom memory allocator. If the argument is NULL, +the system \fBmalloc()\fP is used. The result can be passed to +\fBpcre2_compile()\fP as often as necessary, by creating a compile context and +calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein. +.P +For example, to build and use tables that are appropriate for the French locale (where accented characters with values greater than 128 are treated as letters), the following code could be used: .sp @@ -1982,15 +1995,19 @@ letters), the following code could be used: re = pcre2_compile(..., ccontext); .sp The locale name "fr_FR" is used on Linux and other Unix-like systems; if you -are using Windows, the name for the French locale is "french". It is the -caller's responsibility to ensure that the memory containing the tables remains -available for as long as it is needed. +are using Windows, the name for the French locale is "french". 
.P The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP is saved with the compiled pattern, and the same tables are used by \fBpcre2_match()\fP and \fBpcre_dfa_match()\fP. Thus, for any single pattern, compilation and matching both happen in the same locale, but different patterns can be processed in different locales. +.P +It is the caller's responsibility to ensure that the memory containing the +tables remains available while they are still in use. When they are no longer +needed, you can discard them using \fBpcre2_maketables_free()\fP, which should +pass as its first parameter the same global context that was used to create the +tables. . . .\" HTML @@ -3863,6 +3880,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 01 August 2019 +Last updated: 02 September 2019 Copyright (c) 1997-2019 University of Cambridge. .fi diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 1208d32..9a0ad0b 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -779,7 +779,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \ *pcre2_maketables(pcre2_general_context *); \ - +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_maketables_free(pcre2_general_context *, const uint8_t *); /* Define macros that generate width-specific names from generic versions. The three-level macro scheme is necessary to get the macros expanded when we want @@ -855,6 +856,7 @@ pcre2_compile are called by application code. 
*/ #define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_) #define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) #define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) +#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_) #define pcre2_match PCRE2_SUFFIX(pcre2_match_) #define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) #define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c index 5921e90..b1eb137 100644 --- a/src/pcre2_maketables.c +++ b/src/pcre2_maketables.c @@ -147,4 +147,13 @@ for (i = 0; i < 256; i++) return yield; } +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables) +{ + if (gcontext) + gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data); + else + free((void *)tables); +} + /* End of pcre2_maketables.c */ diff --git a/src/pcre2grep.c b/src/pcre2grep.c index 3ffae77..12fe95e 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -4390,7 +4390,7 @@ if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack); #endif free(main_buffer); -free((void *)character_tables); +if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables); pcre2_compile_context_free(compile_context); pcre2_match_context_free(match_context);