Renamed dftables as pcre2_dftables and enabled it to write the tables in binary.
Update documentation about character tables.
This commit is contained in:
parent
953d4e9c95
commit
8057c3c8b9
|
@ -85,6 +85,7 @@
|
|||
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
|
||||
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
|
||||
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
|
||||
# 2020-03-26 PH renamed dftables as pcre2_dftables (as elsewhere)
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
|
@ -423,11 +424,11 @@ CONFIGURE_FILE(src/pcre2.h.in
|
|||
|
||||
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
||||
IF(PCRE2_REBUILD_CHARTABLES)
|
||||
ADD_EXECUTABLE(dftables src/dftables.c)
|
||||
ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c)
|
||||
ADD_CUSTOM_COMMAND(
|
||||
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
|
||||
DEPENDS dftables
|
||||
COMMAND dftables
|
||||
DEPENDS pcre2_dftables
|
||||
COMMAND pcre2_dftables
|
||||
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
)
|
||||
|
|
12
ChangeLog
12
ChangeLog
|
@ -82,6 +82,18 @@ could be mis-compiled and therefore not match correctly. This is the example
|
|||
that found this: /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ which failed to
|
||||
match "word" because the "move back" value was set to zero.
|
||||
|
||||
21. Following a request from a user, some extensions and tidies to the
|
||||
character tables handling have been done:
|
||||
|
||||
(a) The dftables auxiliary program is renamed pcre2_dftables, but it is still
|
||||
not installed for public use.
|
||||
|
||||
(b) There is now a -b option for pcre2_dftables, which causes the tables to
|
||||
be written in binary. There is also a -help option.
|
||||
|
||||
(c) PCRE2_CONFIG_TABLES_LENGTH is added to pcre2_config() so that an
|
||||
application that wants to save tables in binary knows how long they are.
|
||||
|
||||
|
||||
Version 10.34 21-November-2019
|
||||
------------------------------
|
||||
|
|
19
Makefile.am
19
Makefile.am
|
@ -325,18 +325,18 @@ include_HEADERS = src/pcre2posix.h
|
|||
bin_SCRIPTS = pcre2-config
|
||||
|
||||
## ---------------------------------------------------------------
|
||||
## The dftables program is used to rebuild character tables before compiling
|
||||
## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
|
||||
## program. The default (when --enable-rebuild-chartables is not specified) is
|
||||
## to copy a distributed set of tables that are defined for ASCII code. In this
|
||||
## case, dftables is not needed.
|
||||
## The pcre2_dftables program is used to rebuild character tables before
|
||||
## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an
|
||||
## installed program. The default (when --enable-rebuild-chartables is not
|
||||
## specified) is to copy a distributed set of tables that are defined for ASCII
|
||||
## code. In this case, pcre2_dftables is not needed.
|
||||
|
||||
if WITH_REBUILD_CHARTABLES
|
||||
noinst_PROGRAMS += dftables
|
||||
dftables_SOURCES = src/dftables.c
|
||||
src/pcre2_chartables.c: dftables$(EXEEXT)
|
||||
noinst_PROGRAMS += pcre2_dftables
|
||||
pcre2_dftables_SOURCES = src/pcre2_dftables.c
|
||||
src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
|
||||
rm -f $@
|
||||
./dftables$(EXEEXT) $@
|
||||
./pcre2_dftables$(EXEEXT) $@
|
||||
else
|
||||
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
|
||||
rm -f $@
|
||||
|
@ -634,6 +634,7 @@ EXTRA_DIST += \
|
|||
testdata/grepoutputCN \
|
||||
testdata/grepoutputN \
|
||||
testdata/greppatN4 \
|
||||
testdata/testbtables \
|
||||
testdata/testinput1 \
|
||||
testdata/testinput2 \
|
||||
testdata/testinput3 \
|
||||
|
|
|
@ -74,14 +74,14 @@ can skip ahead to the CMake section.
|
|||
src/pcre2_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
|
||||
if you have set up src/config.h), and then run it with the single
|
||||
argument "src/pcre2_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on a
|
||||
system that uses EBCDIC code.
|
||||
Compile src/pcre2_dftables.c as a stand-alone program (using
|
||||
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
|
||||
the single argument "src/pcre2_chartables.c". This generates a set of
|
||||
standard character tables and writes them to that file. The tables are
|
||||
generated using the default C locale for your system. If you want to use
|
||||
a locale that is specified by LC_xxx environment variables, add the -L
|
||||
option to the pcre2_dftables command. You must use this method if you
|
||||
are building on a system that uses EBCDIC code.
|
||||
|
||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||
specify alternative tables at run time.
|
||||
|
|
|
@ -190,7 +190,7 @@ files="\
|
|||
libpcre2-16.pc.in \
|
||||
libpcre2-32.pc.in \
|
||||
libpcre2-posix.pc.in \
|
||||
src/dftables.c \
|
||||
src/pcre2_dftables.c \
|
||||
src/pcre2.h.in \
|
||||
src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c \
|
||||
|
|
68
README
68
README
|
@ -269,9 +269,9 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre2_chartables.c. If you do
|
||||
not specify this option, pcre2_chartables.c is created as a copy of
|
||||
a program called pcre2_dftables is compiled and run in the default C locale
|
||||
when you obey "make". It builds a source file called pcre2_chartables.c. If
|
||||
you do not specify this option, pcre2_chartables.c is created as a copy of
|
||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||
information.
|
||||
|
||||
|
@ -548,11 +548,11 @@ Cross-compiling using autotools
|
|||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre2_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
|
||||
source file is compiled and run on the local host, in order to generate the
|
||||
inbuilt character tables (the pcre2_chartables.c file). This will probably not
|
||||
work, because pcre2_dftables.c needs to be compiled with the local compiler,
|
||||
not the cross compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||
|
@ -560,9 +560,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
|
|||
not be a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
|
||||
and run it on the local host to make a new version of pcre2_chartables.c.dist.
|
||||
Then when you cross-compile PCRE2 this new version of the tables will be used.
|
||||
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
|
||||
hand and run it on the local host to make a new version of
|
||||
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||
at build time" for more details.
|
||||
|
||||
|
||||
Making new tarballs
|
||||
|
@ -721,8 +722,8 @@ compile context.
|
|||
The source file called pcre2_chartables.c contains the default set of tables.
|
||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||
specified for ./configure, a different version of pcre2_chartables.c is built
|
||||
by the program dftables (compiled from dftables.c), which uses the ANSI C
|
||||
specified for ./configure, a new version of pcre2_chartables.c is built by the
|
||||
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
|
||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||
islower(), etc. to build the table sources. This means that the default C
|
||||
locale that is set for your system will control the contents of these default
|
||||
|
@ -732,32 +733,31 @@ file does not get automatically re-generated. The best way to do this is to
|
|||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
When the pcre2_dftables program is run as a result of specifying
|
||||
--enable-rebuild-chartables, it uses the default C locale that is set on your
|
||||
system. It does not pay attention to the LC_xxx environment variables. In other
|
||||
words, it uses the system's default locale rather than whatever the compiling
|
||||
user happens to have set. If you really do want to build a source set of
|
||||
character tables in a locale that is specified by the LC_xxx variables, you can
|
||||
run the pcre2_dftables program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre2_chartables.c.special
|
||||
./pcre2_dftables -L pcre2_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256. The final 256-byte table has bits indicating various character types,
|
||||
as follows:
|
||||
The second argument names the file where the source code for the tables is
|
||||
written. The first two 256-byte tables provide lower casing and case flipping
|
||||
functions, respectively. The next table consists of a number of 32-byte bit
|
||||
maps which identify certain character classes such as digits, "word"
|
||||
characters, white space, etc. These are used when building 32-byte bit maps
|
||||
that represent character classes for code points less than 256. The final
|
||||
256-byte table has bits indicating various character types, as follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
4 lower case letter
|
||||
8 decimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE2 to malfunction.
|
||||
See also the pcre2build section "Creating character tables at build time".
|
||||
|
||||
|
||||
File manifest
|
||||
|
@ -768,7 +768,7 @@ The distribution should contain the files listed below.
|
|||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 16 April 2019
|
||||
Last updated: 20 March 2020
|
||||
|
|
|
@ -74,14 +74,14 @@ can skip ahead to the CMake section.
|
|||
src/pcre2_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
|
||||
if you have set up src/config.h), and then run it with the single
|
||||
argument "src/pcre2_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on a
|
||||
system that uses EBCDIC code.
|
||||
Compile src/pcre2_dftables.c as a stand-alone program (using
|
||||
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
|
||||
the single argument "src/pcre2_chartables.c". This generates a set of
|
||||
standard character tables and writes them to that file. The tables are
|
||||
generated using the default C locale for your system. If you want to use
|
||||
a locale that is specified by LC_xxx environment variables, add the -L
|
||||
option to the pcre2_dftables command. You must use this method if you
|
||||
are building on a system that uses EBCDIC code.
|
||||
|
||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||
specify alternative tables at run time.
|
||||
|
|
|
@ -269,9 +269,9 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre2_chartables.c. If you do
|
||||
not specify this option, pcre2_chartables.c is created as a copy of
|
||||
a program called pcre2_dftables is compiled and run in the default C locale
|
||||
when you obey "make". It builds a source file called pcre2_chartables.c. If
|
||||
you do not specify this option, pcre2_chartables.c is created as a copy of
|
||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||
information.
|
||||
|
||||
|
@ -548,11 +548,11 @@ Cross-compiling using autotools
|
|||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre2_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
|
||||
source file is compiled and run on the local host, in order to generate the
|
||||
inbuilt character tables (the pcre2_chartables.c file). This will probably not
|
||||
work, because pcre2_dftables.c needs to be compiled with the local compiler,
|
||||
not the cross compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||
|
@ -560,9 +560,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
|
|||
not be a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
|
||||
and run it on the local host to make a new version of pcre2_chartables.c.dist.
|
||||
Then when you cross-compile PCRE2 this new version of the tables will be used.
|
||||
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
|
||||
hand and run it on the local host to make a new version of
|
||||
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||
at build time" for more details.
|
||||
|
||||
|
||||
Making new tarballs
|
||||
|
@ -721,8 +722,8 @@ compile context.
|
|||
The source file called pcre2_chartables.c contains the default set of tables.
|
||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||
specified for ./configure, a different version of pcre2_chartables.c is built
|
||||
by the program dftables (compiled from dftables.c), which uses the ANSI C
|
||||
specified for ./configure, a new version of pcre2_chartables.c is built by the
|
||||
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
|
||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||
islower(), etc. to build the table sources. This means that the default C
|
||||
locale that is set for your system will control the contents of these default
|
||||
|
@ -732,32 +733,31 @@ file does not get automatically re-generated. The best way to do this is to
|
|||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
When the pcre2_dftables program is run as a result of specifying
|
||||
--enable-rebuild-chartables, it uses the default C locale that is set on your
|
||||
system. It does not pay attention to the LC_xxx environment variables. In other
|
||||
words, it uses the system's default locale rather than whatever the compiling
|
||||
user happens to have set. If you really do want to build a source set of
|
||||
character tables in a locale that is specified by the LC_xxx variables, you can
|
||||
run the pcre2_dftables program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre2_chartables.c.special
|
||||
./pcre2_dftables -L pcre2_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256. The final 256-byte table has bits indicating various character types,
|
||||
as follows:
|
||||
The second argument names the file where the source code for the tables is
|
||||
written. The first two 256-byte tables provide lower casing and case flipping
|
||||
functions, respectively. The next table consists of a number of 32-byte bit
|
||||
maps which identify certain character classes such as digits, "word"
|
||||
characters, white space, etc. These are used when building 32-byte bit maps
|
||||
that represent character classes for code points less than 256. The final
|
||||
256-byte table has bits indicating various character types, as follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
4 lower case letter
|
||||
8 decimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE2 to malfunction.
|
||||
See also the pcre2build section "Creating character tables at build time".
|
||||
|
||||
|
||||
File manifest
|
||||
|
@ -768,7 +768,7 @@ The distribution should contain the files listed below.
|
|||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 16 April 2019
|
||||
Last updated: 20 March 2020
|
||||
|
|
|
@ -27,9 +27,12 @@ DESCRIPTION
|
|||
</b><br>
|
||||
<P>
|
||||
This function sets a pointer to custom character tables within a compile
|
||||
context. The second argument must be the result of a call to
|
||||
<b>pcre2_maketables()</b> or NULL to request the default tables. The result is
|
||||
always zero.
|
||||
context. The second argument must point to a set of PCRE2 character tables or
|
||||
be NULL to request the default tables. The result is always zero. Character
|
||||
tables can be created by calling <b>pcre2_maketables()</b> or by running the
|
||||
<b>pcre2_dftables</b> maintenance command in binary mode (see the
|
||||
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||
documentation).
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -1105,10 +1105,11 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
|
|||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to
|
||||
discover which optional features have been compiled into the PCRE2 library. The
|
||||
The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to find
|
||||
the value of certain configuration parameters and to discover which optional
|
||||
features have been compiled into the PCRE2 library. The
|
||||
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||
documentation has more details about these optional features.
|
||||
documentation has more details about these features.
|
||||
</P>
|
||||
<P>
|
||||
The first argument for <b>pcre2_config()</b> specifies which information is
|
||||
|
@ -1224,6 +1225,13 @@ over compilation stack usage, see <b>pcre2_set_compile_recursion_guard()</b>.
|
|||
</pre>
|
||||
This parameter is obsolete and should not be used in new code. The output is a
|
||||
uint32_t integer that is always set to zero.
|
||||
<pre>
|
||||
PCRE2_CONFIG_TABLES_LENGTH
|
||||
</pre>
|
||||
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||
processing tables in bytes. For details of these tables see the
|
||||
<a href="#localesupport">section on locale support</a>
|
||||
below.
|
||||
<pre>
|
||||
PCRE2_CONFIG_UNICODE_VERSION
|
||||
</pre>
|
||||
|
@ -2043,7 +2051,7 @@ calling <b>pcre2_set_character_tables()</b> to set the tables pointer therein.
|
|||
</P>
|
||||
<P>
|
||||
For example, to build and use tables that are appropriate for the French locale
|
||||
(where accented characters with values greater than 128 are treated as
|
||||
(where accented characters with values greater than 127 are treated as
|
||||
letters), the following code could be used:
|
||||
<pre>
|
||||
setlocale(LC_CTYPE, "fr_FR");
|
||||
|
@ -2057,10 +2065,10 @@ are using Windows, the name for the French locale is "french".
|
|||
</P>
|
||||
<P>
|
||||
The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
|
||||
is saved with the compiled pattern, and the same tables are used by
|
||||
<b>pcre2_match()</b> and <b>pcre_dfa_match()</b>. Thus, for any single pattern,
|
||||
compilation and matching both happen in the same locale, but different patterns
|
||||
can be processed in different locales.
|
||||
is saved with the compiled pattern, and the same tables are used by the
|
||||
matching functions. Thus, for any single pattern, compilation and matching both
|
||||
happen in the same locale, but different patterns can be processed in different
|
||||
locales.
|
||||
</P>
|
||||
<P>
|
||||
It is the caller's responsibility to ensure that the memory containing the
|
||||
|
@ -2068,6 +2076,23 @@ tables remains available while they are still in use. When they are no longer
|
|||
needed, you can discard them using <b>pcre2_maketables_free()</b>, which should
|
||||
pass as its first parameter the same global context that was used to create the
|
||||
tables.
|
||||
</P>
|
||||
<br><b>
|
||||
Saving locale tables
|
||||
</b><br>
|
||||
<P>
|
||||
The tables described above are just a sequence of binary bytes, which makes
|
||||
them independent of hardware characteristics such as endianness or whether the
|
||||
processor is 32-bit or 64-bit. A copy of the result of <b>pcre2_maketables()</b>
|
||||
can therefore be saved in a file or elsewhere and re-used later, even in a
|
||||
different program or on another computer. The size of the tables (number of
|
||||
bytes) must be obtained by calling <b>pcre2_config()</b> with the
|
||||
PCRE2_CONFIG_TABLES_LENGTH option because <b>pcre2_maketables()</b> does not
|
||||
return this value. Note that the <b>pcre2_dftables</b> program, which is part of
|
||||
the PCRE2 build system, can be used stand-alone to create a file that contains
|
||||
a set of binary tables. See the
|
||||
<a href="pcre2build.html#createtables"><b>pcre2build</b></a>
|
||||
documentation for details.
|
||||
<a name="infoaboutpattern"></a></P>
|
||||
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||
<P>
|
||||
|
@ -2076,7 +2101,7 @@ tables.
|
|||
<P>
|
||||
The <b>pcre2_pattern_info()</b> function returns general information about a
|
||||
compiled pattern. For information about callouts, see the
|
||||
<a href="pcre2pattern.html#infoaboutcallouts">next section.</a>
|
||||
<a href="#infoaboutcallouts">next section.</a>
|
||||
The first argument for <b>pcre2_pattern_info()</b> is a pointer to the compiled
|
||||
pattern. The second argument specifies which piece of information is required,
|
||||
and the third argument is a pointer to a variable to receive the data. If the
|
||||
|
@ -3931,7 +3956,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 February 2020
|
||||
Last updated: 19 March 2020
|
||||
<br>
|
||||
Copyright © 1997-2020 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -128,7 +128,7 @@ To build it without Unicode support, add
|
|||
--disable-unicode
|
||||
</pre>
|
||||
to the <b>configure</b> command. This setting applies to all three libraries. It
|
||||
is not possible to build one library with Unicode support, and another without,
|
||||
is not possible to build one library with Unicode support and another without
|
||||
in the same configuration.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -188,11 +188,11 @@ which enables the use of an execmem allocator in JIT that is compatible with
|
|||
SELinux. This has no effect if JIT is not enabled. See the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
documentation for a discussion of JIT usage. When JIT support is enabled,
|
||||
pcre2grep automatically makes use of it, unless you add
|
||||
<b>pcre2grep</b> automatically makes use of it, unless you add
|
||||
<pre>
|
||||
--disable-pcre2grep-jit
|
||||
</pre>
|
||||
to the "configure" command.
|
||||
to the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
|
||||
<P>
|
||||
|
@ -321,7 +321,7 @@ As well as applying to <b>pcre2_match()</b>, the depth limit also controls
|
|||
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
|
||||
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||
The limit does not apply to JIT matching.
|
||||
</P>
|
||||
<a name="createtables"></a></P>
|
||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||
<P>
|
||||
PCRE2 uses fixed tables for processing characters whose code points are less
|
||||
|
@ -332,12 +332,34 @@ only. If you add
|
|||
--enable-rebuild-chartables
|
||||
</pre>
|
||||
to the <b>configure</b> command, the distributed tables are no longer used.
|
||||
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
|
||||
source for new set of tables, created in the default locale of your C run-time
|
||||
system. This method of replacing the tables does not work if you are cross
|
||||
compiling, because <b>dftables</b> is run on the local host. If you need to
|
||||
create alternative tables when cross compiling, you will have to do so "by
|
||||
hand".
|
||||
Instead, a program called <b>pcre2_dftables</b> is compiled and run. This
|
||||
outputs the source for a new set of tables, created in the default locale of your
|
||||
C run-time system. This method of replacing the tables does not work if you are
|
||||
cross compiling, because <b>pcre2_dftables</b> needs to be run on the local
|
||||
host and therefore must not be compiled with the cross compiler.
|
||||
</P>
|
||||
<P>
|
||||
If you need to create alternative tables when cross compiling, you will have to
|
||||
do so "by hand". There may also be other reasons for creating tables manually.
|
||||
To cause <b>pcre2_dftables</b> to be built on the local host, run a normal
|
||||
compiling command, and then run the program with the output file as its
|
||||
argument, for example:
|
||||
<pre>
|
||||
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||
./pcre2_dftables src/pcre2_chartables.c
|
||||
</pre>
|
||||
This builds the tables in the default locale of the local host. If you want to
|
||||
specify a locale, you must use the -L option:
|
||||
<pre>
|
||||
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||
</pre>
|
||||
You can also specify -b (with or without -L). This causes the tables to be
|
||||
written in binary instead of as source code. A set of binary tables can be
|
||||
loaded into memory by an application and passed to <b>pcre2_compile()</b> in the
|
||||
same way as tables created by calling <b>pcre2_maketables()</b>. The tables are
|
||||
just a string of bytes, independent of hardware characteristics such as
|
||||
endianness. This means they can be bundled with an application that runs in
|
||||
different environments, to ensure consistent behaviour.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||
<P>
|
||||
|
@ -538,7 +560,7 @@ support these modifiers. If
|
|||
<pre>
|
||||
--disable-percent-zt
|
||||
</pre>
|
||||
is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
|
||||
is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
|
||||
%lu is used, with a cast for size_t values.
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
|
||||
|
@ -592,9 +614,9 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 March 2019
|
||||
Last updated: 20 March 2020
|
||||
<br>
|
||||
Copyright © 1997-2019 University of Cambridge.
|
||||
Copyright © 1997-2020 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -375,6 +375,12 @@ output.
|
|||
This command is used to load a set of precompiled patterns from a file, as
|
||||
described in the section entitled "Saving and restoring compiled patterns"
|
||||
<a href="#saverestore">below.</a>
|
||||
<pre>
|
||||
#loadtables <filename>
|
||||
</pre>
|
||||
This command is used to load a set of binary character tables that can be
|
||||
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||
<b>pcre2_dftables</b> program with the -b option.
|
||||
<pre>
|
||||
#newline_default [<newline-list>]
|
||||
</pre>
|
||||
|
@ -679,7 +685,7 @@ heavily used in the test files.
|
|||
pushcopy push a copy onto the stack
|
||||
stackguard=<number> test the stackguard feature
|
||||
subject_literal treat all subject lines as literal
|
||||
tables=[0|1|2] select internal tables
|
||||
tables=[0|1|2|3] select internal tables
|
||||
use_length do not zero-terminate the pattern
|
||||
utf8_input treat input as UTF-8
|
||||
</pre>
|
||||
|
@ -1027,18 +1033,20 @@ Using alternative character tables
|
|||
</b><br>
|
||||
<P>
|
||||
The value specified for the <b>tables</b> modifier must be one of the digits 0,
|
||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
||||
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
||||
different character tables. The digit specifies the tables as follows:
|
||||
1, 2, or 3. It causes a specific set of built-in character tables to be passed
|
||||
to <b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour
|
||||
with different character tables. The digit specifies the tables as follows:
|
||||
<pre>
|
||||
0 do not pass any special character tables
|
||||
1 the default ASCII tables, as distributed in
|
||||
pcre2_chartables.c.dist
|
||||
2 a set of tables defining ISO 8859 characters
|
||||
3 a set of tables loaded by the #loadtables command
|
||||
</pre>
|
||||
In table 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Setting alternate character tables and a locale
|
||||
are mutually exclusive.
|
||||
In tables 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Tables 3 can be used only after a
|
||||
<b>#loadtables</b> command has loaded them from a binary file. Setting alternate
|
||||
character tables and a locale are mutually exclusive.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting certain match controls
|
||||
|
@ -2105,7 +2113,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 22 January 2020
|
||||
Last updated: 20 March 2020
|
||||
<br>
|
||||
Copyright © 1997-2020 University of Cambridge.
|
||||
<br>
|
||||
|
|
212
doc/pcre2.txt
212
doc/pcre2.txt
|
@ -1103,9 +1103,9 @@ CHECKING BUILD-TIME OPTIONS
|
|||
int pcre2_config(uint32_t what, void *where);
|
||||
|
||||
The function pcre2_config() makes it possible for a PCRE2 client to
|
||||
discover which optional features have been compiled into the PCRE2 li-
|
||||
brary. The pcre2build documentation has more details about these op-
|
||||
tional features.
|
||||
find the value of certain configuration parameters and to discover
|
||||
which optional features have been compiled into the PCRE2 library. The
|
||||
pcre2build documentation has more details about these features.
|
||||
|
||||
The first argument for pcre2_config() specifies which information is
|
||||
required. The second argument is a pointer to memory into which the in-
|
||||
|
@ -1225,6 +1225,12 @@ CHECKING BUILD-TIME OPTIONS
|
|||
This parameter is obsolete and should not be used in new code. The out-
|
||||
put is a uint32_t integer that is always set to zero.
|
||||
|
||||
PCRE2_CONFIG_TABLES_LENGTH
|
||||
|
||||
The output is a uint32_t integer that gives the length of PCRE2's char-
|
||||
acter processing tables in bytes. For details of these tables see the
|
||||
section on locale support below.
|
||||
|
||||
PCRE2_CONFIG_UNICODE_VERSION
|
||||
|
||||
The where argument should point to a buffer that is at least 24 code
|
||||
|
@ -1994,7 +2000,7 @@ LOCALE SUPPORT
|
|||
therein.
|
||||
|
||||
For example, to build and use tables that are appropriate for the
|
||||
French locale (where accented characters with values greater than 128
|
||||
French locale (where accented characters with values greater than 127
|
||||
are treated as letters), the following code could be used:
|
||||
|
||||
setlocale(LC_CTYPE, "fr_FR");
|
||||
|
@ -2007,10 +2013,10 @@ LOCALE SUPPORT
|
|||
if you are using Windows, the name for the French locale is "french".
|
||||
|
||||
The pointer that is passed (via the compile context) to pcre2_compile()
|
||||
is saved with the compiled pattern, and the same tables are used by
|
||||
pcre2_match() and pcre_dfa_match(). Thus, for any single pattern, com-
|
||||
pilation and matching both happen in the same locale, but different
|
||||
patterns can be processed in different locales.
|
||||
is saved with the compiled pattern, and the same tables are used by the
|
||||
matching functions. Thus, for any single pattern, compilation and
|
||||
matching both happen in the same locale, but different patterns can be
|
||||
processed in different locales.
|
||||
|
||||
It is the caller's responsibility to ensure that the memory containing
|
||||
the tables remains available while they are still in use. When they are
|
||||
|
@ -2018,6 +2024,20 @@ LOCALE SUPPORT
|
|||
which should pass as its first parameter the same global context that
|
||||
was used to create the tables.
|
||||
|
||||
Saving locale tables
|
||||
|
||||
The tables described above are just a sequence of binary bytes, which
|
||||
makes them independent of hardware characteristics such as endianness
|
||||
or whether the processor is 32-bit or 64-bit. A copy of the result of
|
||||
pcre2_maketables() can therefore be saved in a file or elsewhere and
|
||||
re-used later, even in a different program or on another computer. The
|
||||
size of the tables (number of bytes) must be obtained by calling
|
||||
pcre2_config() with the PCRE2_CONFIG_TABLES_LENGTH option because
|
||||
pcre2_maketables() does not return this value. Note that the
|
||||
pcre2_dftables program, which is part of the PCRE2 build system, can be
|
||||
used stand-alone to create a file that contains a set of binary tables.
|
||||
See the pcre2build documentation for details.
|
||||
|
||||
|
||||
INFORMATION ABOUT A COMPILED PATTERN
|
||||
|
||||
|
@ -3773,7 +3793,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 24 February 2020
|
||||
Last updated: 19 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -3873,8 +3893,8 @@ UNICODE AND UTF SUPPORT
|
|||
--disable-unicode
|
||||
|
||||
to the configure command. This setting applies to all three libraries.
|
||||
It is not possible to build one library with Unicode support, and an-
|
||||
other without, in the same configuration.
|
||||
It is not possible to build one library with Unicode support and an-
|
||||
other without in the same configuration.
|
||||
|
||||
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8,
|
||||
UTF-16 or UTF-32. To do that, applications that use the library can set
|
||||
|
@ -3935,7 +3955,7 @@ JUST-IN-TIME COMPILER SUPPORT
|
|||
|
||||
--disable-pcre2grep-jit
|
||||
|
||||
to the "configure" command.
|
||||
to the configure command.
|
||||
|
||||
|
||||
NEWLINE RECOGNITION
|
||||
|
@ -4079,39 +4099,61 @@ CREATING CHARACTER TABLES AT BUILD TIME
|
|||
--enable-rebuild-chartables
|
||||
|
||||
to the configure command, the distributed tables are no longer used.
|
||||
Instead, a program called dftables is compiled and run. This outputs
|
||||
the source for new set of tables, created in the default locale of your
|
||||
C run-time system. This method of replacing the tables does not work if
|
||||
you are cross compiling, because dftables is run on the local host. If
|
||||
you need to create alternative tables when cross compiling, you will
|
||||
have to do so "by hand".
|
||||
Instead, a program called pcre2_dftables is compiled and run. This out-
|
||||
puts the source for a new set of tables, created in the default locale of
|
||||
your C run-time system. This method of replacing the tables does not
|
||||
work if you are cross compiling, because pcre2_dftables needs to be run
|
||||
on the local host and therefore must not be compiled with the cross compiler.
|
||||
|
||||
If you need to create alternative tables when cross compiling, you will
|
||||
have to do so "by hand". There may also be other reasons for creating
|
||||
tables manually. To cause pcre2_dftables to be built on the local
|
||||
host, run a normal compiling command, and then run the program with the
|
||||
output file as its argument, for example:
|
||||
|
||||
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||
./pcre2_dftables src/pcre2_chartables.c
|
||||
|
||||
This builds the tables in the default locale of the local host. If you
|
||||
want to specify a locale, you must use the -L option:
|
||||
|
||||
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||
|
||||
You can also specify -b (with or without -L). This causes the tables to
|
||||
be written in binary instead of as source code. A set of binary tables
|
||||
can be loaded into memory by an application and passed to pcre2_com-
|
||||
pile() in the same way as tables created by calling pcre2_maketables().
|
||||
The tables are just a string of bytes, independent of hardware charac-
|
||||
teristics such as endianness. This means they can be bundled with an
|
||||
application that runs in different environments, to ensure consistent
|
||||
behaviour.
|
||||
|
||||
|
||||
USING EBCDIC CODE
|
||||
|
||||
PCRE2 assumes by default that it will run in an environment where the
|
||||
character code is ASCII or Unicode, which is a superset of ASCII. This
|
||||
PCRE2 assumes by default that it will run in an environment where the
|
||||
character code is ASCII or Unicode, which is a superset of ASCII. This
|
||||
is the case for most computer operating systems. PCRE2 can, however, be
|
||||
compiled to run in an 8-bit EBCDIC environment by adding
|
||||
|
||||
--enable-ebcdic --disable-unicode
|
||||
|
||||
to the configure command. This setting implies --enable-rebuild-charta-
|
||||
bles. You should only use it if you know that you are in an EBCDIC en-
|
||||
bles. You should only use it if you know that you are in an EBCDIC en-
|
||||
vironment (for example, an IBM mainframe operating system).
|
||||
|
||||
It is not possible to support both EBCDIC and UTF-8 codes in the same
|
||||
version of the library. Consequently, --enable-unicode and --enable-
|
||||
It is not possible to support both EBCDIC and UTF-8 codes in the same
|
||||
version of the library. Consequently, --enable-unicode and --enable-
|
||||
ebcdic are mutually exclusive.
|
||||
|
||||
The EBCDIC character that corresponds to an ASCII LF is assumed to have
|
||||
the value 0x15 by default. However, in some EBCDIC environments, 0x25
|
||||
the value 0x15 by default. However, in some EBCDIC environments, 0x25
|
||||
is used. In such an environment you should use
|
||||
|
||||
--enable-ebcdic-nl25
|
||||
|
||||
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR
|
||||
has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and
|
||||
has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and
|
||||
0x25 is not chosen as LF is made to correspond to the Unicode NEL char-
|
||||
acter (which, in Unicode, is 0x85).
|
||||
|
||||
|
@ -4123,47 +4165,47 @@ USING EBCDIC CODE
|
|||
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
|
||||
|
||||
By default pcre2grep supports the use of callouts with string arguments
|
||||
within the patterns it is matching. There are two kinds: one that gen-
|
||||
within the patterns it is matching. There are two kinds: one that gen-
|
||||
erates output using local code, and another that calls an external pro-
|
||||
gram or script. If --disable-pcre2grep-callout-fork is added to the
|
||||
configure command, only the first kind of callout is supported; if
|
||||
--disable-pcre2grep-callout is used, all callouts are completely ig-
|
||||
nored. For more details of pcre2grep callouts, see the pcre2grep docu-
|
||||
gram or script. If --disable-pcre2grep-callout-fork is added to the
|
||||
configure command, only the first kind of callout is supported; if
|
||||
--disable-pcre2grep-callout is used, all callouts are completely ig-
|
||||
nored. For more details of pcre2grep callouts, see the pcre2grep docu-
|
||||
mentation.
|
||||
|
||||
|
||||
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
|
||||
|
||||
By default, pcre2grep reads all files as plain text. You can build it
|
||||
so that it recognizes files whose names end in .gz or .bz2, and reads
|
||||
By default, pcre2grep reads all files as plain text. You can build it
|
||||
so that it recognizes files whose names end in .gz or .bz2, and reads
|
||||
them with libz or libbz2, respectively, by adding one or both of
|
||||
|
||||
--enable-pcre2grep-libz
|
||||
--enable-pcre2grep-libbz2
|
||||
|
||||
to the configure command. These options naturally require that the rel-
|
||||
evant libraries are installed on your system. Configuration will fail
|
||||
evant libraries are installed on your system. Configuration will fail
|
||||
if they are not.
|
||||
|
||||
|
||||
PCRE2GREP BUFFER SIZE
|
||||
|
||||
pcre2grep uses an internal buffer to hold a "window" on the file it is
|
||||
pcre2grep uses an internal buffer to hold a "window" on the file it is
|
||||
scanning, in order to be able to output "before" and "after" lines when
|
||||
it finds a match. The default starting size of the buffer is 20KiB. The
|
||||
buffer itself is three times this size, but because of the way it is
|
||||
buffer itself is three times this size, but because of the way it is
|
||||
used for holding "before" lines, the longest line that is guaranteed to
|
||||
be processable is the notional buffer size. If a longer line is encoun-
|
||||
tered, pcre2grep automatically expands the buffer, up to a specified
|
||||
maximum size, whose default is 1MiB or the starting size, whichever is
|
||||
the larger. You can change the default parameter values by adding, for
|
||||
tered, pcre2grep automatically expands the buffer, up to a specified
|
||||
maximum size, whose default is 1MiB or the starting size, whichever is
|
||||
the larger. You can change the default parameter values by adding, for
|
||||
example,
|
||||
|
||||
--with-pcre2grep-bufsize=51200
|
||||
--with-pcre2grep-max-bufsize=2097152
|
||||
|
||||
to the configure command. The caller of pcre2grep can override these
|
||||
values by using --buffer-size and --max-buffer-size on the command
|
||||
to the configure command. The caller of pcre2grep can override these
|
||||
values by using --buffer-size and --max-buffer-size on the command
|
||||
line.
|
||||
|
||||
|
||||
|
@ -4174,26 +4216,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
|
|||
--enable-pcre2test-libreadline
|
||||
--enable-pcre2test-libedit
|
||||
|
||||
to the configure command, pcre2test is linked with the libreadline or
|
||||
libedit library, respectively, and when its input is from a terminal,
|
||||
it reads it using the readline() function. This provides line-editing
|
||||
and history facilities. Note that libreadline is GPL-licensed, so if
|
||||
you distribute a binary of pcre2test linked in this way, there may be
|
||||
to the configure command, pcre2test is linked with the libreadline or
|
||||
libedit library, respectively, and when its input is from a terminal,
|
||||
it reads it using the readline() function. This provides line-editing
|
||||
and history facilities. Note that libreadline is GPL-licensed, so if
|
||||
you distribute a binary of pcre2test linked in this way, there may be
|
||||
licensing issues. These can be avoided by linking instead with libedit,
|
||||
which has a BSD licence.
|
||||
|
||||
Setting --enable-pcre2test-libreadline causes the -lreadline option to
|
||||
be added to the pcre2test build. In many operating environments with a
|
||||
system-installed readline library this is sufficient. However, in some
|
||||
Setting --enable-pcre2test-libreadline causes the -lreadline option to
|
||||
be added to the pcre2test build. In many operating environments with a
|
||||
system-installed readline library this is sufficient. However, in some
|
||||
environments (e.g. if an unmodified distribution version of readline is
|
||||
in use), some extra configuration may be necessary. The INSTALL file
|
||||
in use), some extra configuration may be necessary. The INSTALL file
|
||||
for libreadline says this:
|
||||
|
||||
"Readline uses the termcap functions, but does not link with
|
||||
the termcap or curses library itself, allowing applications
|
||||
which link with readline the to choose an appropriate library."
|
||||
|
||||
If your environment has not been set up so that an appropriate library
|
||||
If your environment has not been set up so that an appropriate library
|
||||
is automatically included, you may need to add something like
|
||||
|
||||
LIBS="-lncurses"
|
||||
|
@ -4207,7 +4249,7 @@ INCLUDING DEBUGGING CODE
|
|||
|
||||
--enable-debug
|
||||
|
||||
to the configure command, additional debugging code is included in the
|
||||
to the configure command, additional debugging code is included in the
|
||||
build. This feature is intended for use by the PCRE2 maintainers.
|
||||
|
||||
|
||||
|
@ -4217,14 +4259,14 @@ DEBUGGING WITH VALGRIND SUPPORT
|
|||
|
||||
--enable-valgrind
|
||||
|
||||
to the configure command, PCRE2 will use valgrind annotations to mark
|
||||
certain memory regions as unaddressable. This allows it to detect in-
|
||||
to the configure command, PCRE2 will use valgrind annotations to mark
|
||||
certain memory regions as unaddressable. This allows it to detect in-
|
||||
valid memory accesses, and is mostly useful for debugging PCRE2 itself.
|
||||
|
||||
|
||||
CODE COVERAGE REPORTING
|
||||
|
||||
If your C compiler is gcc, you can build a version of PCRE2 that can
|
||||
If your C compiler is gcc, you can build a version of PCRE2 that can
|
||||
generate a code coverage report for its test suite. To enable this, you
|
||||
must install lcov version 1.6 or above. Then specify
|
||||
|
||||
|
@ -4233,20 +4275,20 @@ CODE COVERAGE REPORTING
|
|||
to the configure command and build PCRE2 in the usual way.
|
||||
|
||||
Note that using ccache (a caching C compiler) is incompatible with code
|
||||
coverage reporting. If you have configured ccache to run automatically
|
||||
coverage reporting. If you have configured ccache to run automatically
|
||||
on your system, you must set the environment variable
|
||||
|
||||
CCACHE_DISABLE=1
|
||||
|
||||
before running make to build PCRE2, so that ccache is not used.
|
||||
|
||||
When --enable-coverage is used, the following additional targets are
|
||||
When --enable-coverage is used, the following additional targets are
|
||||
added to the Makefile:
|
||||
|
||||
make coverage
|
||||
|
||||
This creates a fresh coverage report for the PCRE2 test suite. It is
|
||||
equivalent to running "make coverage-reset", "make coverage-baseline",
|
||||
This creates a fresh coverage report for the PCRE2 test suite. It is
|
||||
equivalent to running "make coverage-reset", "make coverage-baseline",
|
||||
"make check", and then "make coverage-report".
|
||||
|
||||
make coverage-reset
|
||||
|
@ -4263,71 +4305,71 @@ CODE COVERAGE REPORTING
|
|||
|
||||
make coverage-clean-report
|
||||
|
||||
This removes the generated coverage report without cleaning the cover-
|
||||
This removes the generated coverage report without cleaning the cover-
|
||||
age data itself.
|
||||
|
||||
make coverage-clean-data
|
||||
|
||||
This removes the captured coverage data without removing the coverage
|
||||
This removes the captured coverage data without removing the coverage
|
||||
files created at compile time (*.gcno).
|
||||
|
||||
make coverage-clean
|
||||
|
||||
This cleans all coverage data including the generated coverage report.
|
||||
For more information about code coverage, see the gcov and lcov docu-
|
||||
This cleans all coverage data including the generated coverage report.
|
||||
For more information about code coverage, see the gcov and lcov docu-
|
||||
mentation.
|
||||
|
||||
|
||||
DISABLING THE Z AND T FORMATTING MODIFIERS
|
||||
|
||||
The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
|
||||
in environments other than Microsoft Visual Studio when __STDC_VER-
|
||||
The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
|
||||
in environments other than Microsoft Visual Studio when __STDC_VER-
|
||||
SION__ is defined and has a value greater than or equal to 199901L (in-
|
||||
dicating C99). However, there is at least one environment that claims
|
||||
dicating C99). However, there is at least one environment that claims
|
||||
to be C99 but does not support these modifiers. If
|
||||
|
||||
--disable-percent-zt
|
||||
|
||||
is specified, no use is made of the z or t modifiers. Instead or %td or
|
||||
is specified, no use is made of the z or t modifiers. Instead of %td or
|
||||
%zu, %lu is used, with a cast for size_t values.
|
||||
|
||||
|
||||
SUPPORT FOR FUZZERS
|
||||
|
||||
There is a special option for use by people who want to run fuzzing
|
||||
There is a special option for use by people who want to run fuzzing
|
||||
tests on PCRE2:
|
||||
|
||||
--enable-fuzz-support
|
||||
|
||||
At present this applies only to the 8-bit library. If set, it causes an
|
||||
extra library called libpcre2-fuzzsupport.a to be built, but not in-
|
||||
stalled. This contains a single function called LLVMFuzzerTestOneIn-
|
||||
put() whose arguments are a pointer to a string and the length of the
|
||||
string. When called, this function tries to compile the string as a
|
||||
pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the
|
||||
extra library called libpcre2-fuzzsupport.a to be built, but not in-
|
||||
stalled. This contains a single function called LLVMFuzzerTestOneIn-
|
||||
put() whose arguments are a pointer to a string and the length of the
|
||||
string. When called, this function tries to compile the string as a
|
||||
pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the
|
||||
string.
|
||||
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuz-
|
||||
zcheck to be created. This is normally run under valgrind or used when
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuz-
|
||||
zcheck to be created. This is normally run under valgrind or used when
|
||||
PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
|
||||
function and outputs information about what it is doing. The input
|
||||
strings are specified by arguments: if an argument starts with "=" the
|
||||
rest of it is a literal input string. Otherwise, it is assumed to be a
|
||||
function and outputs information about what it is doing. The input
|
||||
strings are specified by arguments: if an argument starts with "=" the
|
||||
rest of it is a literal input string. Otherwise, it is assumed to be a
|
||||
file name, and the contents of the file are the test string.
|
||||
|
||||
|
||||
OBSOLETE OPTION
|
||||
|
||||
In versions of PCRE2 prior to 10.30, there were two ways of handling
|
||||
backtracking in the pcre2_match() function. The default was to use the
|
||||
In versions of PCRE2 prior to 10.30, there were two ways of handling
|
||||
backtracking in the pcre2_match() function. The default was to use the
|
||||
system stack, but if
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
was set, memory on the heap was used. From release 10.30 onwards this
|
||||
has changed (the stack is no longer used) and this option now does
|
||||
was set, memory on the heap was used. From release 10.30 onwards this
|
||||
has changed (the stack is no longer used) and this option now does
|
||||
nothing except give a warning.
|
||||
|
||||
|
||||
|
@ -4345,8 +4387,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 03 March 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
Last updated: 20 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_SET_CHARACTER_TABLES 3 "22 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_SET_CHARACTER_TABLES 3 "20 March 2020" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -15,9 +15,14 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.rs
|
||||
.sp
|
||||
This function sets a pointer to custom character tables within a compile
|
||||
context. The second argument must be the result of a call to
|
||||
\fBpcre2_maketables()\fP or NULL to request the default tables. The result is
|
||||
always zero.
|
||||
context. The second argument must point to a set of PCRE2 character tables or
|
||||
be NULL to request the default tables. The result is always zero. Character
|
||||
tables can be created by calling \fBpcre2_maketables()\fP or by running the
|
||||
\fBpcre2_dftables\fP maintenance command in binary mode (see the
|
||||
.\" HREF
|
||||
\fBpcre2build\fP
|
||||
.\"
|
||||
documentation).
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "24 February 2020" "PCRE2 10.35"
|
||||
.TH PCRE2API 3 "19 March 2020" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1034,12 +1034,13 @@ less than the limit set by the caller of \fBpcre2_match()\fP or
|
|||
.sp
|
||||
.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
||||
.P
|
||||
The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to
|
||||
discover which optional features have been compiled into the PCRE2 library. The
|
||||
The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to find
|
||||
the value of certain configuration parameters and to discover which optional
|
||||
features have been compiled into the PCRE2 library. The
|
||||
.\" HREF
|
||||
\fBpcre2build\fP
|
||||
.\"
|
||||
documentation has more details about these optional features.
|
||||
documentation has more details about these features.
|
||||
.P
|
||||
The first argument for \fBpcre2_config()\fP specifies which information is
|
||||
required. The second argument is a pointer to memory into which the information
|
||||
|
@ -1152,6 +1153,16 @@ over compilation stack usage, see \fBpcre2_set_compile_recursion_guard()\fP.
|
|||
.sp
|
||||
This parameter is obsolete and should not be used in new code. The output is a
|
||||
uint32_t integer that is always set to zero.
|
||||
.sp
|
||||
PCRE2_CONFIG_TABLES_LENGTH
|
||||
.sp
|
||||
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||
processing tables in bytes. For details of these tables see the
|
||||
.\" HTML <a href="#localesupport">
|
||||
.\" </a>
|
||||
section on locale support
|
||||
.\"
|
||||
below.
|
||||
.sp
|
||||
PCRE2_CONFIG_UNICODE_VERSION
|
||||
.sp
|
||||
|
@ -1996,7 +2007,7 @@ the system \fBmalloc()\fP is used. The result can be passed to
|
|||
calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein.
|
||||
.P
|
||||
For example, to build and use tables that are appropriate for the French locale
|
||||
(where accented characters with values greater than 128 are treated as
|
||||
(where accented characters with values greater than 127 are treated as
|
||||
letters), the following code could be used:
|
||||
.sp
|
||||
setlocale(LC_CTYPE, "fr_FR");
|
||||
|
@ -2009,10 +2020,10 @@ The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
|
|||
are using Windows, the name for the French locale is "french".
|
||||
.P
|
||||
The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP
|
||||
is saved with the compiled pattern, and the same tables are used by
|
||||
\fBpcre2_match()\fP and \fBpcre_dfa_match()\fP. Thus, for any single pattern,
|
||||
compilation and matching both happen in the same locale, but different patterns
|
||||
can be processed in different locales.
|
||||
is saved with the compiled pattern, and the same tables are used by the
|
||||
matching functions. Thus, for any single pattern, compilation and matching both
|
||||
happen in the same locale, but different patterns can be processed in different
|
||||
locales.
|
||||
.P
|
||||
It is the caller's responsibility to ensure that the memory containing the
|
||||
tables remains available while they are still in use. When they are no longer
|
||||
|
@ -2021,6 +2032,26 @@ pass as its first parameter the same global context that was used to create the
|
|||
tables.
|
||||
.
|
||||
.
|
||||
.SS "Saving locale tables"
|
||||
.rs
|
||||
.sp
|
||||
The tables described above are just a sequence of binary bytes, which makes
|
||||
them independent of hardware characteristics such as endianness or whether the
|
||||
processor is 32-bit or 64-bit. A copy of the result of \fBpcre2_maketables()\fP
|
||||
can therefore be saved in a file or elsewhere and re-used later, even in a
|
||||
different program or on another computer. The size of the tables (number of
|
||||
bytes) must be obtained by calling \fBpcre2_config()\fP with the
|
||||
PCRE2_CONFIG_TABLES_LENGTH option because \fBpcre2_maketables()\fP does not
|
||||
return this value. Note that the \fBpcre2_dftables\fP program, which is part of
|
||||
the PCRE2 build system, can be used stand-alone to create a file that contains
|
||||
a set of binary tables. See the
|
||||
.\" HTML <a href="pcre2build.html#createtables">
|
||||
.\" </a>
|
||||
\fBpcre2build\fP
|
||||
.\"
|
||||
documentation for details.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="infoaboutpattern"></a>
|
||||
.SH "INFORMATION ABOUT A COMPILED PATTERN"
|
||||
.rs
|
||||
|
@ -2031,7 +2062,7 @@ tables.
|
|||
.P
|
||||
The \fBpcre2_pattern_info()\fP function returns general information about a
|
||||
compiled pattern. For information about callouts, see the
|
||||
.\" HTML <a href="pcre2pattern.html#infoaboutcallouts">
|
||||
.\" HTML <a href="#infoaboutcallouts">
|
||||
.\" </a>
|
||||
next section.
|
||||
.\"
|
||||
|
@ -3937,6 +3968,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 24 February 2020
|
||||
Last updated: 19 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2BUILD 3 "03 March 2019" "PCRE2 10.33"
|
||||
.TH PCRE2BUILD 3 "20 March 2020" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.
|
||||
|
@ -110,7 +110,7 @@ To build it without Unicode support, add
|
|||
--disable-unicode
|
||||
.sp
|
||||
to the \fBconfigure\fP command. This setting applies to all three libraries. It
|
||||
is not possible to build one library with Unicode support, and another without,
|
||||
is not possible to build one library with Unicode support and another without
|
||||
in the same configuration.
|
||||
.P
|
||||
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16
|
||||
|
@ -175,11 +175,11 @@ SELinux. This has no effect if JIT is not enabled. See the
|
|||
\fBpcre2jit\fP
|
||||
.\"
|
||||
documentation for a discussion of JIT usage. When JIT support is enabled,
|
||||
pcre2grep automatically makes use of it, unless you add
|
||||
\fBpcre2grep\fP automatically makes use of it, unless you add
|
||||
.sp
|
||||
--disable-pcre2grep-jit
|
||||
.sp
|
||||
to the "configure" command.
|
||||
to the \fBconfigure\fP command.
|
||||
.
|
||||
.
|
||||
.SH "NEWLINE RECOGNITION"
|
||||
|
@ -317,6 +317,7 @@ used for lookaround assertions, atomic groups, and recursion within patterns.
|
|||
The limit does not apply to JIT matching.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="createtables"></a>
|
||||
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -328,12 +329,33 @@ only. If you add
|
|||
--enable-rebuild-chartables
|
||||
.sp
|
||||
to the \fBconfigure\fP command, the distributed tables are no longer used.
|
||||
Instead, a program called \fBdftables\fP is compiled and run. This outputs the
|
||||
source for new set of tables, created in the default locale of your C run-time
|
||||
system. This method of replacing the tables does not work if you are cross
|
||||
compiling, because \fBdftables\fP is run on the local host. If you need to
|
||||
create alternative tables when cross compiling, you will have to do so "by
|
||||
hand".
|
||||
Instead, a program called \fBpcre2_dftables\fP is compiled and run. This
|
||||
outputs the source for a new set of tables, created in the default locale of your
|
||||
C run-time system. This method of replacing the tables does not work if you are
|
||||
cross compiling, because \fBpcre2_dftables\fP needs to be run on the local
|
||||
host and therefore not compiled with the cross compiler.
|
||||
.P
|
||||
If you need to create alternative tables when cross compiling, you will have to
|
||||
do so "by hand". There may also be other reasons for creating tables manually.
|
||||
To cause \fBpcre2_dftables\fP to be built on the local host, run a normal
|
||||
compiling command, and then run the program with the output file as its
|
||||
argument, for example:
|
||||
.sp
|
||||
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||
./pcre2_dftables src/pcre2_chartables.c
|
||||
.sp
|
||||
This builds the tables in the default locale of the local host. If you want to
|
||||
specify a locale, you must use the -L option:
|
||||
.sp
|
||||
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||
.sp
|
||||
You can also specify -b (with or without -L). This causes the tables to be
|
||||
written in binary instead of as source code. A set of binary tables can be
|
||||
loaded into memory by an application and passed to \fBpcre2_compile()\fP in the
|
||||
same way as tables created by calling \fBpcre2_maketables()\fP. The tables are
|
||||
just a string of bytes, independent of hardware characteristics such as
|
||||
endianness. This means they can be bundled with an application that runs in
|
||||
different environments, to ensure consistent behaviour.
|
||||
.
|
||||
.
|
||||
.SH "USING EBCDIC CODE"
|
||||
|
@ -548,7 +570,7 @@ support these modifiers. If
|
|||
.sp
|
||||
--disable-percent-zt
|
||||
.sp
|
||||
is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
|
||||
is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
|
||||
%lu is used, with a cast for size_t values.
|
||||
.
|
||||
.
|
||||
|
@ -610,6 +632,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 March 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
Last updated: 20 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "22 January 2020" "PCRE 10.35"
|
||||
.TH PCRE2TEST 1 "20 March 2020" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -326,6 +326,12 @@ described in the section entitled "Saving and restoring compiled patterns"
|
|||
.\" </a>
|
||||
below.
|
||||
.\"
|
||||
.sp
|
||||
#loadtables <filename>
|
||||
.sp
|
||||
This command is used to load a set of binary character tables that can be
|
||||
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||
\fBpcre2_dftables\fP program with the -b option.
|
||||
.sp
|
||||
#newline_default [<newline-list>]
|
||||
.sp
|
||||
|
@ -638,7 +644,7 @@ heavily used in the test files.
|
|||
pushcopy push a copy onto the stack
|
||||
stackguard=<number> test the stackguard feature
|
||||
subject_literal treat all subject lines as literal
|
||||
tables=[0|1|2] select internal tables
|
||||
tables=[0|1|2|3] select internal tables
|
||||
use_length do not zero-terminate the pattern
|
||||
utf8_input treat input as UTF-8
|
||||
.sp
|
||||
|
@ -988,18 +994,20 @@ be aborted.
|
|||
.rs
|
||||
.sp
|
||||
The value specified for the \fBtables\fP modifier must be one of the digits 0,
|
||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
||||
\fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour with
|
||||
different character tables. The digit specifies the tables as follows:
|
||||
1, 2, or 3. It causes a specific set of built-in character tables to be passed
|
||||
to \fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour
|
||||
with different character tables. The digit specifies the tables as follows:
|
||||
.sp
|
||||
0 do not pass any special character tables
|
||||
1 the default ASCII tables, as distributed in
|
||||
pcre2_chartables.c.dist
|
||||
2 a set of tables defining ISO 8859 characters
|
||||
3 a set of tables loaded by the #loadtables command
|
||||
.sp
|
||||
In table 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Setting alternate character tables and a locale
|
||||
are mutually exclusive.
|
||||
In tables 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Tables 3 can be used only after a
|
||||
\fB#loadtables\fP command has loaded them from a binary file. Setting alternate
|
||||
character tables and a locale are mutually exclusive.
|
||||
.
|
||||
.
|
||||
.SS "Setting certain match controls"
|
||||
|
@ -2088,6 +2096,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 22 January 2020
|
||||
Last updated: 20 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
.fi
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -448,6 +448,7 @@ released, the numbers must not be changed. */
|
|||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
|
|
@ -2,17 +2,21 @@
|
|||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file was automatically written by the dftables auxiliary
|
||||
/* This file was automatically written by the pcre2_dftables auxiliary
|
||||
program. It contains character tables that are used when no external
|
||||
tables are passed to PCRE2 by the application that calls it. The tables
|
||||
are used only for characters whose code values are less than 256. */
|
||||
|
||||
/*The dftables program (which is distributed with PCRE2) can be used to
|
||||
build alternative versions of this file. This is necessary if you are
|
||||
/* This set of tables was written in the C locale. */
|
||||
|
||||
/* The pcre2_dftables program (which is distributed with PCRE2) can be used
|
||||
to build alternative versions of this file. This is necessary if you are
|
||||
running in an EBCDIC environment, or if you want to default to a different
|
||||
encoding, for example ISO-8859-1. When dftables is run, it creates these
|
||||
tables in the current locale. This happens automatically if PCRE2 is
|
||||
configured with --enable-rebuild-chartables. */
|
||||
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
|
||||
these tables in the "C" locale by default. This happens automatically if
|
||||
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
|
||||
pcre2_dftables manually with the -L option to build tables using the LC_ALL
|
||||
locale. */
|
||||
|
||||
/* The following #include is present because without it gcc 4.x may remove
|
||||
the array definition from the final binary if PCRE2 is built into a static
|
||||
|
@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
|
|||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph print, punct, and cntrl. Other classes are built from combinations. */
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
|
|
@ -1202,7 +1202,7 @@ in the decoded tables. */
|
|||
|
||||
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
||||
{
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||
(*ref_count)++;
|
||||
}
|
||||
|
||||
|
@ -1232,15 +1232,15 @@ if (newcode == NULL) return NULL;
|
|||
memcpy(newcode, code, code->blocksize);
|
||||
newcode->executable_jit = NULL;
|
||||
|
||||
newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
|
||||
newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
|
||||
code->memctl.memory_data);
|
||||
if (newtables == NULL)
|
||||
{
|
||||
code->memctl.free((void *)newcode, code->memctl.memory_data);
|
||||
return NULL;
|
||||
}
|
||||
memcpy(newtables, code->tables, tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(newtables + tables_length);
|
||||
memcpy(newtables, code->tables, TABLES_LENGTH);
|
||||
ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
|
||||
*ref_count = 1;
|
||||
|
||||
newcode->tables = newtables;
|
||||
|
@ -1270,7 +1270,7 @@ if (code != NULL)
|
|||
be freed when there are no more references to them. The *ref_count should
|
||||
always be > 0. */
|
||||
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||
if (*ref_count > 0)
|
||||
{
|
||||
(*ref_count)--;
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
its value gets changed by pcre2_internal.h to be in code units. */
|
||||
its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
|
||||
be in code units. */
|
||||
|
||||
static int configured_link_size = LINK_SIZE;
|
||||
|
||||
|
@ -94,6 +95,7 @@ if (where == NULL) /* Requests a length */
|
|||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
|
@ -191,6 +193,10 @@ switch (what)
|
|||
*((uint32_t *)where) = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
*((uint32_t *)where) = TABLES_LENGTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -40,9 +40,12 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE2. The tables are built according to the current
|
||||
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
||||
*/
|
||||
character tables for PCRE2. The tables are built using the pcre2_maketables()
|
||||
function, which is part of the PCRE2 API. By default, the system's "C" locale
|
||||
is used rather than what the building user happens to have set, but the -L
|
||||
option can be used to select the current locale from the LC_ALL environment
|
||||
variable. By default, the tables are written in source form, but if -b is
|
||||
given, they are written in binary. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
|
@ -56,73 +59,153 @@ locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
|||
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define DFTABLES /* pcre2_maketables.c notices this */
|
||||
#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */
|
||||
#include "pcre2_maketables.c"
|
||||
|
||||
|
||||
static char *classlist[] =
|
||||
{
|
||||
"space", "xdigit", "digit", "upper", "lower",
|
||||
"word", "graph", "print", "punct", "cntrl"
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Usage *
|
||||
*************************************************/
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
(void)fprintf(stderr,
|
||||
"Usage: pcre2_dftables [options] <output file>\n"
|
||||
" -b Write output in binary (default is source code)\n"
|
||||
" -L Use locale from LC_ALL (default is \"C\" locale)\n"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Entry point *
|
||||
*************************************************/
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
FILE *f;
|
||||
int i = 1;
|
||||
int i;
|
||||
int nclass = 0;
|
||||
BOOL binary = FALSE;
|
||||
char *env = "C";
|
||||
const unsigned char *tables;
|
||||
const unsigned char *base_of_tables;
|
||||
|
||||
/* By default, the default C locale is used rather than what the building user
|
||||
happens to have set. However, if the -L option is given, set the locale from
|
||||
the LC_xxx environment variables. */
|
||||
/* Process options */
|
||||
|
||||
if (argc > 1 && strcmp(argv[1], "-L") == 0)
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
setlocale(LC_ALL, ""); /* Set from environment variables */
|
||||
i++;
|
||||
unsigned char *arg = (unsigned char *)argv[i];
|
||||
if (*arg != '-') break;
|
||||
|
||||
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||
{
|
||||
usage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-L") == 0)
|
||||
{
|
||||
if (setlocale(LC_ALL, "") == NULL)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
||||
return 1;
|
||||
}
|
||||
env = getenv("LC_ALL");
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-b") == 0)
|
||||
binary = TRUE;
|
||||
|
||||
else
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (argc < i + 1)
|
||||
if (i != argc - 1)
|
||||
{
|
||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Make the tables */
|
||||
|
||||
tables = maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There are several fprintf() calls here, because gcc in pedantic mode
|
||||
complains about the very long string otherwise. */
|
||||
/* If -b was specified, we write the tables in binary. */
|
||||
|
||||
fprintf(f,
|
||||
if (binary)
|
||||
{
|
||||
int yield = 0;
|
||||
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
||||
if (len != TABLES_LENGTH)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
||||
"instead of %d\n", (int)len, TABLES_LENGTH);
|
||||
yield = 1;
|
||||
}
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* Write the tables as source code for inclusion in the PCRE2 library. There
|
||||
are several fprintf() calls here, because gcc in pedantic mode complains about
|
||||
the very long string otherwise. */
|
||||
|
||||
(void)fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file was automatically written by the dftables auxiliary\n"
|
||||
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256. */\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/*The dftables program (which is distributed with PCRE2) can be used to\n"
|
||||
"build alternative versions of this file. This is necessary if you are\n"
|
||||
(void)fprintf(f,
|
||||
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
||||
"to build alternative versions of this file. This is necessary if you are\n"
|
||||
"running in an EBCDIC environment, or if you want to default to a different\n"
|
||||
"encoding, for example ISO-8859-1. When dftables is run, it creates these\n"
|
||||
"tables in the current locale. This happens automatically if PCRE2 is\n"
|
||||
"configured with --enable-rebuild-chartables. */\n\n");
|
||||
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
|
||||
"these tables in the \"C\" locale by default. This happens automatically if\n"
|
||||
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
|
||||
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
|
||||
"locale. */\n\n");
|
||||
|
||||
/* Force config.h in z/OS */
|
||||
|
||||
#if defined NATIVE_ZOS
|
||||
fprintf(f,
|
||||
(void)fprintf(f,
|
||||
"/* For z/OS, config.h is forced */\n"
|
||||
"#ifndef HAVE_CONFIG_H\n"
|
||||
"#define HAVE_CONFIG_H 1\n"
|
||||
"#endif\n\n");
|
||||
#endif
|
||||
|
||||
fprintf(f,
|
||||
(void)fprintf(f,
|
||||
"/* The following #include is present because without it gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE2 is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
|
@ -130,56 +213,57 @@ fprintf(f,
|
|||
"outside this compilation unit might reference this\" and so it will always\n"
|
||||
"be supplied to the linker. */\n\n");
|
||||
|
||||
fprintf(f,
|
||||
(void)fprintf(f,
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include \"config.h\"\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre2_internal.h\"\n\n");
|
||||
|
||||
fprintf(f,
|
||||
(void)fprintf(f,
|
||||
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
(void)fprintf(f,
|
||||
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
||||
"bytes long and the bits run from the least significant end of each byte. The\n"
|
||||
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
||||
"graph print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) fprintf(f, "\n");
|
||||
fprintf(f, "\n ");
|
||||
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
||||
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||
(void)fprintf(f, "\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) fprintf(f, ",");
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) (void)fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
(void)fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
(void)fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
|
@ -188,32 +272,32 @@ fprintf(f,
|
|||
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
||||
|
||||
fprintf(f, " ");
|
||||
(void)fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
fprintf(f, " /* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n ");
|
||||
(void)fprintf(f, " /* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) (void)fprintf(f, ",");
|
||||
}
|
||||
|
||||
fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||
(void)fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||
else (void)fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||
else (void)fprintf(f, "%3d", i-1);
|
||||
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of dftables.c */
|
||||
/* End of pcre2_dftables.c */
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -579,7 +579,7 @@ total length of the tables. */
|
|||
#define fcc_offset 256 /* Flip case */
|
||||
#define cbits_offset 512 /* Character classes */
|
||||
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
||||
#define tables_length (ctypes_offset + 256)
|
||||
#define TABLES_LENGTH (ctypes_offset + 256)
|
||||
|
||||
|
||||
/* -------------------- Character and string names ------------------------ */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -41,10 +41,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* This module contains the external function pcre2_maketables(), which builds
|
||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||
own as part of the PCRE2 library. However, it is also included in the
|
||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
own as part of the PCRE2 library. It is also included in the compilation of
|
||||
pcre2_dftables.c as a freestanding program, in which case the macro
|
||||
PCRE2_DFTABLES is defined. */
|
||||
|
||||
#ifndef DFTABLES
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
|
@ -61,28 +62,29 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
|||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
||||
program) malloc() is used, and the function has a different name so as not to
|
||||
clash with the prototype in pcre2.h.
|
||||
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
|
||||
freestanding auxiliary program) malloc() is used, and the function has a
|
||||
different name so as not to clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: none when DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Arguments: none when PCRE2_DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Returns: pointer to the contiguous block of data
|
||||
else NULL if memory allocation failed
|
||||
*/
|
||||
|
||||
#ifdef DFTABLES /* Included in freestanding dftables.c program */
|
||||
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
|
||||
static const uint8_t *maketables(void)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)malloc(tables_length);
|
||||
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
|
||||
|
||||
#else /* Not DFTABLES, compiling the library */
|
||||
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
|
||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables(pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||
gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
|
||||
malloc(tables_length));
|
||||
#endif /* DFTABLES */
|
||||
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
|
||||
malloc(TABLES_LENGTH));
|
||||
#endif /* PCRE2_DFTABLES */
|
||||
|
||||
int i;
|
||||
uint8_t *p;
|
||||
|
@ -103,8 +105,8 @@ exclusive ones - in some locales things may be different.
|
|||
|
||||
Note that the table for "space" includes everything "isspace" gives, including
|
||||
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||
From release 8.34 is is also correct for Perl space, because Perl added VT at
|
||||
release 5.18.
|
||||
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
|
||||
space, because Perl added VT at release 5.18.
|
||||
|
||||
Note also that it is possible for a character to be alnum or alpha without
|
||||
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||
|
@ -114,24 +116,24 @@ test for alnum specially. */
|
|||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||
if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||
white space chars, because Perl didn't recognize it as such for \s and for
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
|
||||
at release 8.34. */
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE1
|
||||
changed at release 8.34 and it's always been this way for PCRE2. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
|
@ -147,7 +149,7 @@ for (i = 0; i < 256; i++)
|
|||
return yield;
|
||||
}
|
||||
|
||||
#ifndef DFTABLES
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -90,7 +90,7 @@ if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
|||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
|
||||
/* Compute total size. */
|
||||
total_size = sizeof(pcre2_serialized_data) + tables_length;
|
||||
total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
|
||||
tables = NULL;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
|
@ -121,8 +121,8 @@ data->number_of_codes = number_of_codes;
|
|||
|
||||
/* Copy all compiled code data. */
|
||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
memcpy(dst_bytes, tables, tables_length);
|
||||
dst_bytes += tables_length;
|
||||
memcpy(dst_bytes, tables, TABLES_LENGTH);
|
||||
dst_bytes += TABLES_LENGTH;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
|
@ -189,12 +189,12 @@ src_bytes = bytes + sizeof(pcre2_serialized_data);
|
|||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
memcpy(tables, src_bytes, tables_length);
|
||||
*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
|
||||
src_bytes += tables_length;
|
||||
memcpy(tables, src_bytes, TABLES_LENGTH);
|
||||
*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
|
||||
src_bytes += TABLES_LENGTH;
|
||||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
|
|
|
@ -389,12 +389,14 @@ typedef struct cmdstruct {
|
|||
int value;
|
||||
} cmdstruct;
|
||||
|
||||
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
|
||||
CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };
|
||||
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT,
|
||||
CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT,
|
||||
CMD_UNKNOWN };
|
||||
|
||||
static cmdstruct cmdlist[] = {
|
||||
{ "forbid_utf", CMD_FORBID_UTF },
|
||||
{ "load", CMD_LOAD },
|
||||
{ "loadtables", CMD_LOADTABLES },
|
||||
{ "newline_default", CMD_NEWLINE_DEFAULT },
|
||||
{ "pattern", CMD_PATTERN },
|
||||
{ "perltest", CMD_PERLTEST },
|
||||
|
@ -957,6 +959,8 @@ static int *dfa_workspace = NULL;
|
|||
static const uint8_t *locale_tables = NULL;
|
||||
static const uint8_t *use_tables = NULL;
|
||||
static uint8_t locale_name[32];
|
||||
static uint8_t *tables3 = NULL; /* For binary-loaded tables */
|
||||
static uint32_t loadtables_length = 0;
|
||||
|
||||
/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
|
||||
rebuilding, but set up the same naming scheme for use in macros. The "buffer"
|
||||
|
@ -4795,12 +4799,13 @@ Arguments:
|
|||
buffptr points after the #command
|
||||
mode open mode
|
||||
fptr points to the FILE variable
|
||||
name name of # command
|
||||
|
||||
Returns: PR_OK or PR_ABEND
|
||||
*/
|
||||
|
||||
static int
|
||||
open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
|
||||
open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
|
||||
{
|
||||
char *endf;
|
||||
char *filename = (char *)buffptr;
|
||||
|
@ -4810,7 +4815,7 @@ while (endf > filename && isspace(endf[-1])) endf--;
|
|||
|
||||
if (endf == filename)
|
||||
{
|
||||
fprintf(outfile, "** File name expected after #save\n");
|
||||
fprintf(outfile, "** File name expected after %s\n", name);
|
||||
return PR_ABEND;
|
||||
}
|
||||
|
||||
|
@ -4976,7 +4981,7 @@ switch(cmd)
|
|||
return PR_OK;
|
||||
}
|
||||
|
||||
rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
|
||||
rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
|
||||
if (rc != PR_OK) return rc;
|
||||
|
||||
PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
|
||||
|
@ -5015,7 +5020,7 @@ switch(cmd)
|
|||
/* Load a set of compiled patterns from a file onto the stack */
|
||||
|
||||
case CMD_LOAD:
|
||||
rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
|
||||
rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
|
||||
if (rc != PR_OK) return rc;
|
||||
|
||||
serial_size = 0;
|
||||
|
@ -5067,6 +5072,31 @@ switch(cmd)
|
|||
|
||||
free(serial);
|
||||
break;
|
||||
|
||||
/* Load a set of binary tables into tables3. */
|
||||
|
||||
case CMD_LOADTABLES:
|
||||
rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
|
||||
if (rc != PR_OK) return rc;
|
||||
|
||||
if (tables3 == NULL)
|
||||
{
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
|
||||
tables3 = malloc(loadtables_length);
|
||||
if (tables3 == NULL)
|
||||
{
|
||||
fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
|
||||
return PR_ABEND;
|
||||
}
|
||||
}
|
||||
|
||||
if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
|
||||
{
|
||||
fprintf(outfile, "** Wrong return from fread()\n");
|
||||
yield = PR_ABEND;
|
||||
}
|
||||
fclose(f);
|
||||
break;
|
||||
}
|
||||
|
||||
return yield;
|
||||
|
@ -5382,8 +5412,19 @@ else switch (pat_patctl.tables_id)
|
|||
case 0: use_tables = NULL; break;
|
||||
case 1: use_tables = tables1; break;
|
||||
case 2: use_tables = tables2; break;
|
||||
|
||||
case 3:
|
||||
if (tables3 == NULL)
|
||||
{
|
||||
fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
|
||||
"been loaded\n");
|
||||
return PR_SKIP;
|
||||
}
|
||||
use_tables = tables3;
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
|
||||
fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
|
||||
return PR_SKIP;
|
||||
}
|
||||
|
||||
|
@ -9112,6 +9153,7 @@ free(dbuffer);
|
|||
free(pbuffer8);
|
||||
free(dfa_workspace);
|
||||
free((void *)locale_tables);
|
||||
free(tables3);
|
||||
PCRE2_MATCH_DATA_FREE(match_data);
|
||||
SUB1(pcre2_code_free, compiled_code);
|
||||
|
||||
|
|
Binary file not shown.
|
@ -5837,4 +5837,22 @@ a)"xI
|
|||
|
||||
"(?<=X(?(DEFINE)(.*))(?1))."
|
||||
|
||||
/\sxxx\s/tables=1
|
||||
\= Expect no match
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
|
||||
/\sxxx\s/tables=2
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
|
||||
/^\w+/tables=2
|
||||
École
|
||||
|
||||
/^\w+/tables=3
|
||||
École
|
||||
|
||||
#loadtables ./testdata/testbtables
|
||||
|
||||
/^\w+/tables=3
|
||||
École
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -17580,6 +17580,29 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
|
|||
"(?<=X(?(DEFINE)(.*))(?1))."
|
||||
Failed: error 125 at offset 0: lookbehind assertion is not fixed length
|
||||
|
||||
/\sxxx\s/tables=1
|
||||
\= Expect no match
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
No match
|
||||
|
||||
/\sxxx\s/tables=2
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
0: \x85xxx\xa0
|
||||
|
||||
/^\w+/tables=2
|
||||
École
|
||||
0: \xc3
|
||||
|
||||
/^\w+/tables=3
|
||||
** 'Tables = 3' is invalid: binary tables have not been loaded
|
||||
École
|
||||
|
||||
#loadtables ./testdata/testbtables
|
||||
|
||||
/^\w+/tables=3
|
||||
École
|
||||
0: \xc3
|
||||
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
Loading…
Reference in New Issue