Renamed dftables as pcre2_dftables and enabled it to write the tables in binary.
Update documentation about character tables.
This commit is contained in:
parent
953d4e9c95
commit
8057c3c8b9
|
@ -85,6 +85,7 @@
|
||||||
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
|
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
|
||||||
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
|
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
|
||||||
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
|
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
|
||||||
|
# 2020-03-26 PH renamed dftables as pcre2_dftables (as elsewhere)
|
||||||
|
|
||||||
PROJECT(PCRE2 C)
|
PROJECT(PCRE2 C)
|
||||||
|
|
||||||
|
@ -423,11 +424,11 @@ CONFIGURE_FILE(src/pcre2.h.in
|
||||||
|
|
||||||
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
||||||
IF(PCRE2_REBUILD_CHARTABLES)
|
IF(PCRE2_REBUILD_CHARTABLES)
|
||||||
ADD_EXECUTABLE(dftables src/dftables.c)
|
ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c)
|
||||||
ADD_CUSTOM_COMMAND(
|
ADD_CUSTOM_COMMAND(
|
||||||
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
|
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
|
||||||
DEPENDS dftables
|
DEPENDS pcre2_dftables
|
||||||
COMMAND dftables
|
COMMAND pcre2_dftables
|
||||||
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||||
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||||
)
|
)
|
||||||
|
|
12
ChangeLog
12
ChangeLog
|
@ -82,6 +82,18 @@ could be mis-compiled and therefore not match correctly. This is the example
|
||||||
that found this: /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ which failed to
|
that found this: /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ which failed to
|
||||||
match "word" because the "move back" value was set to zero.
|
match "word" because the "move back" value was set to zero.
|
||||||
|
|
||||||
|
21. Following a request from a user, some extensions and tidies to the
|
||||||
|
character tables handling have been done:
|
||||||
|
|
||||||
|
(a) The dftables auxiliary program is renamed pcre2_dftables, but it is still
|
||||||
|
not installed for public use.
|
||||||
|
|
||||||
|
(b) There is now a -b option for pcre2_dftables, which causes the tables to
|
||||||
|
be written in binary. There is also a -help option.
|
||||||
|
|
||||||
|
(c) PCRE2_CONFIG_TABLES_LENGTH is added to pcre2_config() so that an
|
||||||
|
application that wants to save tables in binary knows how long they are.
|
||||||
|
|
||||||
|
|
||||||
Version 10.34 21-November-2019
|
Version 10.34 21-November-2019
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
19
Makefile.am
19
Makefile.am
|
@ -325,18 +325,18 @@ include_HEADERS = src/pcre2posix.h
|
||||||
bin_SCRIPTS = pcre2-config
|
bin_SCRIPTS = pcre2-config
|
||||||
|
|
||||||
## ---------------------------------------------------------------
|
## ---------------------------------------------------------------
|
||||||
## The dftables program is used to rebuild character tables before compiling
|
## The pcre2_dftables program is used to rebuild character tables before
|
||||||
## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
|
## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an
|
||||||
## program. The default (when --enable-rebuild-chartables is not specified) is
|
## installed program. The default (when --enable-rebuild-chartables is not
|
||||||
## to copy a distributed set of tables that are defined for ASCII code. In this
|
## specified) is to copy a distributed set of tables that are defined for ASCII
|
||||||
## case, dftables is not needed.
|
## code. In this case, pcre2_dftables is not needed.
|
||||||
|
|
||||||
if WITH_REBUILD_CHARTABLES
|
if WITH_REBUILD_CHARTABLES
|
||||||
noinst_PROGRAMS += dftables
|
noinst_PROGRAMS += pcre2_dftables
|
||||||
dftables_SOURCES = src/dftables.c
|
pcre2_dftables_SOURCES = src/pcre2_dftables.c
|
||||||
src/pcre2_chartables.c: dftables$(EXEEXT)
|
src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
|
||||||
rm -f $@
|
rm -f $@
|
||||||
./dftables$(EXEEXT) $@
|
./pcre2_dftables$(EXEEXT) $@
|
||||||
else
|
else
|
||||||
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
|
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
|
||||||
rm -f $@
|
rm -f $@
|
||||||
|
@ -634,6 +634,7 @@ EXTRA_DIST += \
|
||||||
testdata/grepoutputCN \
|
testdata/grepoutputCN \
|
||||||
testdata/grepoutputN \
|
testdata/grepoutputN \
|
||||||
testdata/greppatN4 \
|
testdata/greppatN4 \
|
||||||
|
testdata/testbtables \
|
||||||
testdata/testinput1 \
|
testdata/testinput1 \
|
||||||
testdata/testinput2 \
|
testdata/testinput2 \
|
||||||
testdata/testinput3 \
|
testdata/testinput3 \
|
||||||
|
|
|
@ -74,14 +74,14 @@ can skip ahead to the CMake section.
|
||||||
src/pcre2_chartables.c.
|
src/pcre2_chartables.c.
|
||||||
|
|
||||||
OR:
|
OR:
|
||||||
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
|
Compile src/pcre2_dftables.c as a stand-alone program (using
|
||||||
if you have set up src/config.h), and then run it with the single
|
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
|
||||||
argument "src/pcre2_chartables.c". This generates a set of standard
|
the single argument "src/pcre2_chartables.c". This generates a set of
|
||||||
character tables and writes them to that file. The tables are generated
|
standard character tables and writes them to that file. The tables are
|
||||||
using the default C locale for your system. If you want to use a locale
|
generated using the default C locale for your system. If you want to use
|
||||||
that is specified by LC_xxx environment variables, add the -L option to
|
a locale that is specified by LC_xxx environment variables, add the -L
|
||||||
the dftables command. You must use this method if you are building on a
|
option to the pcre2_dftables command. You must use this method if you
|
||||||
system that uses EBCDIC code.
|
are building on a system that uses EBCDIC code.
|
||||||
|
|
||||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||||
specify alternative tables at run time.
|
specify alternative tables at run time.
|
||||||
|
|
|
@ -190,7 +190,7 @@ files="\
|
||||||
libpcre2-16.pc.in \
|
libpcre2-16.pc.in \
|
||||||
libpcre2-32.pc.in \
|
libpcre2-32.pc.in \
|
||||||
libpcre2-posix.pc.in \
|
libpcre2-posix.pc.in \
|
||||||
src/dftables.c \
|
src/pcre2_dftables.c \
|
||||||
src/pcre2.h.in \
|
src/pcre2.h.in \
|
||||||
src/pcre2_auto_possess.c \
|
src/pcre2_auto_possess.c \
|
||||||
src/pcre2_compile.c \
|
src/pcre2_compile.c \
|
||||||
|
|
68
README
68
README
|
@ -269,9 +269,9 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
--enable-rebuild-chartables
|
--enable-rebuild-chartables
|
||||||
|
|
||||||
a program called dftables is compiled and run in the default C locale when
|
a program called pcre2_dftables is compiled and run in the default C locale
|
||||||
you obey "make". It builds a source file called pcre2_chartables.c. If you do
|
when you obey "make". It builds a source file called pcre2_chartables.c. If
|
||||||
not specify this option, pcre2_chartables.c is created as a copy of
|
you do not specify this option, pcre2_chartables.c is created as a copy of
|
||||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||||
information.
|
information.
|
||||||
|
|
||||||
|
@ -548,11 +548,11 @@ Cross-compiling using autotools
|
||||||
|
|
||||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
|
||||||
file is compiled and run on the local host, in order to generate the inbuilt
|
source file is compiled and run on the local host, in order to generate the
|
||||||
character tables (the pcre2_chartables.c file). This will probably not work,
|
inbuilt character tables (the pcre2_chartables.c file). This will probably not
|
||||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
work, because pcre2_dftables.c needs to be compiled with the local compiler,
|
||||||
compiler.
|
not the cross compiler.
|
||||||
|
|
||||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||||
|
@ -560,9 +560,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
|
||||||
not be a problem.
|
not be a problem.
|
||||||
|
|
||||||
If you need to modify the character tables when cross-compiling, you should
|
If you need to modify the character tables when cross-compiling, you should
|
||||||
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
|
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
|
||||||
and run it on the local host to make a new version of pcre2_chartables.c.dist.
|
hand and run it on the local host to make a new version of
|
||||||
Then when you cross-compile PCRE2 this new version of the tables will be used.
|
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||||
|
at build time" for more details.
|
||||||
|
|
||||||
|
|
||||||
Making new tarballs
|
Making new tarballs
|
||||||
|
@ -721,8 +722,8 @@ compile context.
|
||||||
The source file called pcre2_chartables.c contains the default set of tables.
|
The source file called pcre2_chartables.c contains the default set of tables.
|
||||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||||
specified for ./configure, a different version of pcre2_chartables.c is built
|
specified for ./configure, a new version of pcre2_chartables.c is built by the
|
||||||
by the program dftables (compiled from dftables.c), which uses the ANSI C
|
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
|
||||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||||
islower(), etc. to build the table sources. This means that the default C
|
islower(), etc. to build the table sources. This means that the default C
|
||||||
locale that is set for your system will control the contents of these default
|
locale that is set for your system will control the contents of these default
|
||||||
|
@ -732,32 +733,31 @@ file does not get automatically re-generated. The best way to do this is to
|
||||||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||||
tables.
|
tables.
|
||||||
|
|
||||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
When the pcre2_dftables program is run as a result of specifying
|
||||||
it uses the default C locale that is set on your system. It does not pay
|
--enable-rebuild-chartables, it uses the default C locale that is set on your
|
||||||
attention to the LC_xxx environment variables. In other words, it uses the
|
system. It does not pay attention to the LC_xxx environment variables. In other
|
||||||
system's default locale rather than whatever the compiling user happens to have
|
words, it uses the system's default locale rather than whatever the compiling
|
||||||
set. If you really do want to build a source set of character tables in a
|
user happens to have set. If you really do want to build a source set of
|
||||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
character tables in a locale that is specified by the LC_xxx variables, you can
|
||||||
program by hand with the -L option. For example:
|
run the pcre2_dftables program by hand with the -L option. For example:
|
||||||
|
|
||||||
./dftables -L pcre2_chartables.c.special
|
./pcre2_dftables -L pcre2_chartables.c.special
|
||||||
|
|
||||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
The second argument names the file where the source code for the tables is
|
||||||
respectively. The next table consists of three 32-byte bit maps which identify
|
written. The first two 256-byte tables provide lower casing and case flipping
|
||||||
digits, "word" characters, and white space, respectively. These are used when
|
functions, respectively. The next table consists of a number of 32-byte bit
|
||||||
building 32-byte bit maps that represent character classes for code points less
|
maps which identify certain character classes such as digits, "word"
|
||||||
than 256. The final 256-byte table has bits indicating various character types,
|
characters, white space, etc. These are used when building 32-byte bit maps
|
||||||
as follows:
|
that represent character classes for code points less than 256. The final
|
||||||
|
256-byte table has bits indicating various character types, as follows:
|
||||||
|
|
||||||
1 white space character
|
1 white space character
|
||||||
2 letter
|
2 letter
|
||||||
4 decimal digit
|
4 lower case letter
|
||||||
8 hexadecimal digit
|
8 decimal digit
|
||||||
16 alphanumeric or '_'
|
16 alphanumeric or '_'
|
||||||
128 regular expression metacharacter or binary zero
|
|
||||||
|
|
||||||
You should not alter the set of characters that contain the 128 bit, as that
|
See also the pcre2build section "Creating character tables at build time".
|
||||||
will cause PCRE2 to malfunction.
|
|
||||||
|
|
||||||
|
|
||||||
File manifest
|
File manifest
|
||||||
|
@ -768,7 +768,7 @@ The distribution should contain the files listed below.
|
||||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||||
the src directory:
|
the src directory:
|
||||||
|
|
||||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
|
||||||
when --enable-rebuild-chartables is specified
|
when --enable-rebuild-chartables is specified
|
||||||
|
|
||||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 16 April 2019
|
Last updated: 20 March 2020
|
||||||
|
|
|
@ -74,14 +74,14 @@ can skip ahead to the CMake section.
|
||||||
src/pcre2_chartables.c.
|
src/pcre2_chartables.c.
|
||||||
|
|
||||||
OR:
|
OR:
|
||||||
Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
|
Compile src/pcre2_dftables.c as a stand-alone program (using
|
||||||
if you have set up src/config.h), and then run it with the single
|
-DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
|
||||||
argument "src/pcre2_chartables.c". This generates a set of standard
|
the single argument "src/pcre2_chartables.c". This generates a set of
|
||||||
character tables and writes them to that file. The tables are generated
|
standard character tables and writes them to that file. The tables are
|
||||||
using the default C locale for your system. If you want to use a locale
|
generated using the default C locale for your system. If you want to use
|
||||||
that is specified by LC_xxx environment variables, add the -L option to
|
a locale that is specified by LC_xxx environment variables, add the -L
|
||||||
the dftables command. You must use this method if you are building on a
|
option to the pcre2_dftables command. You must use this method if you
|
||||||
system that uses EBCDIC code.
|
are building on a system that uses EBCDIC code.
|
||||||
|
|
||||||
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
|
||||||
specify alternative tables at run time.
|
specify alternative tables at run time.
|
||||||
|
|
|
@ -269,9 +269,9 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
--enable-rebuild-chartables
|
--enable-rebuild-chartables
|
||||||
|
|
||||||
a program called dftables is compiled and run in the default C locale when
|
a program called pcre2_dftables is compiled and run in the default C locale
|
||||||
you obey "make". It builds a source file called pcre2_chartables.c. If you do
|
when you obey "make". It builds a source file called pcre2_chartables.c. If
|
||||||
not specify this option, pcre2_chartables.c is created as a copy of
|
you do not specify this option, pcre2_chartables.c is created as a copy of
|
||||||
pcre2_chartables.c.dist. See "Character tables" below for further
|
pcre2_chartables.c.dist. See "Character tables" below for further
|
||||||
information.
|
information.
|
||||||
|
|
||||||
|
@ -548,11 +548,11 @@ Cross-compiling using autotools
|
||||||
|
|
||||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||||
order to cross-compile PCRE2 for some other host. However, you should NOT
|
order to cross-compile PCRE2 for some other host. However, you should NOT
|
||||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
|
||||||
file is compiled and run on the local host, in order to generate the inbuilt
|
source file is compiled and run on the local host, in order to generate the
|
||||||
character tables (the pcre2_chartables.c file). This will probably not work,
|
inbuilt character tables (the pcre2_chartables.c file). This will probably not
|
||||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
work, because pcre2_dftables.c needs to be compiled with the local compiler,
|
||||||
compiler.
|
not the cross compiler.
|
||||||
|
|
||||||
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
|
||||||
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
created by making a copy of pcre2_chartables.c.dist, which is a default set of
|
||||||
|
@ -560,9 +560,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
|
||||||
not be a problem.
|
not be a problem.
|
||||||
|
|
||||||
If you need to modify the character tables when cross-compiling, you should
|
If you need to modify the character tables when cross-compiling, you should
|
||||||
move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
|
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
|
||||||
and run it on the local host to make a new version of pcre2_chartables.c.dist.
|
hand and run it on the local host to make a new version of
|
||||||
Then when you cross-compile PCRE2 this new version of the tables will be used.
|
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||||
|
at build time" for more details.
|
||||||
|
|
||||||
|
|
||||||
Making new tarballs
|
Making new tarballs
|
||||||
|
@ -721,8 +722,8 @@ compile context.
|
||||||
The source file called pcre2_chartables.c contains the default set of tables.
|
The source file called pcre2_chartables.c contains the default set of tables.
|
||||||
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
By default, this is created as a copy of pcre2_chartables.c.dist, which
|
||||||
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
|
||||||
specified for ./configure, a different version of pcre2_chartables.c is built
|
specified for ./configure, a new version of pcre2_chartables.c is built by the
|
||||||
by the program dftables (compiled from dftables.c), which uses the ANSI C
|
program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
|
||||||
character handling functions such as isalnum(), isalpha(), isupper(),
|
character handling functions such as isalnum(), isalpha(), isupper(),
|
||||||
islower(), etc. to build the table sources. This means that the default C
|
islower(), etc. to build the table sources. This means that the default C
|
||||||
locale that is set for your system will control the contents of these default
|
locale that is set for your system will control the contents of these default
|
||||||
|
@ -732,32 +733,31 @@ file does not get automatically re-generated. The best way to do this is to
|
||||||
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
move pcre2_chartables.c.dist out of the way and replace it with your customized
|
||||||
tables.
|
tables.
|
||||||
|
|
||||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
When the pcre2_dftables program is run as a result of specifying
|
||||||
it uses the default C locale that is set on your system. It does not pay
|
--enable-rebuild-chartables, it uses the default C locale that is set on your
|
||||||
attention to the LC_xxx environment variables. In other words, it uses the
|
system. It does not pay attention to the LC_xxx environment variables. In other
|
||||||
system's default locale rather than whatever the compiling user happens to have
|
words, it uses the system's default locale rather than whatever the compiling
|
||||||
set. If you really do want to build a source set of character tables in a
|
user happens to have set. If you really do want to build a source set of
|
||||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
character tables in a locale that is specified by the LC_xxx variables, you can
|
||||||
program by hand with the -L option. For example:
|
run the pcre2_dftables program by hand with the -L option. For example:
|
||||||
|
|
||||||
./dftables -L pcre2_chartables.c.special
|
./pcre2_dftables -L pcre2_chartables.c.special
|
||||||
|
|
||||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
The second argument names the file where the source code for the tables is
|
||||||
respectively. The next table consists of three 32-byte bit maps which identify
|
written. The first two 256-byte tables provide lower casing and case flipping
|
||||||
digits, "word" characters, and white space, respectively. These are used when
|
functions, respectively. The next table consists of a number of 32-byte bit
|
||||||
building 32-byte bit maps that represent character classes for code points less
|
maps which identify certain character classes such as digits, "word"
|
||||||
than 256. The final 256-byte table has bits indicating various character types,
|
characters, white space, etc. These are used when building 32-byte bit maps
|
||||||
as follows:
|
that represent character classes for code points less than 256. The final
|
||||||
|
256-byte table has bits indicating various character types, as follows:
|
||||||
|
|
||||||
1 white space character
|
1 white space character
|
||||||
2 letter
|
2 letter
|
||||||
4 decimal digit
|
4 lower case letter
|
||||||
8 hexadecimal digit
|
8 decimal digit
|
||||||
16 alphanumeric or '_'
|
16 alphanumeric or '_'
|
||||||
128 regular expression metacharacter or binary zero
|
|
||||||
|
|
||||||
You should not alter the set of characters that contain the 128 bit, as that
|
See also the pcre2build section "Creating character tables at build time".
|
||||||
will cause PCRE2 to malfunction.
|
|
||||||
|
|
||||||
|
|
||||||
File manifest
|
File manifest
|
||||||
|
@ -768,7 +768,7 @@ The distribution should contain the files listed below.
|
||||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||||
the src directory:
|
the src directory:
|
||||||
|
|
||||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
|
||||||
when --enable-rebuild-chartables is specified
|
when --enable-rebuild-chartables is specified
|
||||||
|
|
||||||
src/pcre2_chartables.c.dist a default set of character tables that assume
|
src/pcre2_chartables.c.dist a default set of character tables that assume
|
||||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 16 April 2019
|
Last updated: 20 March 2020
|
||||||
|
|
|
@ -27,9 +27,12 @@ DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function sets a pointer to custom character tables within a compile
|
This function sets a pointer to custom character tables within a compile
|
||||||
context. The second argument must be the result of a call to
|
context. The second argument must point to a set of PCRE2 character tables or
|
||||||
<b>pcre2_maketables()</b> or NULL to request the default tables. The result is
|
be NULL to request the default tables. The result is always zero. Character
|
||||||
always zero.
|
tables can be created by calling <b>pcre2_maketables()</b> or by running the
|
||||||
|
<b>pcre2_dftables</b> maintenance command in binary mode (see the
|
||||||
|
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||||
|
documentation).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -1105,10 +1105,11 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
|
||||||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to
|
The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to find
|
||||||
discover which optional features have been compiled into the PCRE2 library. The
|
the value of certain configuration parameters and to discover which optional
|
||||||
|
features have been compiled into the PCRE2 library. The
|
||||||
<a href="pcre2build.html"><b>pcre2build</b></a>
|
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||||
documentation has more details about these optional features.
|
documentation has more details about these features.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The first argument for <b>pcre2_config()</b> specifies which information is
|
The first argument for <b>pcre2_config()</b> specifies which information is
|
||||||
|
@ -1224,6 +1225,13 @@ over compilation stack usage, see <b>pcre2_set_compile_recursion_guard()</b>.
|
||||||
</pre>
|
</pre>
|
||||||
This parameter is obsolete and should not be used in new code. The output is a
|
This parameter is obsolete and should not be used in new code. The output is a
|
||||||
uint32_t integer that is always set to zero.
|
uint32_t integer that is always set to zero.
|
||||||
|
<pre>
|
||||||
|
PCRE2_CONFIG_TABLES_LENGTH
|
||||||
|
</pre>
|
||||||
|
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||||
|
processing tables in bytes. For details of these tables see the
|
||||||
|
<a href="#localesupport">section on locale support</a>
|
||||||
|
below.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_UNICODE_VERSION
|
PCRE2_CONFIG_UNICODE_VERSION
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2043,7 +2051,7 @@ calling <b>pcre2_set_character_tables()</b> to set the tables pointer therein.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For example, to build and use tables that are appropriate for the French locale
|
For example, to build and use tables that are appropriate for the French locale
|
||||||
(where accented characters with values greater than 128 are treated as
|
(where accented characters with values greater than 127 are treated as
|
||||||
letters), the following code could be used:
|
letters), the following code could be used:
|
||||||
<pre>
|
<pre>
|
||||||
setlocale(LC_CTYPE, "fr_FR");
|
setlocale(LC_CTYPE, "fr_FR");
|
||||||
|
@ -2057,10 +2065,10 @@ are using Windows, the name for the French locale is "french".
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
|
The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
|
||||||
is saved with the compiled pattern, and the same tables are used by
|
is saved with the compiled pattern, and the same tables are used by the
|
||||||
<b>pcre2_match()</b> and <b>pcre_dfa_match()</b>. Thus, for any single pattern,
|
matching functions. Thus, for any single pattern, compilation and matching both
|
||||||
compilation and matching both happen in the same locale, but different patterns
|
happen in the same locale, but different patterns can be processed in different
|
||||||
can be processed in different locales.
|
locales.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
It is the caller's responsibility to ensure that the memory containing the
|
It is the caller's responsibility to ensure that the memory containing the
|
||||||
|
@ -2068,6 +2076,23 @@ tables remains available while they are still in use. When they are no longer
|
||||||
needed, you can discard them using <b>pcre2_maketables_free()</b>, which should
|
needed, you can discard them using <b>pcre2_maketables_free()</b>, which should
|
||||||
pass as its first parameter the same global context that was used to create the
|
pass as its first parameter the same global context that was used to create the
|
||||||
tables.
|
tables.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Saving locale tables
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The tables described above are just a sequence of binary bytes, which makes
|
||||||
|
them independent of hardware characteristics such as endianness or whether the
|
||||||
|
processor is 32-bit or 64-bit. A copy of the result of <b>pcre2_maketables()</b>
|
||||||
|
can therefore be saved in a file or elsewhere and re-used later, even in a
|
||||||
|
different program or on another computer. The size of the tables (number of
|
||||||
|
bytes) must be obtained by calling <b>pcre2_config()</b> with the
|
||||||
|
PCRE2_CONFIG_TABLES_LENGTH option because <b>pcre2_maketables()</b> does not
|
||||||
|
return this value. Note that the <b>pcre2_dftables</b> program, which is part of
|
||||||
|
the PCRE2 build system, can be used stand-alone to create a file that contains
|
||||||
|
a set of binary tables. See the
|
||||||
|
<a href="pcre2build.html#createtables"><b>pcre2build</b></a>
|
||||||
|
documentation for details.
|
||||||
<a name="infoaboutpattern"></a></P>
|
<a name="infoaboutpattern"></a></P>
|
||||||
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2076,7 +2101,7 @@ tables.
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_pattern_info()</b> function returns general information about a
|
The <b>pcre2_pattern_info()</b> function returns general information about a
|
||||||
compiled pattern. For information about callouts, see the
|
compiled pattern. For information about callouts, see the
|
||||||
<a href="pcre2pattern.html#infoaboutcallouts">next section.</a>
|
<a href="#infoaboutcallouts">next section.</a>
|
||||||
The first argument for <b>pcre2_pattern_info()</b> is a pointer to the compiled
|
The first argument for <b>pcre2_pattern_info()</b> is a pointer to the compiled
|
||||||
pattern. The second argument specifies which piece of information is required,
|
pattern. The second argument specifies which piece of information is required,
|
||||||
and the third argument is a pointer to a variable to receive the data. If the
|
and the third argument is a pointer to a variable to receive the data. If the
|
||||||
|
@ -3931,7 +3956,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 24 February 2020
|
Last updated: 19 March 2020
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2020 University of Cambridge.
|
Copyright © 1997-2020 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -128,7 +128,7 @@ To build it without Unicode support, add
|
||||||
--disable-unicode
|
--disable-unicode
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. This setting applies to all three libraries. It
|
to the <b>configure</b> command. This setting applies to all three libraries. It
|
||||||
is not possible to build one library with Unicode support, and another without,
|
is not possible to build one library with Unicode support and another without
|
||||||
in the same configuration.
|
in the same configuration.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -188,11 +188,11 @@ which enables the use of an execmem allocator in JIT that is compatible with
|
||||||
SELinux. This has no effect if JIT is not enabled. See the
|
SELinux. This has no effect if JIT is not enabled. See the
|
||||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||||
documentation for a discussion of JIT usage. When JIT support is enabled,
|
documentation for a discussion of JIT usage. When JIT support is enabled,
|
||||||
pcre2grep automatically makes use of it, unless you add
|
<b>pcre2grep</b> automatically makes use of it, unless you add
|
||||||
<pre>
|
<pre>
|
||||||
--disable-pcre2grep-jit
|
--disable-pcre2grep-jit
|
||||||
</pre>
|
</pre>
|
||||||
to the "configure" command.
|
to the <b>configure</b> command.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
|
<br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -321,7 +321,7 @@ As well as applying to <b>pcre2_match()</b>, the depth limit also controls
|
||||||
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
|
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
|
||||||
used for lookaround assertions, atomic groups, and recursion within patterns.
|
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||||
The limit does not apply to JIT matching.
|
The limit does not apply to JIT matching.
|
||||||
</P>
|
<a name="createtables"></a></P>
|
||||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 uses fixed tables for processing characters whose code points are less
|
PCRE2 uses fixed tables for processing characters whose code points are less
|
||||||
|
@ -332,12 +332,34 @@ only. If you add
|
||||||
--enable-rebuild-chartables
|
--enable-rebuild-chartables
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command, the distributed tables are no longer used.
|
to the <b>configure</b> command, the distributed tables are no longer used.
|
||||||
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
|
Instead, a program called <b>pcre2_dftables</b> is compiled and run. This
|
||||||
source for new set of tables, created in the default locale of your C run-time
|
outputs the source for a new set of tables, created in the default locale of your
|
||||||
system. This method of replacing the tables does not work if you are cross
|
C run-time system. This method of replacing the tables does not work if you are
|
||||||
compiling, because <b>dftables</b> is run on the local host. If you need to
|
cross compiling, because <b>pcre2_dftables</b> needs to be run on the local
|
||||||
create alternative tables when cross compiling, you will have to do so "by
|
host and therefore must not be compiled with the cross compiler.
|
||||||
hand".
|
</P>
|
||||||
|
<P>
|
||||||
|
If you need to create alternative tables when cross compiling, you will have to
|
||||||
|
do so "by hand". There may also be other reasons for creating tables manually.
|
||||||
|
To cause <b>pcre2_dftables</b> to be built on the local host, run a normal
|
||||||
|
compiling command, and then run the program with the output file as its
|
||||||
|
argument, for example:
|
||||||
|
<pre>
|
||||||
|
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||||
|
./pcre2_dftables src/pcre2_chartables.c
|
||||||
|
</pre>
|
||||||
|
This builds the tables in the default locale of the local host. If you want to
|
||||||
|
specify a locale, you must use the -L option:
|
||||||
|
<pre>
|
||||||
|
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||||
|
</pre>
|
||||||
|
You can also specify -b (with or without -L). This causes the tables to be
|
||||||
|
written in binary instead of as source code. A set of binary tables can be
|
||||||
|
loaded into memory by an application and passed to <b>pcre2_compile()</b> in the
|
||||||
|
same way as tables created by calling <b>pcre2_maketables()</b>. The tables are
|
||||||
|
just a string of bytes, independent of hardware characteristics such as
|
||||||
|
endianness. This means they can be bundled with an application that runs in
|
||||||
|
different environments, to ensure consistent behaviour.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -538,7 +560,7 @@ support these modifiers. If
|
||||||
<pre>
|
<pre>
|
||||||
--disable-percent-zt
|
--disable-percent-zt
|
||||||
</pre>
|
</pre>
|
||||||
is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
|
is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
|
||||||
%lu is used, with a cast for size_t values.
|
%lu is used, with a cast for size_t values.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
|
<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
|
||||||
|
@ -592,9 +614,9 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 03 March 2019
|
Last updated: 20 March 2020
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2019 University of Cambridge.
|
Copyright © 1997-2020 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -375,6 +375,12 @@ output.
|
||||||
This command is used to load a set of precompiled patterns from a file, as
|
This command is used to load a set of precompiled patterns from a file, as
|
||||||
described in the section entitled "Saving and restoring compiled patterns"
|
described in the section entitled "Saving and restoring compiled patterns"
|
||||||
<a href="#saverestore">below.</a>
|
<a href="#saverestore">below.</a>
|
||||||
|
<pre>
|
||||||
|
#loadtables <filename>
|
||||||
|
</pre>
|
||||||
|
This command is used to load a set of binary character tables that can be
|
||||||
|
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||||
|
<b>pcre2_dftables</b> program with the -b option.
|
||||||
<pre>
|
<pre>
|
||||||
#newline_default [<newline-list>]
|
#newline_default [<newline-list>]
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -679,7 +685,7 @@ heavily used in the test files.
|
||||||
pushcopy push a copy onto the stack
|
pushcopy push a copy onto the stack
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
subject_literal treat all subject lines as literal
|
subject_literal treat all subject lines as literal
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2|3] select internal tables
|
||||||
use_length do not zero-terminate the pattern
|
use_length do not zero-terminate the pattern
|
||||||
utf8_input treat input as UTF-8
|
utf8_input treat input as UTF-8
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1027,18 +1033,20 @@ Using alternative character tables
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The value specified for the <b>tables</b> modifier must be one of the digits 0,
|
The value specified for the <b>tables</b> modifier must be one of the digits 0,
|
||||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
1, 2, or 3. It causes a specific set of built-in character tables to be passed
|
||||||
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
to <b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour
|
||||||
different character tables. The digit specifies the tables as follows:
|
with different character tables. The digit specifies the tables as follows:
|
||||||
<pre>
|
<pre>
|
||||||
0 do not pass any special character tables
|
0 do not pass any special character tables
|
||||||
1 the default ASCII tables, as distributed in
|
1 the default ASCII tables, as distributed in
|
||||||
pcre2_chartables.c.dist
|
pcre2_chartables.c.dist
|
||||||
2 a set of tables defining ISO 8859 characters
|
2 a set of tables defining ISO 8859 characters
|
||||||
|
3 a set of tables loaded by the #loadtables command
|
||||||
</pre>
|
</pre>
|
||||||
In table 2, some characters whose codes are greater than 128 are identified as
|
In tables 2, some characters whose codes are greater than 128 are identified as
|
||||||
letters, digits, spaces, etc. Setting alternate character tables and a locale
|
letters, digits, spaces, etc. Tables 3 can be used only after a
|
||||||
are mutually exclusive.
|
<b>#loadtables</b> command has loaded them from a binary file. Setting alternate
|
||||||
|
character tables and a locale are mutually exclusive.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Setting certain match controls
|
Setting certain match controls
|
||||||
|
@ -2105,7 +2113,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 22 January 2020
|
Last updated: 20 March 2020
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2020 University of Cambridge.
|
Copyright © 1997-2020 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -1103,9 +1103,9 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
int pcre2_config(uint32_t what, void *where);
|
int pcre2_config(uint32_t what, void *where);
|
||||||
|
|
||||||
The function pcre2_config() makes it possible for a PCRE2 client to
|
The function pcre2_config() makes it possible for a PCRE2 client to
|
||||||
discover which optional features have been compiled into the PCRE2 li-
|
find the value of certain configuration parameters and to discover
|
||||||
brary. The pcre2build documentation has more details about these op-
|
which optional features have been compiled into the PCRE2 library. The
|
||||||
tional features.
|
pcre2build documentation has more details about these features.
|
||||||
|
|
||||||
The first argument for pcre2_config() specifies which information is
|
The first argument for pcre2_config() specifies which information is
|
||||||
required. The second argument is a pointer to memory into which the in-
|
required. The second argument is a pointer to memory into which the in-
|
||||||
|
@ -1225,6 +1225,12 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
This parameter is obsolete and should not be used in new code. The out-
|
This parameter is obsolete and should not be used in new code. The out-
|
||||||
put is a uint32_t integer that is always set to zero.
|
put is a uint32_t integer that is always set to zero.
|
||||||
|
|
||||||
|
PCRE2_CONFIG_TABLES_LENGTH
|
||||||
|
|
||||||
|
The output is a uint32_t integer that gives the length of PCRE2's char-
|
||||||
|
acter processing tables in bytes. For details of these tables see the
|
||||||
|
section on locale support below.
|
||||||
|
|
||||||
PCRE2_CONFIG_UNICODE_VERSION
|
PCRE2_CONFIG_UNICODE_VERSION
|
||||||
|
|
||||||
The where argument should point to a buffer that is at least 24 code
|
The where argument should point to a buffer that is at least 24 code
|
||||||
|
@ -1994,7 +2000,7 @@ LOCALE SUPPORT
|
||||||
therein.
|
therein.
|
||||||
|
|
||||||
For example, to build and use tables that are appropriate for the
|
For example, to build and use tables that are appropriate for the
|
||||||
French locale (where accented characters with values greater than 128
|
French locale (where accented characters with values greater than 127
|
||||||
are treated as letters), the following code could be used:
|
are treated as letters), the following code could be used:
|
||||||
|
|
||||||
setlocale(LC_CTYPE, "fr_FR");
|
setlocale(LC_CTYPE, "fr_FR");
|
||||||
|
@ -2007,10 +2013,10 @@ LOCALE SUPPORT
|
||||||
if you are using Windows, the name for the French locale is "french".
|
if you are using Windows, the name for the French locale is "french".
|
||||||
|
|
||||||
The pointer that is passed (via the compile context) to pcre2_compile()
|
The pointer that is passed (via the compile context) to pcre2_compile()
|
||||||
is saved with the compiled pattern, and the same tables are used by
|
is saved with the compiled pattern, and the same tables are used by the
|
||||||
pcre2_match() and pcre_dfa_match(). Thus, for any single pattern, com-
|
matching functions. Thus, for any single pattern, compilation and
|
||||||
pilation and matching both happen in the same locale, but different
|
matching both happen in the same locale, but different patterns can be
|
||||||
patterns can be processed in different locales.
|
processed in different locales.
|
||||||
|
|
||||||
It is the caller's responsibility to ensure that the memory containing
|
It is the caller's responsibility to ensure that the memory containing
|
||||||
the tables remains available while they are still in use. When they are
|
the tables remains available while they are still in use. When they are
|
||||||
|
@ -2018,6 +2024,20 @@ LOCALE SUPPORT
|
||||||
which should pass as its first parameter the same global context that
|
which should pass as its first parameter the same global context that
|
||||||
was used to create the tables.
|
was used to create the tables.
|
||||||
|
|
||||||
|
Saving locale tables
|
||||||
|
|
||||||
|
The tables described above are just a sequence of binary bytes, which
|
||||||
|
makes them independent of hardware characteristics such as endianness
|
||||||
|
or whether the processor is 32-bit or 64-bit. A copy of the result of
|
||||||
|
pcre2_maketables() can therefore be saved in a file or elsewhere and
|
||||||
|
re-used later, even in a different program or on another computer. The
|
||||||
|
size of the tables (number of bytes) must be obtained by calling
|
||||||
|
pcre2_config() with the PCRE2_CONFIG_TABLES_LENGTH option because
|
||||||
|
pcre2_maketables() does not return this value. Note that the
|
||||||
|
pcre2_dftables program, which is part of the PCRE2 build system, can be
|
||||||
|
used stand-alone to create a file that contains a set of binary tables.
|
||||||
|
See the pcre2build documentation for details.
|
||||||
|
|
||||||
|
|
||||||
INFORMATION ABOUT A COMPILED PATTERN
|
INFORMATION ABOUT A COMPILED PATTERN
|
||||||
|
|
||||||
|
@ -3773,7 +3793,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 24 February 2020
|
Last updated: 19 March 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3873,8 +3893,8 @@ UNICODE AND UTF SUPPORT
|
||||||
--disable-unicode
|
--disable-unicode
|
||||||
|
|
||||||
to the configure command. This setting applies to all three libraries.
|
to the configure command. This setting applies to all three libraries.
|
||||||
It is not possible to build one library with Unicode support, and an-
|
It is not possible to build one library with Unicode support and an-
|
||||||
other without, in the same configuration.
|
other without in the same configuration.
|
||||||
|
|
||||||
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8,
|
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8,
|
||||||
UTF-16 or UTF-32. To do that, applications that use the library can set
|
UTF-16 or UTF-32. To do that, applications that use the library can set
|
||||||
|
@ -3935,7 +3955,7 @@ JUST-IN-TIME COMPILER SUPPORT
|
||||||
|
|
||||||
--disable-pcre2grep-jit
|
--disable-pcre2grep-jit
|
||||||
|
|
||||||
to the "configure" command.
|
to the configure command.
|
||||||
|
|
||||||
|
|
||||||
NEWLINE RECOGNITION
|
NEWLINE RECOGNITION
|
||||||
|
@ -4079,12 +4099,34 @@ CREATING CHARACTER TABLES AT BUILD TIME
|
||||||
--enable-rebuild-chartables
|
--enable-rebuild-chartables
|
||||||
|
|
||||||
to the configure command, the distributed tables are no longer used.
|
to the configure command, the distributed tables are no longer used.
|
||||||
Instead, a program called dftables is compiled and run. This outputs
|
Instead, a program called pcre2_dftables is compiled and run. This out-
|
||||||
the source for new set of tables, created in the default locale of your
|
puts the source for a new set of tables, created in the default locale of
|
||||||
C run-time system. This method of replacing the tables does not work if
|
your C run-time system. This method of replacing the tables does not
|
||||||
you are cross compiling, because dftables is run on the local host. If
|
work if you are cross compiling, because pcre2_dftables needs to be run
|
||||||
you need to create alternative tables when cross compiling, you will
|
on the local host and therefore must not be compiled with the cross compiler.
|
||||||
have to do so "by hand".
|
|
||||||
|
If you need to create alternative tables when cross compiling, you will
|
||||||
|
have to do so "by hand". There may also be other reasons for creating
|
||||||
|
tables manually. To cause pcre2_dftables to be built on the local
|
||||||
|
host, run a normal compiling command, and then run the program with the
|
||||||
|
output file as its argument, for example:
|
||||||
|
|
||||||
|
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||||
|
./pcre2_dftables src/pcre2_chartables.c
|
||||||
|
|
||||||
|
This builds the tables in the default locale of the local host. If you
|
||||||
|
want to specify a locale, you must use the -L option:
|
||||||
|
|
||||||
|
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||||
|
|
||||||
|
You can also specify -b (with or without -L). This causes the tables to
|
||||||
|
be written in binary instead of as source code. A set of binary tables
|
||||||
|
can be loaded into memory by an application and passed to pcre2_com-
|
||||||
|
pile() in the same way as tables created by calling pcre2_maketables().
|
||||||
|
The tables are just a string of bytes, independent of hardware charac-
|
||||||
|
teristics such as endianness. This means they can be bundled with an
|
||||||
|
application that runs in different environments, to ensure consistent
|
||||||
|
behaviour.
|
||||||
|
|
||||||
|
|
||||||
USING EBCDIC CODE
|
USING EBCDIC CODE
|
||||||
|
@ -4289,7 +4331,7 @@ DISABLING THE Z AND T FORMATTING MODIFIERS
|
||||||
|
|
||||||
--disable-percent-zt
|
--disable-percent-zt
|
||||||
|
|
||||||
is specified, no use is made of the z or t modifiers. Instead or %td or
|
is specified, no use is made of the z or t modifiers. Instead of %td or
|
||||||
%zu, %lu is used, with a cast for size_t values.
|
%zu, %lu is used, with a cast for size_t values.
|
||||||
|
|
||||||
|
|
||||||
|
@ -4345,8 +4387,8 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 03 March 2019
|
Last updated: 20 March 2020
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_SET_CHARACTER_TABLES 3 "22 October 2014" "PCRE2 10.00"
|
.TH PCRE2_SET_CHARACTER_TABLES 3 "20 March 2020" "PCRE2 10.35"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -15,9 +15,14 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This function sets a pointer to custom character tables within a compile
|
This function sets a pointer to custom character tables within a compile
|
||||||
context. The second argument must be the result of a call to
|
context. The second argument must point to a set of PCRE2 character tables or
|
||||||
\fBpcre2_maketables()\fP or NULL to request the default tables. The result is
|
be NULL to request the default tables. The result is always zero. Character
|
||||||
always zero.
|
tables can be created by calling \fBpcre2_maketables()\fP or by running the
|
||||||
|
\fBpcre2_dftables\fP maintenance command in binary mode (see the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2build\fP
|
||||||
|
.\"
|
||||||
|
documentation).
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "24 February 2020" "PCRE2 10.35"
|
.TH PCRE2API 3 "19 March 2020" "PCRE2 10.35"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -1034,12 +1034,13 @@ less than the limit set by the caller of \fBpcre2_match()\fP or
|
||||||
.sp
|
.sp
|
||||||
.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.P
|
.P
|
||||||
The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to
|
The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to find
|
||||||
discover which optional features have been compiled into the PCRE2 library. The
|
the value of certain configuration parameters and to discover which optional
|
||||||
|
features have been compiled into the PCRE2 library. The
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2build\fP
|
\fBpcre2build\fP
|
||||||
.\"
|
.\"
|
||||||
documentation has more details about these optional features.
|
documentation has more details about these features.
|
||||||
.P
|
.P
|
||||||
The first argument for \fBpcre2_config()\fP specifies which information is
|
The first argument for \fBpcre2_config()\fP specifies which information is
|
||||||
required. The second argument is a pointer to memory into which the information
|
required. The second argument is a pointer to memory into which the information
|
||||||
|
@ -1152,6 +1153,16 @@ over compilation stack usage, see \fBpcre2_set_compile_recursion_guard()\fP.
|
||||||
.sp
|
.sp
|
||||||
This parameter is obsolete and should not be used in new code. The output is a
|
This parameter is obsolete and should not be used in new code. The output is a
|
||||||
uint32_t integer that is always set to zero.
|
uint32_t integer that is always set to zero.
|
||||||
|
.sp
|
||||||
|
PCRE2_CONFIG_TABLES_LENGTH
|
||||||
|
.sp
|
||||||
|
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||||
|
processing tables in bytes. For details of these tables see the
|
||||||
|
.\" HTML <a href="#localesupport">
|
||||||
|
.\" </a>
|
||||||
|
section on locale support
|
||||||
|
.\"
|
||||||
|
below.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_CONFIG_UNICODE_VERSION
|
PCRE2_CONFIG_UNICODE_VERSION
|
||||||
.sp
|
.sp
|
||||||
|
@ -1996,7 +2007,7 @@ the system \fBmalloc()\fP is used. The result can be passed to
|
||||||
calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein.
|
calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein.
|
||||||
.P
|
.P
|
||||||
For example, to build and use tables that are appropriate for the French locale
|
For example, to build and use tables that are appropriate for the French locale
|
||||||
(where accented characters with values greater than 128 are treated as
|
(where accented characters with values greater than 127 are treated as
|
||||||
letters), the following code could be used:
|
letters), the following code could be used:
|
||||||
.sp
|
.sp
|
||||||
setlocale(LC_CTYPE, "fr_FR");
|
setlocale(LC_CTYPE, "fr_FR");
|
||||||
|
@ -2009,10 +2020,10 @@ The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
|
||||||
are using Windows, the name for the French locale is "french".
|
are using Windows, the name for the French locale is "french".
|
||||||
.P
|
.P
|
||||||
The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP
|
The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP
|
||||||
is saved with the compiled pattern, and the same tables are used by
|
is saved with the compiled pattern, and the same tables are used by the
|
||||||
\fBpcre2_match()\fP and \fBpcre_dfa_match()\fP. Thus, for any single pattern,
|
matching functions. Thus, for any single pattern, compilation and matching both
|
||||||
compilation and matching both happen in the same locale, but different patterns
|
happen in the same locale, but different patterns can be processed in different
|
||||||
can be processed in different locales.
|
locales.
|
||||||
.P
|
.P
|
||||||
It is the caller's responsibility to ensure that the memory containing the
|
It is the caller's responsibility to ensure that the memory containing the
|
||||||
tables remains available while they are still in use. When they are no longer
|
tables remains available while they are still in use. When they are no longer
|
||||||
|
@ -2021,6 +2032,26 @@ pass as its first parameter the same global context that was used to create the
|
||||||
tables.
|
tables.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SS "Saving locale tables"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
The tables described above are just a sequence of binary bytes, which makes
|
||||||
|
them independent of hardware characteristics such as endianness or whether the
|
||||||
|
processor is 32-bit or 64-bit. A copy of the result of \fBpcre2_maketables()\fP
|
||||||
|
can therefore be saved in a file or elsewhere and re-used later, even in a
|
||||||
|
different program or on another computer. The size of the tables (number of
|
||||||
|
bytes) must be obtained by calling \fBpcre2_config()\fP with the
|
||||||
|
PCRE2_CONFIG_TABLES_LENGTH option because \fBpcre2_maketables()\fP does not
|
||||||
|
return this value. Note that the \fBpcre2_dftables\fP program, which is part of
|
||||||
|
the PCRE2 build system, can be used stand-alone to create a file that contains
|
||||||
|
a set of binary tables. See the
|
||||||
|
.\" HTML <a href="pcre2build.html#createtables">
|
||||||
|
.\" </a>
|
||||||
|
\fBpcre2build\fP
|
||||||
|
.\"
|
||||||
|
documentation for details.
|
||||||
|
.
|
||||||
|
.
|
||||||
.\" HTML <a name="infoaboutpattern"></a>
|
.\" HTML <a name="infoaboutpattern"></a>
|
||||||
.SH "INFORMATION ABOUT A COMPILED PATTERN"
|
.SH "INFORMATION ABOUT A COMPILED PATTERN"
|
||||||
.rs
|
.rs
|
||||||
|
@ -2031,7 +2062,7 @@ tables.
|
||||||
.P
|
.P
|
||||||
The \fBpcre2_pattern_info()\fP function returns general information about a
|
The \fBpcre2_pattern_info()\fP function returns general information about a
|
||||||
compiled pattern. For information about callouts, see the
|
compiled pattern. For information about callouts, see the
|
||||||
.\" HTML <a href="pcre2pattern.html#infoaboutcallouts">
|
.\" HTML <a href="#infoaboutcallouts">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
next section.
|
next section.
|
||||||
.\"
|
.\"
|
||||||
|
@ -3937,6 +3968,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 24 February 2020
|
Last updated: 19 March 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2BUILD 3 "03 March 2019" "PCRE2 10.33"
|
.TH PCRE2BUILD 3 "20 March 2020" "PCRE2 10.35"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.
|
.
|
||||||
|
@ -110,7 +110,7 @@ To build it without Unicode support, add
|
||||||
--disable-unicode
|
--disable-unicode
|
||||||
.sp
|
.sp
|
||||||
to the \fBconfigure\fP command. This setting applies to all three libraries. It
|
to the \fBconfigure\fP command. This setting applies to all three libraries. It
|
||||||
is not possible to build one library with Unicode support, and another without,
|
is not possible to build one library with Unicode support and another without
|
||||||
in the same configuration.
|
in the same configuration.
|
||||||
.P
|
.P
|
||||||
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16
|
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16
|
||||||
|
@ -175,11 +175,11 @@ SELinux. This has no effect if JIT is not enabled. See the
|
||||||
\fBpcre2jit\fP
|
\fBpcre2jit\fP
|
||||||
.\"
|
.\"
|
||||||
documentation for a discussion of JIT usage. When JIT support is enabled,
|
documentation for a discussion of JIT usage. When JIT support is enabled,
|
||||||
pcre2grep automatically makes use of it, unless you add
|
\fBpcre2grep\fP automatically makes use of it, unless you add
|
||||||
.sp
|
.sp
|
||||||
--disable-pcre2grep-jit
|
--disable-pcre2grep-jit
|
||||||
.sp
|
.sp
|
||||||
to the "configure" command.
|
to the \fBconfigure\fP command.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "NEWLINE RECOGNITION"
|
.SH "NEWLINE RECOGNITION"
|
||||||
|
@ -317,6 +317,7 @@ used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||||
The limit does not apply to JIT matching.
|
The limit does not apply to JIT matching.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.\" HTML <a name="createtables"></a>
|
||||||
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
|
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -328,12 +329,33 @@ only. If you add
|
||||||
--enable-rebuild-chartables
|
--enable-rebuild-chartables
|
||||||
.sp
|
.sp
|
||||||
to the \fBconfigure\fP command, the distributed tables are no longer used.
|
to the \fBconfigure\fP command, the distributed tables are no longer used.
|
||||||
Instead, a program called \fBdftables\fP is compiled and run. This outputs the
|
Instead, a program called \fBpcre2_dftables\fP is compiled and run. This
|
||||||
source for new set of tables, created in the default locale of your C run-time
|
outputs the source for a new set of tables, created in the default locale of your
|
||||||
system. This method of replacing the tables does not work if you are cross
|
C run-time system. This method of replacing the tables does not work if you are
|
||||||
compiling, because \fBdftables\fP is run on the local host. If you need to
|
cross compiling, because \fBpcre2_dftables\fP needs to be run on the local
|
||||||
create alternative tables when cross compiling, you will have to do so "by
|
host and therefore must not be compiled with the cross compiler.
|
||||||
hand".
|
.P
|
||||||
|
If you need to create alternative tables when cross compiling, you will have to
|
||||||
|
do so "by hand". There may also be other reasons for creating tables manually.
|
||||||
|
To cause \fBpcre2_dftables\fP to be built on the local host, run a normal
|
||||||
|
compiling command, and then run the program with the output file as its
|
||||||
|
argument, for example:
|
||||||
|
.sp
|
||||||
|
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||||
|
./pcre2_dftables src/pcre2_chartables.c
|
||||||
|
.sp
|
||||||
|
This builds the tables in the default locale of the local host. If you want to
|
||||||
|
specify a locale, you must use the -L option:
|
||||||
|
.sp
|
||||||
|
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||||
|
.sp
|
||||||
|
You can also specify -b (with or without -L). This causes the tables to be
|
||||||
|
written in binary instead of as source code. A set of binary tables can be
|
||||||
|
loaded into memory by an application and passed to \fBpcre2_compile()\fP in the
|
||||||
|
same way as tables created by calling \fBpcre2_maketables()\fP. The tables are
|
||||||
|
just a string of bytes, independent of hardware characteristics such as
|
||||||
|
endianness. This means they can be bundled with an application that runs in
|
||||||
|
different environments, to ensure consistent behaviour.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "USING EBCDIC CODE"
|
.SH "USING EBCDIC CODE"
|
||||||
|
@ -548,7 +570,7 @@ support these modifiers. If
|
||||||
.sp
|
.sp
|
||||||
--disable-percent-zt
|
--disable-percent-zt
|
||||||
.sp
|
.sp
|
||||||
is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
|
is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
|
||||||
%lu is used, with a cast for size_t values.
|
%lu is used, with a cast for size_t values.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -610,6 +632,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 03 March 2019
|
Last updated: 20 March 2020
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "22 January 2020" "PCRE 10.35"
|
.TH PCRE2TEST 1 "20 March 2020" "PCRE 10.35"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -326,6 +326,12 @@ described in the section entitled "Saving and restoring compiled patterns"
|
||||||
.\" </a>
|
.\" </a>
|
||||||
below.
|
below.
|
||||||
.\"
|
.\"
|
||||||
|
.sp
|
||||||
|
#loadtables <filename>
|
||||||
|
.sp
|
||||||
|
This command is used to load a set of binary character tables that can be
|
||||||
|
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||||
|
\fBpcre2_dftables\fP program with the -b option.
|
||||||
.sp
|
.sp
|
||||||
#newline_default [<newline-list>]
|
#newline_default [<newline-list>]
|
||||||
.sp
|
.sp
|
||||||
|
@ -638,7 +644,7 @@ heavily used in the test files.
|
||||||
pushcopy push a copy onto the stack
|
pushcopy push a copy onto the stack
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
subject_literal treat all subject lines as literal
|
subject_literal treat all subject lines as literal
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2|3] select internal tables
|
||||||
use_length do not zero-terminate the pattern
|
use_length do not zero-terminate the pattern
|
||||||
utf8_input treat input as UTF-8
|
utf8_input treat input as UTF-8
|
||||||
.sp
|
.sp
|
||||||
|
@ -988,18 +994,20 @@ be aborted.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The value specified for the \fBtables\fP modifier must be one of the digits 0,
|
The value specified for the \fBtables\fP modifier must be one of the digits 0,
|
||||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
1, 2, or 3. It causes a specific set of built-in character tables to be passed
|
||||||
\fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour with
|
to \fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour
|
||||||
different character tables. The digit specifies the tables as follows:
|
with different character tables. The digit specifies the tables as follows:
|
||||||
.sp
|
.sp
|
||||||
0 do not pass any special character tables
|
0 do not pass any special character tables
|
||||||
1 the default ASCII tables, as distributed in
|
1 the default ASCII tables, as distributed in
|
||||||
pcre2_chartables.c.dist
|
pcre2_chartables.c.dist
|
||||||
2 a set of tables defining ISO 8859 characters
|
2 a set of tables defining ISO 8859 characters
|
||||||
|
3 a set of tables loaded by the #loadtables command
|
||||||
.sp
|
.sp
|
||||||
In table 2, some characters whose codes are greater than 128 are identified as
|
In tables 2, some characters whose codes are greater than 128 are identified as
|
||||||
letters, digits, spaces, etc. Setting alternate character tables and a locale
|
letters, digits, spaces, etc. Tables 3 can be used only after a
|
||||||
are mutually exclusive.
|
\fB#loadtables\fP command has loaded them from a binary file. Setting alternate
|
||||||
|
character tables and a locale are mutually exclusive.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Setting certain match controls"
|
.SS "Setting certain match controls"
|
||||||
|
@ -2088,6 +2096,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 22 January 2020
|
Last updated: 20 March 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -309,6 +309,12 @@ COMMAND LINES
|
||||||
as described in the section entitled "Saving and restoring compiled
|
as described in the section entitled "Saving and restoring compiled
|
||||||
patterns" below.
|
patterns" below.
|
||||||
|
|
||||||
|
#loadtables <filename>
|
||||||
|
|
||||||
|
This command is used to load a set of binary character tables that can
|
||||||
|
be accessed by the tables=3 qualifier. Such tables can be created by
|
||||||
|
the pcre2_dftables program with the -b option.
|
||||||
|
|
||||||
#newline_default [<newline-list>]
|
#newline_default [<newline-list>]
|
||||||
|
|
||||||
When PCRE2 is built, a default newline convention can be specified.
|
When PCRE2 is built, a default newline convention can be specified.
|
||||||
|
@ -613,7 +619,7 @@ PATTERN MODIFIERS
|
||||||
pushcopy push a copy onto the stack
|
pushcopy push a copy onto the stack
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
subject_literal treat all subject lines as literal
|
subject_literal treat all subject lines as literal
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2|3] select internal tables
|
||||||
use_length do not zero-terminate the pattern
|
use_length do not zero-terminate the pattern
|
||||||
utf8_input treat input as UTF-8
|
utf8_input treat input as UTF-8
|
||||||
|
|
||||||
|
@ -914,19 +920,21 @@ PATTERN MODIFIERS
|
||||||
Using alternative character tables
|
Using alternative character tables
|
||||||
|
|
||||||
The value specified for the tables modifier must be one of the digits
|
The value specified for the tables modifier must be one of the digits
|
||||||
0, 1, or 2. It causes a specific set of built-in character tables to be
|
0, 1, 2, or 3. It causes a specific set of built-in character tables to
|
||||||
passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
|
be passed to pcre2_compile(). This is used in the PCRE2 tests to check
|
||||||
haviour with different character tables. The digit specifies the tables
|
behaviour with different character tables. The digit specifies the ta-
|
||||||
as follows:
|
bles as follows:
|
||||||
|
|
||||||
0 do not pass any special character tables
|
0 do not pass any special character tables
|
||||||
1 the default ASCII tables, as distributed in
|
1 the default ASCII tables, as distributed in
|
||||||
pcre2_chartables.c.dist
|
pcre2_chartables.c.dist
|
||||||
2 a set of tables defining ISO 8859 characters
|
2 a set of tables defining ISO 8859 characters
|
||||||
|
3 a set of tables loaded by the #loadtables command
|
||||||
|
|
||||||
In table 2, some characters whose codes are greater than 128 are iden-
|
In tables 2, some characters whose codes are greater than 128 are iden-
|
||||||
tified as letters, digits, spaces, etc. Setting alternate character ta-
|
tified as letters, digits, spaces, etc. Tables 3 can be used only after
|
||||||
bles and a locale are mutually exclusive.
|
a #loadtables command has loaded them from a binary file. Setting al-
|
||||||
|
ternate character tables and a locale are mutually exclusive.
|
||||||
|
|
||||||
Setting certain match controls
|
Setting certain match controls
|
||||||
|
|
||||||
|
@ -1914,5 +1922,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 22 January 2020
|
Last updated: 20 March 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
|
|
|
@ -448,6 +448,7 @@ released, the numbers must not be changed. */
|
||||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||||
|
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||||
|
|
||||||
|
|
||||||
/* Types for code units in patterns and subject strings. */
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
|
|
@ -2,17 +2,21 @@
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This file was automatically written by the dftables auxiliary
|
/* This file was automatically written by the pcre2_dftables auxiliary
|
||||||
program. It contains character tables that are used when no external
|
program. It contains character tables that are used when no external
|
||||||
tables are passed to PCRE2 by the application that calls it. The tables
|
tables are passed to PCRE2 by the application that calls it. The tables
|
||||||
are used only for characters whose code values are less than 256. */
|
are used only for characters whose code values are less than 256. */
|
||||||
|
|
||||||
/*The dftables program (which is distributed with PCRE2) can be used to
|
/* This set of tables was written in the C locale. */
|
||||||
build alternative versions of this file. This is necessary if you are
|
|
||||||
|
/* The pcre2_dftables program (which is distributed with PCRE2) can be used
|
||||||
|
to build alternative versions of this file. This is necessary if you are
|
||||||
running in an EBCDIC environment, or if you want to default to a different
|
running in an EBCDIC environment, or if you want to default to a different
|
||||||
encoding, for example ISO-8859-1. When dftables is run, it creates these
|
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
|
||||||
tables in the current locale. This happens automatically if PCRE2 is
|
these tables in the "C" locale by default. This happens automatically if
|
||||||
configured with --enable-rebuild-chartables. */
|
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
|
||||||
|
pcre2_dftables manually with the -L option to build tables using the LC_ALL
|
||||||
|
locale. */
|
||||||
|
|
||||||
/* The following #include is present because without it gcc 4.x may remove
|
/* The following #include is present because without it gcc 4.x may remove
|
||||||
the array definition from the final binary if PCRE2 is built into a static
|
the array definition from the final binary if PCRE2 is built into a static
|
||||||
|
@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
|
||||||
/* This table contains bit maps for various character classes. Each map is 32
|
/* This table contains bit maps for various character classes. Each map is 32
|
||||||
bytes long and the bits run from the least significant end of each byte. The
|
bytes long and the bits run from the least significant end of each byte. The
|
||||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||||
graph print, punct, and cntrl. Other classes are built from combinations. */
|
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||||
|
|
||||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
|
||||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
|
||||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
|
||||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
|
||||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
|
||||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
|
||||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
|
||||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
||||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||||
|
|
|
@ -1202,7 +1202,7 @@ in the decoded tables. */
|
||||||
|
|
||||||
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
||||||
{
|
{
|
||||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||||
(*ref_count)++;
|
(*ref_count)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1232,15 +1232,15 @@ if (newcode == NULL) return NULL;
|
||||||
memcpy(newcode, code, code->blocksize);
|
memcpy(newcode, code, code->blocksize);
|
||||||
newcode->executable_jit = NULL;
|
newcode->executable_jit = NULL;
|
||||||
|
|
||||||
newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
|
newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
|
||||||
code->memctl.memory_data);
|
code->memctl.memory_data);
|
||||||
if (newtables == NULL)
|
if (newtables == NULL)
|
||||||
{
|
{
|
||||||
code->memctl.free((void *)newcode, code->memctl.memory_data);
|
code->memctl.free((void *)newcode, code->memctl.memory_data);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
memcpy(newtables, code->tables, tables_length);
|
memcpy(newtables, code->tables, TABLES_LENGTH);
|
||||||
ref_count = (PCRE2_SIZE *)(newtables + tables_length);
|
ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
|
||||||
*ref_count = 1;
|
*ref_count = 1;
|
||||||
|
|
||||||
newcode->tables = newtables;
|
newcode->tables = newtables;
|
||||||
|
@ -1270,7 +1270,7 @@ if (code != NULL)
|
||||||
be freed when there are no more references to them. The *ref_count should
|
be freed when there are no more references to them. The *ref_count should
|
||||||
always be > 0. */
|
always be > 0. */
|
||||||
|
|
||||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||||
if (*ref_count > 0)
|
if (*ref_count > 0)
|
||||||
{
|
{
|
||||||
(*ref_count)--;
|
(*ref_count)--;
|
||||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||||
its value gets changed by pcre2_internal.h to be in code units. */
|
its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
|
||||||
|
be in code units. */
|
||||||
|
|
||||||
static int configured_link_size = LINK_SIZE;
|
static int configured_link_size = LINK_SIZE;
|
||||||
|
|
||||||
|
@ -94,6 +95,7 @@ if (where == NULL) /* Requests a length */
|
||||||
case PCRE2_CONFIG_NEWLINE:
|
case PCRE2_CONFIG_NEWLINE:
|
||||||
case PCRE2_CONFIG_PARENSLIMIT:
|
case PCRE2_CONFIG_PARENSLIMIT:
|
||||||
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||||
|
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||||
case PCRE2_CONFIG_UNICODE:
|
case PCRE2_CONFIG_UNICODE:
|
||||||
return sizeof(uint32_t);
|
return sizeof(uint32_t);
|
||||||
|
|
||||||
|
@ -191,6 +193,10 @@ switch (what)
|
||||||
*((uint32_t *)where) = 0;
|
*((uint32_t *)where) = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||||
|
*((uint32_t *)where) = TABLES_LENGTH;
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UNICODE
|
#if defined SUPPORT_UNICODE
|
||||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -40,9 +40,12 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
/* This is a freestanding support program to generate a file containing
|
/* This is a freestanding support program to generate a file containing
|
||||||
character tables for PCRE2. The tables are built according to the current
|
character tables for PCRE2. The tables are built using the pcre2_maketables()
|
||||||
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
function, which is part of the PCRE2 API. By default, the system's "C" locale
|
||||||
*/
|
is used rather than what the building user happens to have set, but the -L
|
||||||
|
option can be used to select the current locale from the LC_ALL environment
|
||||||
|
variable. By default, the tables are written in source form, but if -b is
|
||||||
|
given, they are written in binary. */
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef HAVE_CONFIG_H
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
@ -56,31 +59,88 @@ locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
||||||
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||||
#include "pcre2_internal.h"
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
#define DFTABLES /* pcre2_maketables.c notices this */
|
#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */
|
||||||
#include "pcre2_maketables.c"
|
#include "pcre2_maketables.c"
|
||||||
|
|
||||||
|
|
||||||
|
static char *classlist[] =
|
||||||
|
{
|
||||||
|
"space", "xdigit", "digit", "upper", "lower",
|
||||||
|
"word", "graph", "print", "punct", "cntrl"
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Usage *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
static void
|
||||||
|
usage(void)
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr,
|
||||||
|
"Usage: pcre2_dftables [options] <output file>\n"
|
||||||
|
" -b Write output in binary (default is source code)\n"
|
||||||
|
" -L Use locale from LC_ALL (default is \"C\" locale)\n"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Entry point *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
FILE *f;
|
FILE *f;
|
||||||
int i = 1;
|
int i;
|
||||||
|
int nclass = 0;
|
||||||
|
BOOL binary = FALSE;
|
||||||
|
char *env = "C";
|
||||||
const unsigned char *tables;
|
const unsigned char *tables;
|
||||||
const unsigned char *base_of_tables;
|
const unsigned char *base_of_tables;
|
||||||
|
|
||||||
/* By default, the default C locale is used rather than what the building user
|
/* Process options */
|
||||||
happens to have set. However, if the -L option is given, set the locale from
|
|
||||||
the LC_xxx environment variables. */
|
|
||||||
|
|
||||||
if (argc > 1 && strcmp(argv[1], "-L") == 0)
|
for (i = 1; i < argc; i++)
|
||||||
{
|
{
|
||||||
setlocale(LC_ALL, ""); /* Set from environment variables */
|
unsigned char *arg = (unsigned char *)argv[i];
|
||||||
i++;
|
if (*arg != '-') break;
|
||||||
|
|
||||||
|
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||||
|
{
|
||||||
|
usage();
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argc < i + 1)
|
else if (strcmp(arg, "-L") == 0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
if (setlocale(LC_ALL, "") == NULL)
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
env = getenv("LC_ALL");
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (strcmp(arg, "-b") == 0)
|
||||||
|
binary = TRUE;
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i != argc - 1)
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make the tables */
|
||||||
|
|
||||||
tables = maketables();
|
tables = maketables();
|
||||||
base_of_tables = tables;
|
base_of_tables = tables;
|
||||||
|
@ -88,41 +148,64 @@ base_of_tables = tables;
|
||||||
f = fopen(argv[i], "wb");
|
f = fopen(argv[i], "wb");
|
||||||
if (f == NULL)
|
if (f == NULL)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* There are several fprintf() calls here, because gcc in pedantic mode
|
/* If -b was specified, we write the tables in binary. */
|
||||||
complains about the very long string otherwise. */
|
|
||||||
|
|
||||||
fprintf(f,
|
if (binary)
|
||||||
|
{
|
||||||
|
int yield = 0;
|
||||||
|
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
||||||
|
if (len != TABLES_LENGTH)
|
||||||
|
{
|
||||||
|
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
||||||
|
"instead of %d\n", (int)len, TABLES_LENGTH);
|
||||||
|
yield = 1;
|
||||||
|
}
|
||||||
|
fclose(f);
|
||||||
|
free((void *)base_of_tables);
|
||||||
|
return yield;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the tables as source code for inclusion in the PCRE2 library. There
|
||||||
|
are several fprintf() calls here, because gcc in pedantic mode complains about
|
||||||
|
the very long string otherwise. */
|
||||||
|
|
||||||
|
(void)fprintf(f,
|
||||||
"/*************************************************\n"
|
"/*************************************************\n"
|
||||||
"* Perl-Compatible Regular Expressions *\n"
|
"* Perl-Compatible Regular Expressions *\n"
|
||||||
"*************************************************/\n\n"
|
"*************************************************/\n\n"
|
||||||
"/* This file was automatically written by the dftables auxiliary\n"
|
"/* This file was automatically written by the pcre2_dftables auxiliary\n"
|
||||||
"program. It contains character tables that are used when no external\n"
|
"program. It contains character tables that are used when no external\n"
|
||||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||||
"are used only for characters whose code values are less than 256. */\n\n");
|
"are used only for characters whose code values are less than 256. */\n\n");
|
||||||
|
|
||||||
fprintf(f,
|
(void)fprintf(f,
|
||||||
"/*The dftables program (which is distributed with PCRE2) can be used to\n"
|
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||||
"build alternative versions of this file. This is necessary if you are\n"
|
|
||||||
|
(void)fprintf(f,
|
||||||
|
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
||||||
|
"to build alternative versions of this file. This is necessary if you are\n"
|
||||||
"running in an EBCDIC environment, or if you want to default to a different\n"
|
"running in an EBCDIC environment, or if you want to default to a different\n"
|
||||||
"encoding, for example ISO-8859-1. When dftables is run, it creates these\n"
|
"encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
|
||||||
"tables in the current locale. This happens automatically if PCRE2 is\n"
|
"these tables in the \"C\" locale by default. This happens automatically if\n"
|
||||||
"configured with --enable-rebuild-chartables. */\n\n");
|
"PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
|
||||||
|
"pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
|
||||||
|
"locale. */\n\n");
|
||||||
|
|
||||||
/* Force config.h in z/OS */
|
/* Force config.h in z/OS */
|
||||||
|
|
||||||
#if defined NATIVE_ZOS
|
#if defined NATIVE_ZOS
|
||||||
fprintf(f,
|
(void)fprintf(f,
|
||||||
"/* For z/OS, config.h is forced */\n"
|
"/* For z/OS, config.h is forced */\n"
|
||||||
"#ifndef HAVE_CONFIG_H\n"
|
"#ifndef HAVE_CONFIG_H\n"
|
||||||
"#define HAVE_CONFIG_H 1\n"
|
"#define HAVE_CONFIG_H 1\n"
|
||||||
"#endif\n\n");
|
"#endif\n\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
fprintf(f,
|
(void)fprintf(f,
|
||||||
"/* The following #include is present because without it gcc 4.x may remove\n"
|
"/* The following #include is present because without it gcc 4.x may remove\n"
|
||||||
"the array definition from the final binary if PCRE2 is built into a static\n"
|
"the array definition from the final binary if PCRE2 is built into a static\n"
|
||||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||||
|
@ -130,56 +213,57 @@ fprintf(f,
|
||||||
"outside this compilation unit might reference this\" and so it will always\n"
|
"outside this compilation unit might reference this\" and so it will always\n"
|
||||||
"be supplied to the linker. */\n\n");
|
"be supplied to the linker. */\n\n");
|
||||||
|
|
||||||
fprintf(f,
|
(void)fprintf(f,
|
||||||
"#ifdef HAVE_CONFIG_H\n"
|
"#ifdef HAVE_CONFIG_H\n"
|
||||||
"#include \"config.h\"\n"
|
"#include \"config.h\"\n"
|
||||||
"#endif\n\n"
|
"#endif\n\n"
|
||||||
"#include \"pcre2_internal.h\"\n\n");
|
"#include \"pcre2_internal.h\"\n\n");
|
||||||
|
|
||||||
fprintf(f,
|
(void)fprintf(f,
|
||||||
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||||
"/* This table is a lower casing table. */\n\n");
|
"/* This table is a lower casing table. */\n\n");
|
||||||
|
|
||||||
fprintf(f, " ");
|
(void)fprintf(f, " ");
|
||||||
for (i = 0; i < 256; i++)
|
for (i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||||
fprintf(f, "%3d", *tables++);
|
fprintf(f, "%3d", *tables++);
|
||||||
if (i != 255) fprintf(f, ",");
|
if (i != 255) fprintf(f, ",");
|
||||||
}
|
}
|
||||||
fprintf(f, ",\n\n");
|
(void)fprintf(f, ",\n\n");
|
||||||
|
|
||||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||||
|
|
||||||
fprintf(f, " ");
|
(void)fprintf(f, " ");
|
||||||
for (i = 0; i < 256; i++)
|
for (i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||||
fprintf(f, "%3d", *tables++);
|
fprintf(f, "%3d", *tables++);
|
||||||
if (i != 255) fprintf(f, ",");
|
if (i != 255) fprintf(f, ",");
|
||||||
}
|
}
|
||||||
fprintf(f, ",\n\n");
|
(void)fprintf(f, ",\n\n");
|
||||||
|
|
||||||
fprintf(f,
|
(void)fprintf(f,
|
||||||
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
"/* This table contains bit maps for various character classes. Each map is 32\n"
|
||||||
"bytes long and the bits run from the least significant end of each byte. The\n"
|
"bytes long and the bits run from the least significant end of each byte. The\n"
|
||||||
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
|
||||||
"graph print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
"graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||||
|
|
||||||
fprintf(f, " ");
|
(void)fprintf(f, " ");
|
||||||
for (i = 0; i < cbit_length; i++)
|
for (i = 0; i < cbit_length; i++)
|
||||||
{
|
{
|
||||||
if ((i & 7) == 0 && i != 0)
|
if ((i & 7) == 0 && i != 0)
|
||||||
{
|
{
|
||||||
if ((i & 31) == 0) fprintf(f, "\n");
|
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
||||||
fprintf(f, "\n ");
|
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||||
|
(void)fprintf(f, "\n ");
|
||||||
}
|
}
|
||||||
fprintf(f, "0x%02x", *tables++);
|
(void)fprintf(f, "0x%02x", *tables++);
|
||||||
if (i != cbit_length - 1) fprintf(f, ",");
|
if (i != cbit_length - 1) (void)fprintf(f, ",");
|
||||||
}
|
}
|
||||||
fprintf(f, ",\n\n");
|
(void)fprintf(f, ",\n\n");
|
||||||
|
|
||||||
fprintf(f,
|
(void)fprintf(f,
|
||||||
"/* This table identifies various classes of character by individual bits:\n"
|
"/* This table identifies various classes of character by individual bits:\n"
|
||||||
" 0x%02x white space character\n"
|
" 0x%02x white space character\n"
|
||||||
" 0x%02x letter\n"
|
" 0x%02x letter\n"
|
||||||
|
@ -188,32 +272,32 @@ fprintf(f,
|
||||||
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
||||||
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
|
||||||
|
|
||||||
fprintf(f, " ");
|
(void)fprintf(f, " ");
|
||||||
for (i = 0; i < 256; i++)
|
for (i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
if ((i & 7) == 0 && i != 0)
|
if ((i & 7) == 0 && i != 0)
|
||||||
{
|
{
|
||||||
fprintf(f, " /* ");
|
(void)fprintf(f, " /* ");
|
||||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||||
else fprintf(f, "%3d-", i-8);
|
else (void)fprintf(f, "%3d-", i-8);
|
||||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||||
else fprintf(f, "%3d", i-1);
|
else (void)fprintf(f, "%3d", i-1);
|
||||||
fprintf(f, " */\n ");
|
(void)fprintf(f, " */\n ");
|
||||||
}
|
}
|
||||||
fprintf(f, "0x%02x", *tables++);
|
(void)fprintf(f, "0x%02x", *tables++);
|
||||||
if (i != 255) fprintf(f, ",");
|
if (i != 255) (void)fprintf(f, ",");
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(f, "};/* ");
|
(void)fprintf(f, "};/* ");
|
||||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
|
||||||
else fprintf(f, "%3d-", i-8);
|
else (void)fprintf(f, "%3d-", i-8);
|
||||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
|
||||||
else fprintf(f, "%3d", i-1);
|
else (void)fprintf(f, "%3d", i-1);
|
||||||
fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||||
|
|
||||||
fclose(f);
|
fclose(f);
|
||||||
free((void *)base_of_tables);
|
free((void *)base_of_tables);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of dftables.c */
|
/* End of pcre2_dftables.c */
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -579,7 +579,7 @@ total length of the tables. */
|
||||||
#define fcc_offset 256 /* Flip case */
|
#define fcc_offset 256 /* Flip case */
|
||||||
#define cbits_offset 512 /* Character classes */
|
#define cbits_offset 512 /* Character classes */
|
||||||
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
||||||
#define tables_length (ctypes_offset + 256)
|
#define TABLES_LENGTH (ctypes_offset + 256)
|
||||||
|
|
||||||
|
|
||||||
/* -------------------- Character and string names ------------------------ */
|
/* -------------------- Character and string names ------------------------ */
|
||||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -41,10 +41,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
/* This module contains the external function pcre2_maketables(), which builds
|
/* This module contains the external function pcre2_maketables(), which builds
|
||||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||||
own as part of the PCRE2 library. However, it is also included in the
|
own as part of the PCRE2 library. It is also included in the compilation of
|
||||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
pcre2_dftables.c as a freestanding program, in which case the macro
|
||||||
|
PCRE2_DFTABLES is defined. */
|
||||||
|
|
||||||
#ifndef DFTABLES
|
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||||
# ifdef HAVE_CONFIG_H
|
# ifdef HAVE_CONFIG_H
|
||||||
# include "config.h"
|
# include "config.h"
|
||||||
# endif
|
# endif
|
||||||
|
@ -61,28 +62,29 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||||
a pointer to them. They are build using the ctype functions, and consequently
|
a pointer to them. They are build using the ctype functions, and consequently
|
||||||
their contents will depend upon the current locale setting. When compiled as
|
their contents will depend upon the current locale setting. When compiled as
|
||||||
part of the library, the store is obtained via a general context malloc, if
|
part of the library, the store is obtained via a general context malloc, if
|
||||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
|
||||||
program) malloc() is used, and the function has a different name so as not to
|
freestanding auxiliary program) malloc() is used, and the function has a
|
||||||
clash with the prototype in pcre2.h.
|
different name so as not to clash with the prototype in pcre2.h.
|
||||||
|
|
||||||
Arguments: none when DFTABLES is defined
|
Arguments: none when PCRE2_DFTABLES is defined
|
||||||
else a PCRE2 general context or NULL
|
else a PCRE2 general context or NULL
|
||||||
Returns: pointer to the contiguous block of data
|
Returns: pointer to the contiguous block of data
|
||||||
|
else NULL if memory allocation failed
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef DFTABLES /* Included in freestanding dftables.c program */
|
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
|
||||||
static const uint8_t *maketables(void)
|
static const uint8_t *maketables(void)
|
||||||
{
|
{
|
||||||
uint8_t *yield = (uint8_t *)malloc(tables_length);
|
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
|
||||||
|
|
||||||
#else /* Not DFTABLES, compiling the library */
|
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
|
||||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||||
pcre2_maketables(pcre2_general_context *gcontext)
|
pcre2_maketables(pcre2_general_context *gcontext)
|
||||||
{
|
{
|
||||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||||
gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
|
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
|
||||||
malloc(tables_length));
|
malloc(TABLES_LENGTH));
|
||||||
#endif /* DFTABLES */
|
#endif /* PCRE2_DFTABLES */
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
uint8_t *p;
|
uint8_t *p;
|
||||||
|
@ -103,8 +105,8 @@ exclusive ones - in some locales things may be different.
|
||||||
|
|
||||||
Note that the table for "space" includes everything "isspace" gives, including
|
Note that the table for "space" includes everything "isspace" gives, including
|
||||||
VT in the default locale. This makes it work for the POSIX class [:space:].
|
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||||
From release 8.34 is is also correct for Perl space, because Perl added VT at
|
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
|
||||||
release 5.18.
|
space, because Perl added VT at release 5.18.
|
||||||
|
|
||||||
Note also that it is possible for a character to be alnum or alpha without
|
Note also that it is possible for a character to be alnum or alpha without
|
||||||
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||||
|
@ -120,7 +122,7 @@ for (i = 0; i < 256; i++)
|
||||||
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||||
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||||
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
|
if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||||
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||||
|
@ -130,8 +132,8 @@ p += cbit_length;
|
||||||
|
|
||||||
/* Finally, the character type table. In this, we used to exclude VT from the
|
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||||
white space chars, because Perl didn't recognize it as such for \s and for
|
white space chars, because Perl didn't recognize it as such for \s and for
|
||||||
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
|
comments within regexes. However, Perl changed at release 5.18, so PCRE1
|
||||||
at release 8.34. */
|
changed at release 8.34 and it's always been this way for PCRE2. */
|
||||||
|
|
||||||
for (i = 0; i < 256; i++)
|
for (i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
|
@ -147,7 +149,7 @@ for (i = 0; i < 256; i++)
|
||||||
return yield;
|
return yield;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef DFTABLES
|
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||||
{
|
{
|
||||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -90,7 +90,7 @@ if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
||||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||||
|
|
||||||
/* Compute total size. */
|
/* Compute total size. */
|
||||||
total_size = sizeof(pcre2_serialized_data) + tables_length;
|
total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
|
||||||
tables = NULL;
|
tables = NULL;
|
||||||
|
|
||||||
for (i = 0; i < number_of_codes; i++)
|
for (i = 0; i < number_of_codes; i++)
|
||||||
|
@ -121,8 +121,8 @@ data->number_of_codes = number_of_codes;
|
||||||
|
|
||||||
/* Copy all compiled code data. */
|
/* Copy all compiled code data. */
|
||||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||||
memcpy(dst_bytes, tables, tables_length);
|
memcpy(dst_bytes, tables, TABLES_LENGTH);
|
||||||
dst_bytes += tables_length;
|
dst_bytes += TABLES_LENGTH;
|
||||||
|
|
||||||
for (i = 0; i < number_of_codes; i++)
|
for (i = 0; i < number_of_codes; i++)
|
||||||
{
|
{
|
||||||
|
@ -189,12 +189,12 @@ src_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||||
/* Decode tables. The reference count for the tables is stored immediately
|
/* Decode tables. The reference count for the tables is stored immediately
|
||||||
following them. */
|
following them. */
|
||||||
|
|
||||||
tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
|
tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
|
||||||
memcpy(tables, src_bytes, tables_length);
|
memcpy(tables, src_bytes, TABLES_LENGTH);
|
||||||
*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
|
*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
|
||||||
src_bytes += tables_length;
|
src_bytes += TABLES_LENGTH;
|
||||||
|
|
||||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||||
code block in the stream, because it might be unaligned, which causes errors on
|
code block in the stream, because it might be unaligned, which causes errors on
|
||||||
|
|
|
@ -389,12 +389,14 @@ typedef struct cmdstruct {
|
||||||
int value;
|
int value;
|
||||||
} cmdstruct;
|
} cmdstruct;
|
||||||
|
|
||||||
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
|
enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT,
|
||||||
CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };
|
CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT,
|
||||||
|
CMD_UNKNOWN };
|
||||||
|
|
||||||
static cmdstruct cmdlist[] = {
|
static cmdstruct cmdlist[] = {
|
||||||
{ "forbid_utf", CMD_FORBID_UTF },
|
{ "forbid_utf", CMD_FORBID_UTF },
|
||||||
{ "load", CMD_LOAD },
|
{ "load", CMD_LOAD },
|
||||||
|
{ "loadtables", CMD_LOADTABLES },
|
||||||
{ "newline_default", CMD_NEWLINE_DEFAULT },
|
{ "newline_default", CMD_NEWLINE_DEFAULT },
|
||||||
{ "pattern", CMD_PATTERN },
|
{ "pattern", CMD_PATTERN },
|
||||||
{ "perltest", CMD_PERLTEST },
|
{ "perltest", CMD_PERLTEST },
|
||||||
|
@ -957,6 +959,8 @@ static int *dfa_workspace = NULL;
|
||||||
static const uint8_t *locale_tables = NULL;
|
static const uint8_t *locale_tables = NULL;
|
||||||
static const uint8_t *use_tables = NULL;
|
static const uint8_t *use_tables = NULL;
|
||||||
static uint8_t locale_name[32];
|
static uint8_t locale_name[32];
|
||||||
|
static uint8_t *tables3 = NULL; /* For binary-loaded tables */
|
||||||
|
static uint32_t loadtables_length = 0;
|
||||||
|
|
||||||
/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
|
/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
|
||||||
rebuilding, but set up the same naming scheme for use in macros. The "buffer"
|
rebuilding, but set up the same naming scheme for use in macros. The "buffer"
|
||||||
|
@ -4795,12 +4799,13 @@ Arguments:
|
||||||
buffptr point after the #command
|
buffptr point after the #command
|
||||||
mode open mode
|
mode open mode
|
||||||
fptr points to the FILE variable
|
fptr points to the FILE variable
|
||||||
|
name name of # command
|
||||||
|
|
||||||
Returns: PR_OK or PR_ABEND
|
Returns: PR_OK or PR_ABEND
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
|
open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
|
||||||
{
|
{
|
||||||
char *endf;
|
char *endf;
|
||||||
char *filename = (char *)buffptr;
|
char *filename = (char *)buffptr;
|
||||||
|
@ -4810,7 +4815,7 @@ while (endf > filename && isspace(endf[-1])) endf--;
|
||||||
|
|
||||||
if (endf == filename)
|
if (endf == filename)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "** File name expected after #save\n");
|
fprintf(outfile, "** File name expected after %s\n", name);
|
||||||
return PR_ABEND;
|
return PR_ABEND;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4976,7 +4981,7 @@ switch(cmd)
|
||||||
return PR_OK;
|
return PR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
|
rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
|
||||||
if (rc != PR_OK) return rc;
|
if (rc != PR_OK) return rc;
|
||||||
|
|
||||||
PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
|
PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
|
||||||
|
@ -5015,7 +5020,7 @@ switch(cmd)
|
||||||
/* Load a set of compiled patterns from a file onto the stack */
|
/* Load a set of compiled patterns from a file onto the stack */
|
||||||
|
|
||||||
case CMD_LOAD:
|
case CMD_LOAD:
|
||||||
rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
|
rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
|
||||||
if (rc != PR_OK) return rc;
|
if (rc != PR_OK) return rc;
|
||||||
|
|
||||||
serial_size = 0;
|
serial_size = 0;
|
||||||
|
@ -5067,6 +5072,31 @@ switch(cmd)
|
||||||
|
|
||||||
free(serial);
|
free(serial);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* Load a set of binary tables into tables3. */
|
||||||
|
|
||||||
|
case CMD_LOADTABLES:
|
||||||
|
rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
|
||||||
|
if (rc != PR_OK) return rc;
|
||||||
|
|
||||||
|
if (tables3 == NULL)
|
||||||
|
{
|
||||||
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
|
||||||
|
tables3 = malloc(loadtables_length);
|
||||||
|
if (tables3 == NULL)
|
||||||
|
{
|
||||||
|
fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
|
||||||
|
return PR_ABEND;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
|
||||||
|
{
|
||||||
|
fprintf(outfile, "** Wrong return from fread()\n");
|
||||||
|
yield = PR_ABEND;
|
||||||
|
}
|
||||||
|
fclose(f);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return yield;
|
return yield;
|
||||||
|
@ -5382,8 +5412,19 @@ else switch (pat_patctl.tables_id)
|
||||||
case 0: use_tables = NULL; break;
|
case 0: use_tables = NULL; break;
|
||||||
case 1: use_tables = tables1; break;
|
case 1: use_tables = tables1; break;
|
||||||
case 2: use_tables = tables2; break;
|
case 2: use_tables = tables2; break;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
if (tables3 == NULL)
|
||||||
|
{
|
||||||
|
fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
|
||||||
|
"been loaded\n");
|
||||||
|
return PR_SKIP;
|
||||||
|
}
|
||||||
|
use_tables = tables3;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
|
fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
|
||||||
return PR_SKIP;
|
return PR_SKIP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9112,6 +9153,7 @@ free(dbuffer);
|
||||||
free(pbuffer8);
|
free(pbuffer8);
|
||||||
free(dfa_workspace);
|
free(dfa_workspace);
|
||||||
free((void *)locale_tables);
|
free((void *)locale_tables);
|
||||||
|
free(tables3);
|
||||||
PCRE2_MATCH_DATA_FREE(match_data);
|
PCRE2_MATCH_DATA_FREE(match_data);
|
||||||
SUB1(pcre2_code_free, compiled_code);
|
SUB1(pcre2_code_free, compiled_code);
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -5837,4 +5837,22 @@ a)"xI
|
||||||
|
|
||||||
"(?<=X(?(DEFINE)(.*))(?1))."
|
"(?<=X(?(DEFINE)(.*))(?1))."
|
||||||
|
|
||||||
|
/\sxxx\s/tables=1
|
||||||
|
\= Expect no match
|
||||||
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
|
|
||||||
|
/\sxxx\s/tables=2
|
||||||
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
|
|
||||||
|
/^\w+/tables=2
|
||||||
|
École
|
||||||
|
|
||||||
|
/^\w+/tables=3
|
||||||
|
École
|
||||||
|
|
||||||
|
#loadtables ./testdata/testbtables
|
||||||
|
|
||||||
|
/^\w+/tables=3
|
||||||
|
École
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -17580,6 +17580,29 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
|
||||||
"(?<=X(?(DEFINE)(.*))(?1))."
|
"(?<=X(?(DEFINE)(.*))(?1))."
|
||||||
Failed: error 125 at offset 0: lookbehind assertion is not fixed length
|
Failed: error 125 at offset 0: lookbehind assertion is not fixed length
|
||||||
|
|
||||||
|
/\sxxx\s/tables=1
|
||||||
|
\= Expect no match
|
||||||
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
|
No match
|
||||||
|
|
||||||
|
/\sxxx\s/tables=2
|
||||||
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
|
0: \x85xxx\xa0
|
||||||
|
|
||||||
|
/^\w+/tables=2
|
||||||
|
École
|
||||||
|
0: \xc3
|
||||||
|
|
||||||
|
/^\w+/tables=3
|
||||||
|
** 'Tables = 3' is invalid: binary tables have not been loaded
|
||||||
|
École
|
||||||
|
|
||||||
|
#loadtables ./testdata/testbtables
|
||||||
|
|
||||||
|
/^\w+/tables=3
|
||||||
|
École
|
||||||
|
0: \xc3
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue