From b6b9087081c1dc665221aff10f45a0068a0b73e7 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 10 Aug 2014 16:09:24 +0000 Subject: [PATCH] Multi-config tests (without JIT) now work. --- Makefile.am | 9 +- RunTest | 2 +- libpcre2-posix.pc | 13 - maint/ManyConfigTests | 246 ++++++++------ src/config.h | 293 ---------------- src/dftables.c | 213 ++++++++++++ src/pcre2.h | 655 ------------------------------------ src/pcre2_auto_possess.c | 4 +- src/pcre2_chartables.c | 1 - src/pcre2_chartables.c.dist | 8 +- src/pcre2_compile.c | 32 +- src/pcre2_config.c | 7 +- src/pcre2_internal.h | 2 + src/pcre2_intmodedep.h | 8 +- src/pcre2_maketables.c | 28 +- src/pcre2_printint.c | 22 +- src/pcre2_valid_utf.c | 8 +- src/pcre2_xclass.c | 4 +- src/pcre2test.c | 175 +++++++--- testdata/testoutput10 | 1 + testdata/testoutput11-16 | 1 + testdata/testoutput11-32 | 1 + testdata/testoutput12-16 | 2 + testdata/testoutput12-32 | 2 + testdata/testoutput2 | 8 + testdata/testoutput5 | 2 + testdata/testoutput6 | 3 + testdata/testoutput7 | 2 + 28 files changed, 578 insertions(+), 1174 deletions(-) delete mode 100644 libpcre2-posix.pc delete mode 100644 src/config.h create mode 100644 src/dftables.c delete mode 100644 src/pcre2.h delete mode 120000 src/pcre2_chartables.c diff --git a/Makefile.am b/Makefile.am index 7c7f680..bbff915 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,7 +2,7 @@ AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 - +AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src ## Specify the documentation files that are distributed. @@ -238,20 +238,21 @@ bin_SCRIPTS = pcre2-config ## --------------------------------------------------------------- ## The dftables program is used to rebuild character tables before compiling -## PCRE, if --enable-rebuild-chartables is specified. It is not a user-visible +## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible ## program. 
The default (when --enable-rebuild-chartables is not specified) is ## to copy a distributed set of tables that are defined for ASCII code. In this ## case, dftables is not needed. if WITH_REBUILD_CHARTABLES noinst_PROGRAMS += dftables -dftables_SOURCES = dftables.c +dftables_SOURCES = src/dftables.c src/pcre2_chartables.c: dftables$(EXEEXT) + rm -f $@ ./dftables$(EXEEXT) $@ else src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist rm -f $@ - $(LN_S) $(srcdir)/pcre2_chartables.c.dist $@ + $(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c endif # WITH_REBUILD_CHARTABLES BUILT_SOURCES = src/pcre2_chartables.c diff --git a/RunTest b/RunTest index 95b71e6..bc44693 100755 --- a/RunTest +++ b/RunTest @@ -72,7 +72,7 @@ title17="Test 17: Tests of the POSIX interface with UTF/UCP" #title21="Test 21: Reloads for the basic 16/32-bit library" #title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support" -maxtest=2 +maxtest=17 if [ $# -eq 1 -a "$1" = "list" ]; then echo $title1 diff --git a/libpcre2-posix.pc b/libpcre2-posix.pc deleted file mode 100644 index 5844ddf..0000000 --- a/libpcre2-posix.pc +++ /dev/null @@ -1,13 +0,0 @@ -# Package Information for pkg-config - -prefix=/usr/local -exec_prefix=${prefix} -libdir=${exec_prefix}/lib -includedir=${prefix}/include - -Name: libpcre2-posix -Description: Posix compatible interface to libpcre2-8 -Version: 10.00-DEV -Libs: -L${libdir} -lpcre2-posix -Cflags: -I${includedir} @PCRE_STATIC_CFLAG@ -Requires.private: libpcre2-8 diff --git a/maint/ManyConfigTests b/maint/ManyConfigTests index ad62b1a..879eab4 100755 --- a/maint/ManyConfigTests +++ b/maint/ManyConfigTests @@ -1,50 +1,73 @@ #! /bin/sh -# This is a script for the use of PCRE maintainers. It configures and rebuilds +# This is a script for the use of PCRE2 maintainers. It configures and rebuilds # PCRE2 with a variety of configuration options, and in each case runs the # tests to ensure that all goes well. 
Every possible combination would take far # too long, so we use a representative sample. This script should be run in the # PCRE2 source directory. -# Some of the tests have to be skipped when PCRE2 is built with non-Unix -# newline recognition. I am planning to reduce this as much as possible in due -# course. +# While debugging, it is sometimes useful to be able to cut out some of the +# tests, in order to run those that are giving errors. The following options +# do this: +# +# -nojit skip JIT tests +# -nomain skip the main set of tests +# -notmp skip the test in a temporary directory +# -novalgrind skip the valgrind tests +# The -v option causes a call to 'pcre2test -C' to happen for each +# configuration. + +# Some of the tests are automatically skipped when PCRE2 is built with non-Unix +# newline recognition because they don't work. I am hoping to reduce this as +# much as possible in due course. + +usejit=1 +usemain=1 +usetmp=1 +usevalgrind=1 +verbose=0 + +while [ $# -gt 0 ] ; do + case $1 in + -nojit) usejit=0;; + -nomain) usemain=0;; + -notmp) usetmp=0;; + -novalgrind) usevalgrind=0;; + -v) verbose=1;; + *) echo "Unknown option '$1'"; exit 1;; + esac + shift +done # This is in case the caller has set aliases (as I do - PH) unset cp ls mv rm -# Use -v to make the output more verbose - -verbose=0 -if [ "$1" = "-v" ] ; then verbose=1; fi - # This is a temporary directory for testing out-of-line builds -tmp=/tmp/pcretesting +tmp=/tmp/pcre2testing # Don't bother with compiler optimization for most tests; it just slows down -# compilation a lot (and running the tests themselves is quick). However, a -# few specific tests turn optimization on, because it can provoke some compiler +# compilation a lot (and running the tests themselves is quick). However, one +# special test turns optimization on, because it can provoke some compiler # warnings. CFLAGS="-g -O0" -CXXFLAGS="$CFLAGS" -ISGCC="no" +ISGCC=0 # If the compiler is gcc, add a lot of warning switches. 
cc --version >zzz 2>/dev/null if [ $? -eq 0 ] && grep GCC zzz >/dev/null; then - ISGCC="yes" + ISGCC=1 CFLAGS="$CFLAGS -Wall" CFLAGS="$CFLAGS -Wno-overlength-strings" CFLAGS="$CFLAGS -Wpointer-arith" CFLAGS="$CFLAGS -Wwrite-strings" CFLAGS="$CFLAGS -Wundef -Wshadow" CFLAGS="$CFLAGS -Wmissing-field-initializers" - CFLAGS="$CFLAGS -Wunused-parameter" + CFLAGS="$CFLAGS -Wunused-parameter" CFLAGS="$CFLAGS -Wextra -Wformat" CFLAGS="$CFLAGS -Wbad-function-cast" CFLAGS="$CFLAGS -Wmissing-declarations" @@ -71,12 +94,12 @@ function runtest() echo " $opts" fi - CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" \ + CFLAGS="$CFLAGS" \ $srcdir/configure $opts >/dev/null 2>teststderr if [ $? -ne 0 ]; then echo " " - echo "**** Error while configuring ****" + echo "******** Error while configuring ********" cat teststderr exit 1 fi @@ -85,7 +108,7 @@ function runtest() make -j >/dev/null 2>teststderr if [ $? -ne 0 -o -s teststderr ]; then echo " " - echo "**** Errors or warnings while making ****" + echo "******** Errors or warnings while making ********" echo " " cat teststderr exit 1 @@ -96,7 +119,7 @@ function runtest() fi nl=`./pcre2test -C newline` - ./pcretest -C jit >/dev/null + ./pcre2test -C jit >/dev/null jit=$? ./pcre2test -C utf >/dev/null utf=$? @@ -141,38 +164,19 @@ function runtest() else echo "Skipping JIT regression tests: JIT or UTF not enabled" fi - -# if [ "$nl" = "LF" -o "$nl" = "ANY" ]; then -# if [ -f pcrecpp_unittest ] ; then -# for utest in pcrecpp_unittest \ -# pcre_scanner_unittest \ -# pcre_stringpiece_unittest -# do -# echo "Running $utest $withvalgrind" -# $cvalgrind $utest >teststdout -# if [ $? -ne 0 ]; then -# echo " " -# echo "**** Test failed ****" -# cat teststdout -# exit 1 -# fi -# done -# else -# echo "Skipping C++ tests: pcrecpp_unittest does not exist" -# fi -# else -# echo "Skipping C++ tests: newline is $nl" -# fi } - # Update the total count whenever a new test is added; it is used to show # progess as each test is run. 
-testtotal=40 +testtotal=`expr 20 \* $usemain + \ + 1 \* $usetmp + \ + 1 \* $ISGCC \* $usemain + \ + 13 \* $usejit + \ + \( 3 + 2 \* $usejit \) \* $usevalgrind` testcount=0 -# This set of tests builds PCRE and runs the tests with a variety of configure +# This set of tests builds PCRE2 and runs the tests with a variety of configure # options, in the current (source) directory. The empty configuration builds # with all the default settings. As well as testing that these options work, we # use --disable-shared or --disable-static after the default test (which builds @@ -188,71 +192,99 @@ export srcdir # If gcc is in use, run a maximally configured test with -O2, because that can # throw up warnings that are not detected with -O0. -if [ "$ISGCC" = "yes" ]; then - echo "Maximally configured test with -O2" +if [ $usejit -ne 0 ]; then + enable_jit=--enable-jit +else + enable_jit= +fi + +if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then + echo "---------- Maximally configured test with -O2 ----------" SAVECFLAGS="$CFLAGS" CFLAGS="$CFLAGS -O2" - opts="--disable-shared --enable-utf --enable-jit --enable-pcre16 --enable-pcre32" + opts="--disable-shared --enable-utf $enable_jit --enable-pcre16 --enable-pcre32" runtest CFLAGS="$SAVECFLAGS" fi -echo "General tests in the current directory" -for opts in \ - "" \ - "--enable-utf --disable-static" \ - "--disable-stack-for-recursion --disable-shared" \ - "--enable-utf --disable-shared" \ - "--enable-utf --disable-stack-for-recursion --disable-shared" \ - "--enable-utf --with-link-size=3 --disable-shared" \ - "--enable-rebuild-chartables --disable-shared" \ - "--enable-newline-is-any --disable-shared" \ - "--enable-newline-is-cr --disable-shared" \ - "--enable-newline-is-crlf --disable-shared" \ - "--enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \ - "--enable-utf --enable-newline-is-any --disable-stack-for-recursion --disable-static" \ - "--enable-jit --disable-shared" \ - "--enable-jit --enable-utf --disable-shared" \
- "--enable-jit --enable-utf --with-link-size=3 --disable-shared" \ - "--enable-pcre16" \ - "--enable-pcre16 --enable-jit --enable-utf --disable-shared" \ - "--enable-pcre16 --enable-jit --disable-pcre8 --disable-shared" \ - "--enable-pcre16 --enable-jit --disable-pcre8 --enable-utf --disable-shared" \ - "--enable-pcre16 --disable-stack-for-recursion --disable-shared" \ - "--enable-pcre16 --enable-utf --disable-stack-for-recursion --disable-shared" \ - "--enable-pcre16 --enable-jit --enable-utf --with-link-size=3 --disable-shared" \ - "--enable-pcre16 --enable-jit --enable-utf --with-link-size=4 --disable-shared" \ - "--enable-pcre32" \ - "--enable-pcre32 --enable-jit --enable-utf --disable-shared" \ - "--enable-pcre32 --enable-jit --disable-pcre8 --disable-shared" \ - "--enable-pcre32 --enable-jit --disable-pcre8 --enable-utf --disable-shared" \ - "--enable-pcre32 --disable-stack-for-recursion --disable-shared" \ - "--enable-pcre32 --enable-utf --disable-stack-for-recursion --disable-shared" \ - "--enable-pcre32 --enable-jit --enable-utf --with-link-size=4 --disable-shared" \ - "--enable-pcre32 --enable-pcre16 --disable-shared" \ - "--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared" \ - "--enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-jit --enable-utf --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" -do - runtest -done +if [ $usemain -ne 0 ]; then + echo "---------- Non-JIT tests in the current directory ----------" + for opts in \ + "" \ + "--enable-utf --disable-static" \ + "--disable-stack-for-recursion --disable-shared" \ + "--enable-utf --disable-shared" \ + "--enable-utf --disable-stack-for-recursion --disable-shared" \ + "--enable-utf --with-link-size=3 --disable-shared" \ + "--enable-rebuild-chartables --disable-shared" \ + "--enable-newline-is-any --disable-shared" \ + "--enable-newline-is-cr --disable-shared" \ + "--enable-newline-is-crlf --disable-shared" \ + "--enable-newline-is-anycrlf --enable-bsr-anycrlf 
--disable-shared" \ + "--enable-utf --enable-newline-is-any --disable-stack-for-recursion --disable-static" \ + "--enable-pcre16" \ + "--enable-pcre16 --disable-stack-for-recursion --disable-shared" \ + "--enable-pcre16 --enable-utf --disable-stack-for-recursion --disable-shared" \ + "--enable-pcre32" \ + "--enable-pcre32 --disable-stack-for-recursion --disable-shared" \ + "--enable-pcre32 --enable-utf --disable-stack-for-recursion --disable-shared" \ + "--enable-pcre32 --enable-pcre16 --disable-shared" \ + "--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared" + do + runtest + done +fi + +# Now run the JIT tests unless disabled + +if [ $usejit -ne 0 ]; then + echo "---------- JIT tests in the current directory ----------" + for opts in \ + "--enable-jit --disable-shared" \ + "--enable-jit --enable-utf --disable-shared" \ + "--enable-jit --enable-utf --with-link-size=3 --disable-shared" \ + "--enable-jit --enable-pcre16 --enable-utf --disable-shared" \ + "--enable-jit --enable-pcre16 --disable-pcre8 --disable-shared" \ + "--enable-jit --enable-pcre16 --disable-pcre8 --enable-utf --disable-shared" \ + "--enable-jit --enable-pcre16 --enable-utf --with-link-size=3 --disable-shared" \ + "--enable-jit --enable-pcre16 --enable-utf --with-link-size=4 --disable-shared" \ + "--enable-jit --enable-pcre32 --enable-utf --disable-shared" \ + "--enable-jit --enable-pcre32 --disable-pcre8 --disable-shared" \ + "--enable-jit --enable-pcre32 --disable-pcre8 --enable-utf --disable-shared" \ + "--enable-jit --enable-pcre32 --enable-utf --with-link-size=4 --disable-shared" \ + "--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-utf --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" + do + runtest + done +fi # Now re-run some of the tests under valgrind. 
-echo "Tests in the current directory using valgrind" -valgrind=valgrind -cvalgrind="valgrind -q --smc-check=all" -withvalgrind="with valgrind" +if [ $usevalgrind -ne 0 ]; then + echo "---------- Tests in the current directory using valgrind ----------" + valgrind=valgrind + cvalgrind="valgrind -q --smc-check=all" + withvalgrind="with valgrind" -for opts in \ - "--enable-utf --disable-stack-for-recursion --disable-shared" \ - "--enable-utf --with-link-size=3 --disable-shared" \ - "--enable-jit --enable-utf --disable-shared" \ - "--enable-pcre16 --enable-pcre32 --enable-jit --enable-utf " \ - "--disable-shared" -do - opts="--enable-valgrind $opts" - runtest -done + for opts in \ + "--enable-utf --disable-stack-for-recursion --disable-shared" \ + "--enable-utf --with-link-size=3 --disable-shared" \ + "--disable-shared" + do + opts="--enable-valgrind $opts" + runtest + done + + if [ $usejit -ne 0 ]; then + for opts in \ + "--enable-jit --enable-utf --disable-shared" \ + "--enable-jit --enable-pcre16 --enable-pcre32 --enable-utf" + do + opts="--enable-valgrind $opts" + runtest + done + fi +fi valgrind= cvalgrind= @@ -271,7 +303,7 @@ if [ -f Makefile ]; then fi fi -echo "Tests in the $tmp directory" +echo "---------- Tests in the $tmp directory ----------" srcdir=`pwd` export srcdir @@ -290,11 +322,13 @@ if [ $? -ne 0 ]; then exit 1 fi -for opts in \ - "--enable-utf --disable-shared" -do - runtest -done +if [ $usetmp -ne 0 ]; then + for opts in \ + "--enable-utf --disable-shared" + do + runtest + done +fi echo "Removing $tmp" diff --git a/src/config.h b/src/config.h deleted file mode 100644 index b81449e..0000000 --- a/src/config.h +++ /dev/null @@ -1,293 +0,0 @@ -/* src/config.h. Generated from config.h.in by configure. */ -/* src/config.h.in. Generated from configure.ac by autoheader. */ - - -/* PCRE2 is written in Standard C, but there are a few non-standard things it -can cope with, allowing it to run on SunOS4 and other "close to standard" -systems. 
- -In environments that support the GNU autotools, config.h.in is converted into -config.h by the "configure" script. In environments that use CMake, -config-cmake.in is converted into config.h. If you are going to build PCRE2 "by -hand" without using "configure" or CMake, you should copy the distributed -config.h.generic to config.h, and edit the macro definitions to be the way you -need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, -so that config.h is included at the start of every source. - -Alternatively, you can avoid editing by using -D on the compiler command line -to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, -but if you do, default values will be taken from config.h for non-boolean -macros that are not defined on the command line. - -Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined -(conventionally to 1) for TRUE, and not defined at all for FALSE. All such -macros are listed as a commented #undef in config.h.generic. Macros such as -MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are -surrounded by #ifndef/#endif lines so that the value can be overridden by -D. - -PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if -HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make -sure both macros are undefined; an emulation function will then be used. */ - -/* By default, the \R escape sequence matches any Unicode line ending - character or sequence of characters. If BSR_ANYCRLF is defined (to any - value), this is changed so that backslash-R matches only CR, LF, or CRLF. - The build-time default can be overridden by the user of PCRE2 at runtime. - */ -/* #undef BSR_ANYCRLF */ - -/* If you are compiling for a system that uses EBCDIC instead of ASCII - character codes, define this macro to any value. When EBCDIC is set, PCRE2 - assumes that all input strings are in EBCDIC. 
If you do not define this - macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It - is not possible to build a version of PCRE2 that supports both EBCDIC and - UTF-8/16/32. */ -/* #undef EBCDIC */ - -/* In an EBCDIC environment, define this macro to any value to arrange for the - NL character to be 0x25 instead of the default 0x15. NL plays the role that - LF does in an ASCII/Unicode environment. */ -/* #undef EBCDIC_NL25 */ - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the <bzlib.h> header file. */ -#define HAVE_BZLIB_H 1 - -/* Define to 1 if you have the <dirent.h> header file. */ -#define HAVE_DIRENT_H 1 - -/* Define to 1 if you have the <dlfcn.h> header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the <editline/readline.h> header file. */ -/* #undef HAVE_EDITLINE_READLINE_H */ - -/* Define to 1 if you have the <edit/readline/readline.h> header file. */ -/* #undef HAVE_EDIT_READLINE_READLINE_H */ - -/* Define to 1 if you have the <inttypes.h> header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the <limits.h> header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the <memory.h> header file. */ -#define HAVE_MEMORY_H 1 - -/* Define if you have POSIX threads libraries and header files. */ -/* #undef HAVE_PTHREAD */ - -/* Have PTHREAD_PRIO_INHERIT. */ -/* #undef HAVE_PTHREAD_PRIO_INHERIT */ - -/* Define to 1 if you have the <readline/history.h> header file. */ -#define HAVE_READLINE_HISTORY_H 1 - -/* Define to 1 if you have the <readline/readline.h> header file. */ -#define HAVE_READLINE_READLINE_H 1 - -/* Define to 1 if you have the <stdint.h> header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the <stdlib.h> header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the <strings.h> header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the <string.h> header file.
*/ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the <sys/types.h> header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the <unistd.h> header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the compiler supports simple visibility declarations. */ -#define HAVE_VISIBILITY 1 - -/* Define to 1 if you have the <windows.h> header file. */ -/* #undef HAVE_WINDOWS_H */ - -/* Define to 1 if you have the <zlib.h> header file. */ -#define HAVE_ZLIB_H 1 - -/* The value of LINK_SIZE determines the number of bytes used to store links - as offsets within the compiled regex. The default is 2, which allows for - compiled patterns up to 64K long. This covers the vast majority of cases. - However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This - allows for longer patterns in extreme cases. */ -#define LINK_SIZE 2 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* The value of MATCH_LIMIT determines the default number of times the - internal match() function can be called during a single execution of - pcre2_match(). There is a runtime interface for setting a different limit. - The limit exists in order to catch runaway regular expressions that take - for ever to determine that they do not match. The default is set very large - so that it does not accidentally catch legitimate cases. */ -#define MATCH_LIMIT 10000000 - -/* The above limit applies to all calls of match(), whether or not they - increase the recursion depth. In some environments it is desirable to limit - the depth of recursive calls of match() more strictly, in order to restrict - the maximum amount of stack (or heap, if NO_RECURSE is defined) that is - used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of - match(). To have any useful effect, it must be less than the value of - MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
There is - a runtime method for setting a different limit. */ -#define MATCH_LIMIT_RECURSION MATCH_LIMIT - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_COUNT 10000 - -/* This limit is parameterized just in case anybody ever wants to change it. - Care must be taken if it is increased, because it guards against integer - overflow caused by enormously large patterns. */ -#define MAX_NAME_SIZE 32 - -/* The value of NEWLINE_DEFAULT determines the default newline character - sequence. PCRE2 client programs can override this by selecting other values - at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5 - (ANYCRLF). */ -#define NEWLINE_DEFAULT 2 - -/* PCRE2 uses recursive function calls to handle backtracking while matching. - This can sometimes be a problem on systems that have stacks of limited - size. Define NO_RECURSE to any value to get a version that doesn't use - recursion in the match() function; instead it creates its own stack by - steam using memory from the heap. For more detail, see the comments and - other stuff just above the match() function. */ -/* #undef NO_RECURSE */ - -/* Name of package */ -#define PACKAGE "pcre2" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "PCRE2" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.00-DEV" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "pcre2" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "10.00-DEV" - -/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested - parentheses (of any kind) in a pattern. This limits the amount of system - stack that is used while compiling a pattern. */ -#define PARENS_NEST_LIMIT 250 - -/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by - pcre2grep to hold parts of the file it is searching. This is also the - minimum value. The actual amount of memory used by pcre2grep is three times - this number, because it allows for the buffering of "before" and "after" - lines. */ -#define PCRE2GREP_BUFSIZE 20480 - -/* to make a symbol visible */ -#define PCRE2POSIX_EXP_DECL extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE2POSIX_EXP_DEFN extern __attribute__ ((visibility ("default"))) - -/* to make a symbol visible */ -#define PCRE2_EXP_DECL extern __attribute__ ((visibility ("default"))) - - -/* If you are compiling for a system other than a Unix-like system or - Win32, and it needs some magic to be inserted before the definition - of a function that is exported by the library, define this macro to - contain the relevant magic. If you do not define this macro, a suitable - __declspec value is used for Windows systems; in other environments - "extern" is used for a C compiler and "extern C" for a C++ compiler. - This macro apears at the start of every exported function that is part - of the external API. It does not appear on functions that are "external" - in the C sense, but which are internal to the library. */ -#define PCRE2_EXP_DEFN __attribute__ ((visibility ("default"))) - -/* Define to any value if linking statically (TODO: make nice with Libtool) */ -#define PCRE2_STATIC 1 - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. */ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* Define to 1 if you have the ANSI C header files. 
*/ -#define STDC_HEADERS 1 - -/* Define to any value to enable support for Just-In-Time compiling. */ -/* #undef SUPPORT_JIT */ - -/* Define to any value to allow pcre2grep to be linked with libbz2, so that it - is able to handle .bz2 files. */ -#define SUPPORT_LIBBZ2 /**/ - -/* Define to any value to allow pcre2test to be linked with libedit. */ -/* #undef SUPPORT_LIBEDIT */ - -/* Define to any value to allow pcre2test to be linked with libreadline. */ -#define SUPPORT_LIBREADLINE /**/ - -/* Define to any value to allow pcre2grep to be linked with libz, so that it - is able to handle .gz files. */ -#define SUPPORT_LIBZ /**/ - -/* Define to any value to enable the 16 bit PCRE2 library. */ -#define SUPPORT_PCRE16 /**/ - -/* Define to any value to enable JIT support in pcre2grep. */ -/* #undef SUPPORT_PCRE2GREP_JIT */ - -/* Define to any value to enable the 32 bit PCRE2 library. */ -#define SUPPORT_PCRE32 /**/ - -/* Define to any value to enable the 8 bit PCRE2 library. */ -#define SUPPORT_PCRE8 /**/ - -/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. - This will work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* - ASCII/UTF-8/16/32, but not both at once. */ -#define SUPPORT_UTF /**/ - -/* Define to any value for valgrind support to find invalid memory reads. */ -#define SUPPORT_VALGRIND /**/ - -/* Version number of package */ -#define VERSION "10.00-DEV" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to the type of a signed integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef int64_t */ - -/* Define to `unsigned int' if <sys/types.h> does not define.
*/ -/* #undef size_t */ diff --git a/src/dftables.c b/src/dftables.c new file mode 100644 index 0000000..6272d0f --- /dev/null +++ b/src/dftables.c @@ -0,0 +1,213 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2014 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This is a freestanding support program to generate a file containing +character tables for PCRE2. The tables are built according to the current +locale using the pcre2_maketables() function, which is part of the PCRE2 API. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <ctype.h> +#include <stdio.h> +#include <string.h> +#include <locale.h> + +#include "pcre2_internal.h" + +#define DFTABLES /* pcre2_maketables.c notices this */ +#include "pcre2_maketables.c" + +int main(int argc, char **argv) +{ +FILE *f; +int i = 1; +const unsigned char *tables; +const unsigned char *base_of_tables; + +/* By default, the default C locale is used rather than what the building user +happens to have set. However, if the -L option is given, set the locale from +the LC_xxx environment variables. */ + +if (argc > 1 && strcmp(argv[1], "-L") == 0) + { + setlocale(LC_ALL, ""); /* Set from environment variables */ + i++; + } + +if (argc < i + 1) + { + fprintf(stderr, "dftables: one filename argument is required\n"); + return 1; + } + +tables = maketables(); +base_of_tables = tables; + +f = fopen(argv[i], "wb"); +if (f == NULL) + { + fprintf(stderr, "dftables: failed to open %s for writing\n", argv[i]); + return 1; + } + +/* There are several fprintf() calls here, because gcc in pedantic mode +complains about the very long string otherwise.
*/ + +fprintf(f, + "/*************************************************\n" + "* Perl-Compatible Regular Expressions *\n" + "*************************************************/\n\n" + "/* This file was automatically written by the dftables auxiliary\n" + "program. It contains character tables that are used when no external\n" + "tables are passed to PCRE2 by the application that calls it. The tables\n" + "are used only for characters whose code values are less than 256. */\n\n"); + +/* Force config.h in z/OS; the comment written above is already closed */ + +#if defined NATIVE_ZOS +fprintf(f, + "/* For z/OS, config.h is forced */\n" + "#ifndef HAVE_CONFIG_H\n" + "#define HAVE_CONFIG_H 1\n" + "#endif\n\n"); +#endif + +fprintf(f, + "/* The following #includes are present because without them gcc 4.x may remove\n" + "the array definition from the final binary if PCRE2 is built into a static\n" + "library and dead code stripping is activated. This leads to link errors.\n" + "Pulling in the header ensures that the array gets flagged as \"someone\n" + "outside this compilation unit might reference this\" and so it will always\n" + "be supplied to the linker. */\n\n"); + +fprintf(f, + "#ifdef HAVE_CONFIG_H\n" + "#include \"config.h\"\n" + "#endif\n\n" + "#include \"pcre2_internal.h\"\n\n"); + +fprintf(f, + "const uint8_t PRIV(default_tables)[] = {\n\n" + "/* This table is a lower casing table. */\n\n"); + +fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); + } +fprintf(f, ",\n\n"); + +fprintf(f, "/* This table is a case flipping table. */\n\n"); + +fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); + } +fprintf(f, ",\n\n"); + +fprintf(f, + "/* This table contains bit maps for various character classes.\n" + "Each map is 32 bytes long and the bits run from the least\n" + "significant end of each byte.
The classes that have their own\n" + "maps are: space, xdigit, digit, upper, lower, word, graph\n" + "print, punct, and cntrl. Other classes are built from combinations. */\n\n"); + +fprintf(f, " "); +for (i = 0; i < cbit_length; i++) + { + if ((i & 7) == 0 && i != 0) + { + if ((i & 31) == 0) fprintf(f, "\n"); + fprintf(f, "\n "); + } + fprintf(f, "0x%02x", *tables++); + if (i != cbit_length - 1) fprintf(f, ","); + } +fprintf(f, ",\n\n"); + +fprintf(f, + "/* This table identifies various classes of character by individual bits:\n" + " 0x%02x white space character\n" + " 0x%02x letter\n" + " 0x%02x decimal digit\n" + " 0x%02x hexadecimal digit\n" + " 0x%02x alphanumeric or '_'\n" + " 0x%02x regular expression metacharacter or binary zero\n*/\n\n", + ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word, + ctype_meta); + +fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) + { + fprintf(f, " /* "); + if (isprint(i-8)) fprintf(f, " %c -", i-8); + else fprintf(f, "%3d-", i-8); + if (isprint(i-1)) fprintf(f, " %c ", i-1); + else fprintf(f, "%3d", i-1); + fprintf(f, " */\n "); + } + fprintf(f, "0x%02x", *tables++); + if (i != 255) fprintf(f, ","); + } + +fprintf(f, "};/* "); +if (isprint(i-8)) fprintf(f, " %c -", i-8); + else fprintf(f, "%3d-", i-8); +if (isprint(i-1)) fprintf(f, " %c ", i-1); + else fprintf(f, "%3d", i-1); +fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n"); + +fclose(f); +free((void *)base_of_tables); +return 0; +} + +/* End of dftables.c */ diff --git a/src/pcre2.h b/src/pcre2.h deleted file mode 100644 index be214ee..0000000 --- a/src/pcre2.h +++ /dev/null @@ -1,655 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* This is the public header file for the PCRE library, second API, to be -#included by applications that call PCRE2 functions.
- - Copyright (c) 2014 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -#ifndef _PCRE2_H -#define _PCRE2_H - -/* The current PCRE version information. */ - -#define PCRE2_MAJOR 10 -#define PCRE2_MINOR 00 -#define PCRE2_PRERELEASE -DEV -#define PCRE2_DATE 2014-99-99 - -/* When an application links to a PCRE DLL in Windows, the symbols that are -imported have to be identified as such. 
When building PCRE2, the appropriate -export setting is defined in pcre2_internal.h, which includes this file. So we -don't change existing definitions of PCRE2_EXP_DECL. */ - -#if defined(_WIN32) && !defined(PCRE2_STATIC) -# ifndef PCRE2_EXP_DECL -# define PCRE2_EXP_DECL extern __declspec(dllimport) -# endif -#endif - -/* By default, we use the standard "extern" declarations. */ - -#ifndef PCRE2_EXP_DECL -# ifdef __cplusplus -# define PCRE2_EXP_DECL extern "C" -# else -# define PCRE2_EXP_DECL extern -# endif -#endif - -/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and -uint8_t, UCHAR_MAX, etc are defined. */ - -#include <limits.h> -#include <stdlib.h> -#include <stdint.h> - -/* Allow for C++ users compiling this directly. */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* The following options can be passed to pcre2_compile(), pcre2_match(), or -pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it is -passed. */ - -#define PCRE2_ANCHORED 0x00000001 -#define PCRE2_NO_START_OPTIMIZE 0x00000002 -#define PCRE2_NO_UTF_CHECK 0x00000004 - -/* Other options that can be passed to pcre2_compile(). They may affect -compilation, JIT compilation, and/or interpretive execution.
The following tags -indicate which: - -C alters what is compiled -J alters what JIT compiles -E is inspected during pcre2_match() execution -D is inspected during pcre2_dfa_match() execution -*/ - -#define PCRE2_ALLOW_EMPTY_CLASS 0x00000008 /* C */ -#define PCRE2_ALT_BSUX 0x00000010 /* C */ -#define PCRE2_AUTO_CALLOUT 0x00000020 /* C */ -#define PCRE2_CASELESS 0x00000040 /* C */ -#define PCRE2_DOLLAR_ENDONLY 0x00000080 /* J E D */ -#define PCRE2_DOTALL 0x00000100 /* C */ -#define PCRE2_DUPNAMES 0x00000200 /* C */ -#define PCRE2_EXTENDED 0x00000400 /* C */ -#define PCRE2_FIRSTLINE 0x00000800 /* J E D */ -#define PCRE2_MATCH_UNSET_BACKREF 0x00001000 /* C J E */ -#define PCRE2_MULTILINE 0x00002000 /* C */ -#define PCRE2_NEVER_UCP 0x00004000 /* C */ -#define PCRE2_NEVER_UTF 0x00008000 /* C */ -#define PCRE2_NO_AUTO_CAPTURE 0x00010000 /* C */ -#define PCRE2_NO_AUTO_POSSESS 0x00020000 /* C */ -#define PCRE2_UCP 0x00040000 /* C J E D */ -#define PCRE2_UNGREEDY 0x00080000 /* C */ -#define PCRE2_UTF 0x00100000 /* C J E D */ - -/* These are for pcre2_jit_compile(). */ - -#define PCRE2_JIT 0x00000001 /* For full matching */ -#define PCRE2_JIT_PARTIAL_SOFT 0x00000002 -#define PCRE2_JIT_PARTIAL_HARD 0x00000004 - -/* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED, -PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these -functions, so take care not to define synonyms by mistake. */ - -#define PCRE2_NOTBOL 0x00000008 -#define PCRE2_NOTEOL 0x00000010 -#define PCRE2_NOTEMPTY 0x00000020 -#define PCRE2_NOTEMPTY_ATSTART 0x00000040 -#define PCRE2_PARTIAL_SOFT 0x00000080 -#define PCRE2_PARTIAL_HARD 0x00000100 - -/* These are additional options for pcre2_dfa_match(). */ - -#define PCRE2_DFA_RESTART 0x00000200 -#define PCRE2_DFA_SHORTEST 0x00000400 - -/* Newline and \R settings, for use in the compile and match contexts. The -newline values must be kept in step with values set in config.h and both sets -must all be greater than zero. 
*/ - -#define PCRE2_NEWLINE_CR 1 -#define PCRE2_NEWLINE_LF 2 -#define PCRE2_NEWLINE_CRLF 3 -#define PCRE2_NEWLINE_ANY 4 -#define PCRE2_NEWLINE_ANYCRLF 5 - -#define PCRE2_BSR_UNICODE 1 -#define PCRE2_BSR_ANYCRLF 2 - -/* Error codes: no match and partial match are "expected" errors. */ - -#define PCRE2_ERROR_NOMATCH (-1) -#define PCRE2_ERROR_PARTIAL (-2) - -/* Error codes for UTF-8 validity checks */ - -#define PCRE2_ERROR_UTF8_ERR1 (-3) -#define PCRE2_ERROR_UTF8_ERR2 (-4) -#define PCRE2_ERROR_UTF8_ERR3 (-5) -#define PCRE2_ERROR_UTF8_ERR4 (-6) -#define PCRE2_ERROR_UTF8_ERR5 (-7) -#define PCRE2_ERROR_UTF8_ERR6 (-8) -#define PCRE2_ERROR_UTF8_ERR7 (-9) -#define PCRE2_ERROR_UTF8_ERR8 (-10) -#define PCRE2_ERROR_UTF8_ERR9 (-11) -#define PCRE2_ERROR_UTF8_ERR10 (-12) -#define PCRE2_ERROR_UTF8_ERR11 (-13) -#define PCRE2_ERROR_UTF8_ERR12 (-14) -#define PCRE2_ERROR_UTF8_ERR13 (-15) -#define PCRE2_ERROR_UTF8_ERR14 (-16) -#define PCRE2_ERROR_UTF8_ERR15 (-17) -#define PCRE2_ERROR_UTF8_ERR16 (-18) -#define PCRE2_ERROR_UTF8_ERR17 (-19) -#define PCRE2_ERROR_UTF8_ERR18 (-20) -#define PCRE2_ERROR_UTF8_ERR19 (-21) -#define PCRE2_ERROR_UTF8_ERR20 (-22) -#define PCRE2_ERROR_UTF8_ERR21 (-23) - -/* Error codes for UTF-16 validity checks */ - -#define PCRE2_ERROR_UTF16_ERR1 (-24) -#define PCRE2_ERROR_UTF16_ERR2 (-25) -#define PCRE2_ERROR_UTF16_ERR3 (-26) - -/* Error codes for UTF-32 validity checks */ - -#define PCRE2_ERROR_UTF32_ERR1 (-27) -#define PCRE2_ERROR_UTF32_ERR2 (-28) - -/* Error codes for pcre2[_dfa]_match() */ - -#define PCRE2_ERROR_BADCOUNT (-29) -#define PCRE2_ERROR_BADENDIANNESS (-30) -#define PCRE2_ERROR_BADLENGTH (-31) -#define PCRE2_ERROR_BADMAGIC (-32) -#define PCRE2_ERROR_BADMODE (-33) -#define PCRE2_ERROR_BADOFFSET (-34) -#define PCRE2_ERROR_BADOPTION (-35) -#define PCRE2_ERROR_BADUTFOFFSET (-36) -#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */ -#define PCRE2_ERROR_DFA_BADRESTART (-38) -#define PCRE2_ERROR_DFA_RECURSE (-39) -#define 
PCRE2_ERROR_DFA_UCOND (-40) -#define PCRE2_ERROR_DFA_UITEM (-41) -#define PCRE2_ERROR_DFA_UMLIMIT (-42) -#define PCRE2_ERROR_DFA_WSSIZE (-43) -#define PCRE2_ERROR_INTERNAL (-44) -#define PCRE2_ERROR_JIT_BADOPTION (-45) -#define PCRE2_ERROR_JIT_STACKLIMIT (-46) -#define PCRE2_ERROR_MATCHLIMIT (-47) -#define PCRE2_ERROR_NOMEMORY (-48) -#define PCRE2_ERROR_NOSUBSTRING (-49) -#define PCRE2_ERROR_NULL (-50) -#define PCRE2_ERROR_RECURSELOOP (-51) -#define PCRE2_ERROR_RECURSIONLIMIT (-52) - -/* Request types for pcre2_pattern_info() */ - -#define PCRE2_INFO_ALLOPTIONS 0 -#define PCRE2_INFO_ARGOPTIONS 1 -#define PCRE2_INFO_BACKREFMAX 2 -#define PCRE2_INFO_BSR 3 -#define PCRE2_INFO_CAPTURECOUNT 4 -#define PCRE2_INFO_FIRSTCODEUNIT 5 -#define PCRE2_INFO_FIRSTCODETYPE 6 -#define PCRE2_INFO_FIRSTBITMAP 7 -#define PCRE2_INFO_HASCRORLF 8 -#define PCRE2_INFO_JCHANGED 9 -#define PCRE2_INFO_JITSIZE 10 -#define PCRE2_INFO_LASTCODEUNIT 11 -#define PCRE2_INFO_LASTCODETYPE 12 -#define PCRE2_INFO_MATCHEMPTY 13 -#define PCRE2_INFO_MATCHLIMIT 14 -#define PCRE2_INFO_MAXLOOKBEHIND 15 -#define PCRE2_INFO_MINLENGTH 16 -#define PCRE2_INFO_NAMECOUNT 17 -#define PCRE2_INFO_NAMEENTRYSIZE 18 -#define PCRE2_INFO_NAMETABLE 19 -#define PCRE2_INFO_NEWLINE 20 -#define PCRE2_INFO_RECURSIONLIMIT 21 -#define PCRE2_INFO_SIZE 22 - -/* Request types for pcre2_config(). */ - -#define PCRE2_CONFIG_BSR 0 -#define PCRE2_CONFIG_JIT 1 -#define PCRE2_CONFIG_JITTARGET 2 -#define PCRE2_CONFIG_LINKSIZE 3 -#define PCRE2_CONFIG_MATCHLIMIT 4 -#define PCRE2_CONFIG_NEWLINE 6 -#define PCRE2_CONFIG_PARENSLIMIT 7 -#define PCRE2_CONFIG_RECURSIONLIMIT 5 -#define PCRE2_CONFIG_STACKRECURSE 8 -#define PCRE2_CONFIG_UTF 9 -#define PCRE2_CONFIG_VERSION 10 - -/* Types for code units in patterns and subject strings. 
*/ - -typedef uint8_t PCRE2_UCHAR8; -typedef uint16_t PCRE2_UCHAR16; -typedef uint32_t PCRE2_UCHAR32; - -typedef const PCRE2_UCHAR8 *PCRE2_SPTR8; -typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; -typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; - -/* Offsets in the pattern (for errors) and in the subject (after a match) are -unsigned 32-bit numbers. We also define a value to indicate "unset" in the -offset vector (ovector). */ - -#define PCRE2_OFFSET PCRE2_UCHAR32 -#define PCRE2_UNSET (~(PCRE2_OFFSET)0) - -/* Generic types for opaque structures and JIT callback functions. These -declarations are defined in a macro that is expanded for each width later. */ - -#define PCRE2_TYPES_LIST \ -struct pcre2_real_general_context; \ -typedef struct pcre2_real_general_context pcre2_general_context; \ -\ -struct pcre2_real_compile_context; \ -typedef struct pcre2_real_compile_context pcre2_compile_context; \ -\ -struct pcre2_real_match_context; \ -typedef struct pcre2_real_match_context pcre2_match_context; \ -\ -struct pcre2_real_code; \ -typedef struct pcre2_real_code pcre2_code; \ -\ -struct pcre2_real_match_data; \ -typedef struct pcre2_real_match_data pcre2_match_data; \ -\ -struct pcre2_real_jit_stack; \ -typedef struct pcre2_real_jit_stack pcre2_jit_stack; \ -\ -typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *); - - -/* The structure for passing out data via the pcre_callout_function. We use a -structure so that new fields can be added on the end in future versions, -without changing the API of the function, thereby allowing old clients to work -without modification. Define the generic version in a macro; the width-specific -versions are generated from this macro below. 
*/ - -#define PCRE2_STRUCTURE_LIST \ -typedef struct pcre2_callout_block { \ - int version; /* Identifies version of block */ \ - /* ------------------------ Version 0 ------------------------------- */ \ - uint32_t callout_number; /* Number compiled into pattern */ \ - PCRE2_OFFSET *offset_vector; /* The offset vector */ \ - PCRE2_SPTR subject; /* The subject being matched */ \ - size_t subject_length; /* The length of the subject */ \ - PCRE2_OFFSET start_match; /* Offset to start of this match attempt */ \ - PCRE2_OFFSET current_position; /* Where we currently are in the subject */ \ - uint32_t capture_top; /* Max current capture */ \ - uint32_t capture_last; /* Most recently closed capture */ \ - void *callout_data; /* Data passed in with the call */ \ - /* ------------------- Added for Version 1 -------------------------- */ \ - PCRE2_OFFSET pattern_position; /* Offset to next item in the pattern */ \ - PCRE2_OFFSET next_item_length; /* Length of next item in the pattern */ \ - /* ------------------- Added for Version 2 -------------------------- */ \ - PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ - /* ------------------------------------------------------------------ */ \ -} pcre2_callout_block; - - -/* Utility functions for byte order swaps. These are not generic functions; -each appears only its own library. */ - -PCRE2_EXP_DECL int pcre2_utf16_to_host_byte_order(PCRE2_UCHAR16 *, - PCRE2_SPTR16, int, int *, int); -PCRE2_EXP_DECL int pcre2_utf32_to_host_byte_order(PCRE2_UCHAR32 *, - PCRE2_SPTR32, int, int *, int); - - -/* List the generic forms of all other functions in macros, which will be -expanded for each width below. Start with functions that give general -information. */ - -#define PCRE2_GENERAL_INFO_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_config(int, void *, size_t); - - -/* Functions for manipulating contexts. 
*/ - -#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_general_context *pcre2_general_context_create( \ - void *(*)(size_t, void *), \ - void (*)(void *, void *), void *); \ -PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *); - -#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_compile_context *pcre2_compile_context_copy(pcre2_compile_context *); \ -PCRE2_EXP_DECL \ - pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\ -PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \ - const unsigned char *); \ -PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\ - pcre2_compile_context *, int (*)(uint32_t)); \ - -#define PCRE2_MATCH_CONTEXT_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \ -PCRE2_EXP_DECL \ - pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \ -PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \ -PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ - int (*)(pcre2_callout_block *), void *); \ -PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ - uint32_t); \ -PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ - pcre2_match_context *, void *(*)(size_t, 
void *), \ - void (*)(void *, void *), void *); - - -/* Functions concerned with compiling a pattern to PCRE internal code. */ - -#define PCRE2_COMPILE_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \ - int *, PCRE2_OFFSET *, pcre2_compile_context *); \ -PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); - - -/* Functions that give information about a compiled pattern. */ - -#define PCRE2_PATTERN_INFO_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \ - void *); - - -/* Functions for running a match and inspecting the result. */ - -#define PCRE2_MATCH_FUNCTIONS \ -PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_match_data_create(uint32_t, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL \ - pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \ - pcre2_general_context *); \ -PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \ - PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ - pcre2_match_data *, pcre2_match_context *, int *, \ - size_t); \ -PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \ - PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ - pcre2_match_data *, pcre2_match_context *); \ -PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_leftchar(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \ -PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_OFFSET *pcre2_get_ovector_pointer(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_rightchar(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_startchar(pcre2_match_data *); - - -/* Convenience functions for handling matched substrings. 
*/ - -#define PCRE2_SUBSTRING_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_UCHAR *, size_t); \ -PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \ - int, PCRE2_UCHAR *, size_t); \ -PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \ -PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \ - PCRE2_SPTR, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \ - int, PCRE2_UCHAR **); \ -PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \ - PCRE2_SPTR); \ -PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \ - int); \ -PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \ - PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_substring_number_from_name(\ - const pcre2_code *, PCRE2_SPTR); \ -PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \ -PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \ - PCRE2_UCHAR ***, size_t **); - - -/* Functions for JIT processing */ - -#define PCRE2_JIT_FUNCTIONS \ -PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \ -PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ - PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ - pcre2_match_data *, pcre2_match_context *, \ - pcre2_jit_stack *); \ -PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\ -PCRE2_EXP_DECL \ - pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \ - size_t, size_t); \ -PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \ - pcre2_jit_callback, void *); \ -PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *); - - -/* Other miscellaneous functions. 
*/ - -#define PCRE2_OTHER_FUNCTIONS \ -PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, size_t); \ -PCRE2_EXP_DECL \ - const uint8_t *pcre2_maketables(pcre2_general_context *); \ -PCRE2_EXP_DECL int pcre2_pattern_to_host_byte_order(pcre2_code *); - - -/* Define macros that generate width-specific names from generic versions. The -three-level macro scheme is necessary to get the macros expanded when we want -them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for -generating three versions of everything below. After that, PCRE2_SUFFIX will be -re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as -pcre2_compile are called by application code. */ - -#define PCRE2_JOIN(a,b) a ## b -#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b) -#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH) - - -/* Data types */ - -#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) -#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) - -#define pcre2_code PCRE2_SUFFIX(pcre2_code_) -#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) -#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) - -#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) -#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_) -#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_) -#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_) -#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) -#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) - - -/* Data blocks */ - -#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) -#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) -#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) -#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_) -#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) - - -/* Functions: the complete list in alphabetical order */ - -#define pcre2_code_free 
PCRE2_SUFFIX(pcre2_code_free_) -#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) -#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) -#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_) -#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_) -#define pcre2_config PCRE2_SUFFIX(pcre2_config_) -#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_) -#define pcre2_match PCRE2_SUFFIX(pcre2_match_) -#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_) -#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) -#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) -#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) -#define pcre2_get_leftchar PCRE2_SUFFIX(pcre2_get_leftchar_) -#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) -#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_) -#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_) -#define pcre2_get_rightchar PCRE2_SUFFIX(pcre2_get_rightchar_) -#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) -#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) -#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) -#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_) -#define pcre2_jit_stack_alloc PCRE2_SUFFIX(pcre2_jit_stack_alloc_) -#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_) -#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) -#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) -#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) -#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) -#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) -#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) -#define pcre2_match_data_create_from_pattern 
PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) -#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) -#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) -#define pcre2_pattern_to_host_byte_order PCRE2_SUFFIX(pcre2_pattern_to_host_byte_order_) -#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_) -#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_) -#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) -#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) -#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) -#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) -#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_) -#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_) -#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) -#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) -#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) -#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) -#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) -#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_) -#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_) -#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_) -#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_) -#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_) -#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_) -#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_) -#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_) -#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_) - - 
-/* Now generate all three sets of width-specific structures and function -prototypes. */ - -#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \ -PCRE2_TYPES_LIST \ -PCRE2_STRUCTURE_LIST \ -PCRE2_GENERAL_INFO_FUNCTIONS \ -PCRE2_GENERAL_CONTEXT_FUNCTIONS \ -PCRE2_COMPILE_CONTEXT_FUNCTIONS \ -PCRE2_MATCH_CONTEXT_FUNCTIONS \ -PCRE2_COMPILE_FUNCTIONS \ -PCRE2_PATTERN_INFO_FUNCTIONS \ -PCRE2_MATCH_FUNCTIONS \ -PCRE2_SUBSTRING_FUNCTIONS \ -PCRE2_JIT_FUNCTIONS \ -PCRE2_OTHER_FUNCTIONS - -#define PCRE2_LOCAL_WIDTH 8 -PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS -#undef PCRE2_LOCAL_WIDTH - -#define PCRE2_LOCAL_WIDTH 16 -PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS -#undef PCRE2_LOCAL_WIDTH - -#define PCRE2_LOCAL_WIDTH 32 -PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS -#undef PCRE2_LOCAL_WIDTH - -/* Undefine the list macros; they are no longer needed. */ - -#undef PCRE2_TYPES_LIST -#undef PCRE2_STRUCTURE_LIST -#undef PCRE2_GENERAL_INFO_FUNCTIONS -#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS -#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS -#undef PCRE2_MATCH_CONTEXT_FUNCTIONS -#undef PCRE2_COMPILE_FUNCTIONS -#undef PCRE2_PATTERN_INFO_FUNCTIONS -#undef PCRE2_MATCH_FUNCTIONS -#undef PCRE2_SUBSTRING_FUNCTIONS -#undef PCRE2_JIT_FUNCTIONS -#undef PCRE2_OTHER_FUNCTIONS -#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS - -/* Re-define PCRE2_SUFFIX to use the external width value, if defined. -Otherwise, undefine the other macros and make PCRE2_SUFFIX a no-op, to reduce -confusion. 
*/ - -#undef PCRE2_SUFFIX -#ifdef PCRE2_CODE_UNIT_WIDTH -#if PCRE2_CODE_UNIT_WIDTH != 8 && \ - PCRE2_CODE_UNIT_WIDTH != 16 && \ - PCRE2_CODE_UNIT_WIDTH != 32 -#error PCRE2_CODE_UNIT_WIDTH must be 8, 16, or 32 -#endif -#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH) -#else -#undef PCRE2_JOIN -#undef PCRE2_GLUE -#define PCRE2_SUFFIX(a) a -#endif - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcre2.h */ diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index 2aa113f..866f98a 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -1251,7 +1251,7 @@ for (;;) followed by a multi-byte character. The length in the table is a minimum, so we have to arrange to skip the extra code units. */ -#ifdef SUPPORT_WIDE_CHARS +#ifdef MAYBE_UTF_MULTI if (utf) switch(c) { case OP_CHAR: @@ -1315,7 +1315,7 @@ for (;;) } #else (void)(utf); /* Keep compiler happy by referencing function argument */ -#endif +#endif /* MAYBE_UTF_MULTI */ } } diff --git a/src/pcre2_chartables.c b/src/pcre2_chartables.c deleted file mode 120000 index ed9b54c..0000000 --- a/src/pcre2_chartables.c +++ /dev/null @@ -1 +0,0 @@ -./pcre2_chartables.c.dist \ No newline at end of file diff --git a/src/pcre2_chartables.c.dist b/src/pcre2_chartables.c.dist index 0ffa098..203cb1a 100644 --- a/src/pcre2_chartables.c.dist +++ b/src/pcre2_chartables.c.dist @@ -3,19 +3,19 @@ *************************************************/ /* This file contains character tables that are used when no external tables -are passed to PCRE by the application that calls it. The tables are used only +are passed to PCRE2 by the application that calls it. The tables are used only for characters whose code values are less than 256. This is a default version of the tables that assumes ASCII encoding.
A program -called dftables (which is distributed with PCRE) can be used to build +called dftables (which is distributed with PCRE2) can be used to build alternative versions of this file. This is necessary if you are running in an EBCDIC environment, or if you want to default to a different encoding, for example ISO-8859-1. When dftables is run, it creates these tables in the -current locale. If PCRE is configured with --enable-rebuild-chartables, this +current locale. If PCRE2 is configured with --enable-rebuild-chartables, this happens automatically. The following #includes are present because without them gcc 4.x may remove the -array definition from the final binary if PCRE is built into a static library +array definition from the final binary if PCRE2 is built into a static library and dead code stripping is activated. This leads to link errors. Pulling in the header ensures that the array gets flagged as "someone outside this compilation unit might reference this" and so it will always be supplied to the linker. */ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 4437af2..0a236db 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -54,7 +54,6 @@ POSSIBILITY OF SUCH DAMAGE. by defining macros in order to minimize #if usage. */ #if PCRE2_CODE_UNIT_WIDTH == 8 -#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ #define STRING_UTFn_RIGHTPAR STRING_UTF8_RIGHTPAR, 5 #define XDIGIT(c) xdigitab[c] @@ -62,10 +61,9 @@ by defining macros in order to minimize #if usage. */ #define XDIGIT(c) (MAX_255(c)? 
xdigitab[c] : 0xff) #if PCRE2_CODE_UNIT_WIDTH == 16 -#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ #define STRING_UTFn_RIGHTPAR STRING_UTF16_RIGHTPAR, 6 -#else /* 33-bit */ +#else /* 32-bit */ #define STRING_UTFn_RIGHTPAR STRING_UTF32_RIGHTPAR, 6 #endif #endif @@ -1469,7 +1467,6 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); case OP_POSQUERYI: case OP_NOTPOSQUERY: case OP_NOTPOSQUERYI: - if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); break; @@ -1487,10 +1484,9 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); case OP_POSUPTOI: case OP_NOTPOSUPTO: case OP_NOTPOSUPTOI: - if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); break; -#endif +#endif /* MAYBE_UTF_MULTI */ /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument string. */ @@ -2353,7 +2349,7 @@ for (;;) } #else (void)(utf); /* Keep compiler happy by referencing function argument */ -#endif +#endif /* MAYBE_UTF_MULTI */ } } } @@ -2498,7 +2494,7 @@ for (;;) } #else (void)(utf); /* Keep compiler happy by referencing function argument */ -#endif +#endif /* MAYBE_UTF_MULTI */ } } } @@ -2879,9 +2875,10 @@ if (end >= start) *uchardata++ = start; } #endif - *uchardptr = uchardata; /* Updata extra data pointer */ } +#else + (void)uchardptr; /* Avoid compiler warning */ #endif /* SUPPORT_WIDE_CHARS */ return n8; /* Number of 8-bit characters */ @@ -4244,7 +4241,7 @@ for (;; ptr++) c |= UTF_LENGTH; /* Flag c as a length */ } else -#endif /* SUPPORT_UTF */ +#endif /* MAYBE_UTF_MULTI */ /* Handle the case of a single charater - either with no UTF support, or with UTF disabled, or for a single-code-unit UTF character. 
*/ @@ -4357,7 +4354,7 @@ for (;; ptr++) code += c & 7; } else -#endif +#endif /* MAYBE_UTF_MULTI */ { *code++ = c; if (prop_type >= 0) @@ -4394,7 +4391,7 @@ for (;; ptr++) code += c & 7; } else -#endif +#endif /* MAYBE_UTF_MULTI */ { *code++ = c; if (prop_type >= 0) @@ -7265,7 +7262,7 @@ pcre2_compile(PCRE2_SPTR pattern, int patlen, uint32_t options, BOOL utf; /* Set TRUE for UTF mode */ pcre2_real_code *re = NULL; /* What we will return */ pcre2_compile_context default_context; /* For use if no context given */ -compile_block cb; /* "Static" compile-time data */ +compile_block cb; /* "Static" compile-time data */ const uint8_t *tables; /* Char tables base pointer */ PCRE2_UCHAR *code; /* Current pointer in compiled code */ @@ -7277,6 +7274,7 @@ size_t re_blocksize; /* Size of memory block */ int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ uint32_t firstcu, reqcu; /* Value of first/req code unit */ +uint32_t setflags = 0; /* NL and BSR set flags */ uint32_t skipatstart; /* When checking (*UTF) etc */ uint32_t limit_match = UINT32_MAX; /* Unset match limits */ @@ -7350,7 +7348,6 @@ if (patlen < 0) patlen = PRIV(strlen)(pattern); else /* ------------ Initialize the "static" compile data -------------- */ - tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables); cb.lcc = tables + lcc_offset; /* Individual */ @@ -7388,7 +7385,6 @@ references to help in deciding whether (.*) can be treated as anchored or not.
cb.top_backref = 0; cb.backref_map = 0; - /* --------------- Start looking at the pattern --------------- */ /* Check for global one-time option settings at the start of the pattern, and @@ -7418,10 +7414,12 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && case PSO_NL: newline = p->value; + setflags |= PCRE2_NL_SET; break; case PSO_BSR: bsr = p->value; + setflags |= PCRE2_BSR_SET; break; case PSO_LIMM: @@ -7456,7 +7454,7 @@ ptr += skipatstart; /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */ #ifndef SUPPORT_UTF -if ((cb->external_options & (PCRE2_UTF|PCRE2_UCP)) != 0) +if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0) { errorcode = ERR32; goto HAD_ERROR; @@ -7585,7 +7583,7 @@ re->blocksize = re_blocksize; re->magic_number = MAGIC_NUMBER; re->compile_options = options; re->overall_options = cb.external_options; -re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags; +re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; re->limit_match = limit_match; re->limit_recursion = limit_recursion; re->first_codeunit = 0; diff --git a/src/pcre2_config.c b/src/pcre2_config.c index 4b24407..adf8937 100644 --- a/src/pcre2_config.c +++ b/src/pcre2_config.c @@ -42,6 +42,11 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif +/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes +its value gets changed by pcre2_internal.h to be in code units. */ + +static int configured_link_size = LINK_SIZE; + #include "pcre2_internal.h" /* These macros are the standard way of turning unquoted text into C strings. @@ -108,7 +113,7 @@ switch (what) break; case PCRE2_CONFIG_LINKSIZE: - *((int *)where) = LINK_SIZE; + *((int *)where) = configured_link_size; break; case PCRE2_CONFIG_MATCHLIMIT: diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 590878e..d1d8e49 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -532,6 +532,8 @@ bytes in a code unit in that mode. 
*/ #define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ #define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */ #define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */ +#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */ +#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 90685e4..94c7bae 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -127,7 +127,7 @@ values of 2 or 4 are also supported. */ #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ #else -#error LINK_SIZE must be either 2, 3, or 4 +#error LINK_SIZE must be 2, 3, or 4 #endif @@ -155,7 +155,7 @@ values of 2 or 4 are also supported. */ #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ #else -#error LINK_SIZE must be either 2, 3, or 4 +#error LINK_SIZE must be 2, 3, or 4 #endif @@ -268,6 +268,7 @@ UTF support is omitted, we don't even define them. */ /* ------------------- 8-bit support ------------------ */ #if PCRE2_CODE_UNIT_WIDTH == 8 +#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ /* The largest UTF code point that can be encoded as a single code unit. */ @@ -352,6 +353,7 @@ because almost all calls are already within a block of UTF-8 only code. */ /* ------------------- 16-bit support ------------------ */ #elif PCRE2_CODE_UNIT_WIDTH == 16 +#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ /* The largest UTF code point that can be encoded as a single code unit. */ @@ -458,7 +460,7 @@ code. */ #else /* These are trivial for the 32-bit library, since all UTF-32 characters fit -into one PCRE_UCHAR unit. */ +into one PCRE2_UCHAR unit. 
*/ #define MAX_UTF_SINGLE_CU (0x10ffffu) #define HAS_EXTRALEN(c) (0) diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c index 12ad0b2..5df7ca4 100644 --- a/src/pcre2_maketables.c +++ b/src/pcre2_maketables.c @@ -61,25 +61,31 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */ a pointer to them. They are build using the ctype functions, and consequently their contents will depend upon the current locale setting. When compiled as part of the library, the store is obtained via a general context malloc, if -supplied, but otherwise via malloc(). +supplied, but when DFTABLES is defined (when compiling the dftables auxiliary +program) malloc() is used, and the function has a different name so as not to +clash with the prototype in pcre2.h. -Arguments: a PCRE2 general context (for malloc) or NULL +Arguments: none when DFTABLES is defined + else a PCRE2 general context or NULL Returns: pointer to the contiguous block of data */ +#ifdef DFTABLES /* Included in freestanding dftables.c program */ +static const uint8_t *maketables(void) +{ +uint8_t *yield = (uint8_t *)malloc(tables_length); + +#else /* Not DFTABLES, compiling the library */ PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION pcre2_maketables(pcre2_general_context *gcontext) { -uint8_t *yield, *p; -int i; +uint8_t *yield = (uint8_t *)((gcontext != NULL)? 
+ gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) : + malloc(tables_length)); +#endif /* DFTABLES */ -#ifndef DFTABLES -if (gcontext != NULL) - yield = (uint8_t *)gcontext->memctl.malloc(tables_length, - gcontext->memctl.memory_data); -else -#endif -yield = (uint8_t *)malloc(tables_length); +int i; +uint8_t *p; if (yield == NULL) return NULL; p = yield; diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index acb1e3e..add6312 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -103,7 +103,7 @@ if (utf) one_code_unit = (c & 0xfc00) != 0xd800; #else one_code_unit = (c & 0xfffff800u) != 0xd800u; -#endif +#endif /* CODE_UNIT_WIDTH */ } #endif /* SUPPORT_UTF */ @@ -117,9 +117,13 @@ if (one_code_unit) return 0; } -/* Per-width code for invalid UTF code units and multi-unit UTF characters. */ +/* Code for invalid UTF code units and multi-unit UTF characters is different +for each width. If UTF is not supported, control should never get here, but we +need a return statement to keep the compiler happy. */ -#ifdef SUPPORT_UTF +#ifndef SUPPORT_UTF +return 0; +#else /* Malformed UTF-8 should occur only if the sanity check has been turned off. Rather than swallow random bytes, just stop if we hit a bad one. Print it with @@ -209,15 +213,27 @@ while (*ptr != '\0') * Find Unicode property name * *************************************************/ +/* When there is no UTF/UCP support, the table of names does not exist. This +function should not be called in such configurations, because a pattern that +tries to use Unicode properties won't compile. Rather than put lots of #ifdefs +into the main code, however, we just put one into this function. */ + static const char * get_ucpname(unsigned int ptype, unsigned int pvalue) { +#ifdef SUPPORT_UTF int i; for (i = utt_size - 1; i >= 0; i--) { if (ptype == utt[i].type && pvalue == utt[i].value) break; } return (i >= 0)? 
utt_names + utt[i].name_offset : "??"; + +#else /* No UTF support */ +(void)ptype; +(void)pvalue; +return "??"; +#endif /* SUPPORT_UTF */ } diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c index 8382ab8..6cdaf81 100644 --- a/src/pcre2_valid_utf.c +++ b/src/pcre2_valid_utf.c @@ -62,10 +62,10 @@ PRIV(valid_utf)(PCRE2_SPTR string, int length, PCRE2_OFFSET *erroroffset) { (void)string; (void)length; -(void)erroroffset); +(void)erroroffset; return 0; } -#else +#else /* UTF is supported */ @@ -392,8 +392,8 @@ for (p = string; length-- > 0; p++) } } return 0; -#endif /* CODE_UNIT_WIDTH */ -#endif /* SUPPORT_UTF */ +#endif /* CODE_UNIT_WIDTH */ } +#endif /* SUPPORT_UTF */ /* End of pcre2_valid_utf.c */ diff --git a/src/pcre2_xclass.c b/src/pcre2_xclass.c index eb38359..9f29b92 100644 --- a/src/pcre2_xclass.c +++ b/src/pcre2_xclass.c @@ -110,7 +110,7 @@ while ((t = *data++) != XCL_END) } else #endif - x = *data++; + x = *data++; if (c == x) return !negated; } else if (t == XCL_RANGE) @@ -260,6 +260,8 @@ while ((t = *data++) != XCL_END) data += 2; } +#else + (void)utf; /* Avoid compiler warning */ #endif /* SUPPORT_UTF */ } diff --git a/src/pcre2test.c b/src/pcre2test.c index 99ed684..8d96a15 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -346,6 +346,9 @@ either on a pattern or a data line, so they must all be distinct. */ #define CTL_PATLEN 0x00040000 #define CTL_POSIX 0x00080000 +#define CTL_BSR_SET 0x00100000 /* This is informational */ +#define CTL_NL_SET 0x00200000 /* This is informational */ + #define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */ #define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE) /* For testing */ #define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL) @@ -389,8 +392,7 @@ typedef struct datctl { /* Structure for data line modifiers. 
*/ enum { CTX_PAT, /* Active pattern context */ CTX_DEFPAT, /* Default pattern context */ CTX_DAT, /* Active data (match) context */ - CTX_DEFDAT, /* Default data (match) context */ - CTX_DEFANY }; /* Any default context (depends on the modifier) */ + CTX_DEFDAT }; /* Default data (match) context */ /* Macros to simplify the big table below. */ @@ -619,7 +621,9 @@ static patctl pat_patctl; static datctl def_datctl; static datctl dat_datctl; +#ifdef SUPPORT_PCRE8 static regex_t preg = { NULL, NULL, 0, 0 }; +#endif static int *dfa_workspace = NULL; static const uint8_t *locale_tables = NULL; @@ -678,6 +682,7 @@ static uint32_t *pbuffer32 = NULL; #define CAST8VAR(x) CASTVAR(uint8_t *, x) #define SET(x,y) SETOP(x,y,=) #define SETPLUS(x,y) SETOP(x,y,+=) +#define strlen8 strlen /* ---------------- Mode-dependent, runtime-testing macros ------------------*/ @@ -994,11 +999,11 @@ the three different cases. */ (t)(G(a,BITTWO)->b)) #define CASTVAR(t,x) ( \ - (test_mode == G(G(PCRE,BITONE(,_MODE))? \ + (test_mode == G(G(PCRE,BITONE),_MODE))? \ (t)G(x,BITONE) : (t)G(x,BITTWO)) #define CODE_UNIT(a,b) ( \ - (test_mode == G(G(PCRE,BITONE(,_MODE))? \ + (test_mode == G(G(PCRE,BITONE),_MODE))? \ (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \ (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b])) @@ -1021,13 +1026,13 @@ the three different cases. 
*/ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ else \ - lv = G(PCHARS,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) + lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) #define PCHARSV(p, offset, len, utf, f) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ else \ - (void)G(PCHARS,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) + (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1096,10 +1101,10 @@ the three different cases. */ #define PCRE2_SET_CALLOUT(a,b,c) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(pcre2_set_callout_,BITONE)(G(a,BITONE), \ - (int (*)(G(pcre2_callout_block_BITONE) *))b,c); \ + (int (*)(G(pcre2_callout_block_,BITONE) *))b,c); \ else \ G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \ - (int (*)(G(pcre2_callout_block_BITTWO) *))b,c); + (int (*)(G(pcre2_callout_block_,BITTWO) *))b,c); #define PCRE2_SET_CHARACTER_TABLES(a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1127,10 +1132,10 @@ the three different cases. */ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),G(c,BITONE),\ + a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ (G(PCRE2_UCHAR,BITONE) *)d,e); \ else \ - a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ + a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ (G(PCRE2_UCHAR,BITTWO) *)d,e) #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ @@ -1193,20 +1198,20 @@ the three different cases. 
*/ else G(x,BITTWO) z y #define SETCASTPTR(x,y) \ - if (test_mode == PCRE8_MODE) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(x,BITONE) = (G(G(uint,BITONE),_t) *)y; \ else \ G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)y #define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \ - G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p)) : \ - G(strlen(BITTWO)((G(PCRE2_SPTR,BITTWO))p))) + G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \ + G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p)) #define SUB1(a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - G(a,BITONE))(G(b,BITONE)); \ + G(a,BITONE)(G(b,BITONE)); \ else \ - G(a,BITTWO))(G(b,BITTWO)) + G(a,BITTWO)(G(b,BITTWO)) #define SUB2(a,b,c) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -2031,6 +2036,7 @@ return yield; +#ifdef SUPPORT_PCRE8 /************************************************* * Convert character value to UTF-8 * *************************************************/ @@ -2062,6 +2068,7 @@ for (j = i; j > 0; j--) *utf8bytes = utf8_table2[i] | cvalue; return i + 1; } +#endif /* SUPPORT_PCRE8 */ @@ -2414,7 +2421,6 @@ Arguments: CTX_DEFPAT => default pattern context CTX_DAT => data context CTX_DEFDAT => default data context - CTX_DEFANY => any default context (depends on the modifier) pctl point to pattern control block dctl point to data control block c a single character or 0 @@ -2445,7 +2451,7 @@ switch (m->which) { case MOD_CTB: /* Compile or match context modifier */ case MOD_CTC: /* Compile context modifier */ - if (ctx == CTX_DEFPAT || ctx == CTX_DEFANY) field = PTR(default_pat_context); + if (ctx == CTX_DEFPAT) field = PTR(default_pat_context); else if (ctx == CTX_PAT) field = PTR(pat_context); if (field != NULL || m->which == MOD_CTC) break; @@ -2455,7 +2461,7 @@ switch (m->which) offset = (PCRE2_OFFSET)(m->value); case MOD_CTM: /* Match context modifier */ - if (ctx == CTX_DEFDAT || ctx == CTX_DEFANY) field = PTR(default_dat_context); + if (ctx == CTX_DEFDAT) field = PTR(default_dat_context); else if (ctx == CTX_DAT) 
field = PTR(dat_context); break; @@ -2507,7 +2513,6 @@ Arguments: CTX_DEFPAT => default pattern context CTX_DAT => data context CTX_DEFDAT => default data context - CTX_DEFANY => any default context (depends on the modifier) pctl point to pattern control block dctl point to data control block @@ -2656,11 +2661,26 @@ for (;;) break; case MOD_BSR: - if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0) + if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0) + { +#ifdef BSR_ANYCRLF *((uint16_t *)field) = PCRE2_BSR_ANYCRLF; - else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0) +#else *((uint16_t *)field) = PCRE2_BSR_UNICODE; - else goto INVALID_VALUE; +#endif + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET; + else dctl->control &= ~CTL_BSR_SET; + } + else + { + if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0) + *((uint16_t *)field) = PCRE2_BSR_ANYCRLF; + else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0) + *((uint16_t *)field) = PCRE2_BSR_UNICODE; + else goto INVALID_VALUE; + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET; + else dctl->control |= CTL_BSR_SET; + } pp = ep; break; @@ -2698,7 +2718,18 @@ for (;;) if (len == strlen(newlines[i]) && strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break; if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE; - *((uint16_t *)field) = i; + if (i == 0) + { + *((uint16_t *)field) = NEWLINE_DEFAULT; + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET; + else dctl->control &= ~CTL_NL_SET; + } + else + { + *((uint16_t *)field) = i; + if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET; + else dctl->control |= CTL_NL_SET; + } pp = ep; break; @@ -2799,13 +2830,15 @@ return rc; +#ifdef SUPPORT_PCRE8 /************************************************* * Show something in a list * *************************************************/ /* This function just helps 
to keep the code that uses it tidier. It's used for various lists of things where there needs to be introductory text before the -first item. */ +first item. As these calls are all in the POSIX-support code, they happen only +when 8-bit mode is supported. */ static void prmsg(const char **msg, const char *s) @@ -2813,14 +2846,17 @@ prmsg(const char **msg, const char *s) fprintf(outfile, "%s %s", *msg, s); *msg = ""; } +#endif /* SUPPORT_PCRE8 */ +#ifdef SUPPORT_PCRE8 /************************************************* * Show compile controls * *************************************************/ -/* Called for unsupported POSIX modifiers. +/* Called for unsupported POSIX modifiers, and therefore needed only when the +8-bit library is supported. Arguments: controls control bits @@ -2851,6 +2887,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((controls & CTL_POSIX) != 0)? " posix" : "", after); } +#endif /* SUPPORT_PCRE8 */ @@ -2900,6 +2937,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +#ifdef SUPPORT_PCRE8 /************************************************* * Show match controls * *************************************************/ @@ -2924,9 +2962,10 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s", ((controls & CTL_MARK) != 0)? " mark" : "", ((controls & CTL_MEMORY) != 0)? " memory" : ""); } +#endif /* SUPPORT_PCRE8 */ - +#ifdef SUPPORT_PCRE8 /************************************************* * Show match options * *************************************************/ @@ -2949,6 +2988,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s", ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "", ((options & PCRE2_PARTIAL_SOFT) != 0)? 
" partial_soft" : ""); } +#endif /* SUPPORT_PCRE8 */ @@ -3086,35 +3126,40 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); - if (bsr_convention != BSR_DEFAULT) + if ((pat_patctl.control & CTL_BSR_SET) != 0 || + (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0) fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? "any Unicode newline" : "CR, LF, or CRLF"); - if (newline_convention != NEWLINE_DEFAULT) switch (newline_convention) - { - case PCRE2_NEWLINE_CR: - fprintf(outfile, "Forced newline is CR\n"); - break; - - case PCRE2_NEWLINE_LF: - fprintf(outfile, "Forced newline is LF\n"); - break; - - case PCRE2_NEWLINE_CRLF: - fprintf(outfile, "Forced newline is CRLF\n"); - break; - - case PCRE2_NEWLINE_ANYCRLF: - fprintf(outfile, "Forced newline is CR, LF, or CRLF\n"); - break; - - case PCRE2_NEWLINE_ANY: - fprintf(outfile, "Forced newline is any Unicode newline\n"); - break; - - default: - break; - } + if ((pat_patctl.control & CTL_NL_SET) != 0 || + (FLD(compiled_code, flags) & PCRE2_NL_SET) != 0) + { + switch (newline_convention) + { + case PCRE2_NEWLINE_CR: + fprintf(outfile, "Forced newline is CR\n"); + break; + + case PCRE2_NEWLINE_LF: + fprintf(outfile, "Forced newline is LF\n"); + break; + + case PCRE2_NEWLINE_CRLF: + fprintf(outfile, "Forced newline is CRLF\n"); + break; + + case PCRE2_NEWLINE_ANYCRLF: + fprintf(outfile, "Forced newline is CR, LF, or CRLF\n"); + break; + + case PCRE2_NEWLINE_ANY: + fprintf(outfile, "Forced newline is any Unicode newline\n"); + break; + + default: + break; + } + } if (first_ctype == 2) { @@ -3560,9 +3605,11 @@ local character tables. Neither does it have 16-bit or 32-bit support. 
*/ if ((pat_patctl.control & CTL_POSIX) != 0) { +#ifdef SUPPORT_PCRE8 int rc; int cflags = 0; const char *msg = "** Ignored with POSIX interface:"; +#endif if (test_mode != 8) { @@ -4032,9 +4079,13 @@ dat_datctl.control |= (pat_patctl.control & CTL_ALLPD); /* Initialize for scanning the data line. */ +#ifdef SUPPORT_PCRE8 utf = ((((pat_patctl.control & CTL_POSIX) != 0)? ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options : FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0; +#else +utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; +#endif start_rep = NULL; len = strlen((const char *)buffer); @@ -4359,6 +4410,7 @@ possible in 8-bit mode, and it does not support timing or other fancy features. Some were checked at compile time, but we need to check the match-time settings here. */ +#ifdef SUPPORT_PCRE8 if ((pat_patctl.control & CTL_POSIX) != 0) { int rc; @@ -4431,6 +4483,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) free(pmatch); return PR_OK; } +#endif /* SUPPORT_PCRE8 */ /* Handle matching via the native interface. Check for consistency of modifiers. 
*/ @@ -4876,11 +4929,15 @@ for (gmatched = 0;; gmatched++) else if (utf && test_mode != PCRE32_MODE) { if (test_mode == PCRE8_MODE) + { for (; end_offset < ulen; end_offset++) if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break; - else + } + else /* 16-bit mode */ + { for (; end_offset < ulen; end_offset++) if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break; + } } SETFLDVEC(match_data, ovector, 0, start_offset); @@ -5534,14 +5591,20 @@ if (argc > 2) if (!quiet) print_version(outfile); SET(compiled_code, NULL); + +#ifdef SUPPORT_PCRE8 preg.re_pcre2_code = NULL; preg.re_match_data = NULL; +#endif while (notdone) { uint8_t *p; int rc = PR_OK; - BOOL expectdata = TEST(compiled_code, !=, NULL) || preg.re_pcre2_code != NULL; + BOOL expectdata = TEST(compiled_code, !=, NULL); +#ifdef SUPPORT_PCRE8 + expectdata |= preg.re_pcre2_code != NULL; +#endif if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL) break; @@ -5558,12 +5621,14 @@ while (notdone) while (isspace(*p)) p++; if (*p == 0) { +#ifdef SUPPORT_PCRE8 if (preg.re_pcre2_code != NULL) { regfree(&preg); preg.re_pcre2_code = NULL; preg.re_match_data = NULL; } +#endif /* SUPPORT_PCRE8 */ if (TEST(compiled_code, !=, NULL)) { SUB1(pcre2_code_free, compiled_code); @@ -5641,11 +5706,11 @@ free(dbuffer); free(pbuffer8); free(dfa_workspace); free((void *)locale_tables); -regfree(&preg); PCRE2_MATCH_DATA_FREE(match_data); SUB1(pcre2_code_free, compiled_code); #ifdef SUPPORT_PCRE8 +regfree(&preg); pcre2_general_context_free_8(general_context8); pcre2_compile_context_free_8(pat_context8); pcre2_compile_context_free_8(default_pat_context8); diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 213247b..ba2a4fe 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -793,6 +793,7 @@ No match Capturing subpattern count = 0 Compile options: Overall options: utf +\R matches any Unicode newline Forced newline is CRLF First code unit = 'a' Last code unit = 'b' diff --git 
a/testdata/testoutput11-16 b/testdata/testoutput11-16 index 1f6387b..041999a 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -371,6 +371,7 @@ Subject length lower bound = 1 /\R+/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline Starting code units: \x0a \x0b \x0c \x0d \x85 \xff No last code unit Subject length lower bound = 1 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index 255ab4d..2ded03c 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -371,6 +371,7 @@ Subject length lower bound = 1 /\R+/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline Starting code units: \x0a \x0b \x0c \x0d \x85 \xff No last code unit Subject length lower bound = 1 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 7eeb728..8a02ac6 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -679,6 +679,7 @@ Failed: error 160 at offset 5: (*VERB) not recognized or malformed Capturing subpattern count = 0 Compile options: Overall options: utf +\R matches any Unicode newline Forced newline is CRLF First code unit = 'a' Last code unit = 'b' @@ -745,6 +746,7 @@ Subject length lower bound = 1 /\R*A/I,bsr=unicode,utf Capturing subpattern count = 0 Options: utf +\R matches any Unicode newline Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff Last code unit = 'A' Subject length lower bound = 1 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 70642c8..4530a0b 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -680,6 +680,7 @@ Failed: error 160 at offset 12: (*VERB) not recognized or malformed Capturing subpattern count = 0 Compile options: Overall options: utf +\R matches any Unicode newline Forced newline is CRLF First code unit = 'a' Last code unit = 'b' @@ -743,6 +744,7 @@ Subject length lower bound = 1 /\R*A/I,bsr=unicode,utf Capturing subpattern count = 0 Options: utf +\R 
matches any Unicode newline Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff Last code unit = 'A' Subject length lower bound = 1 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index e649e11..e3fd89f 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -6625,6 +6625,7 @@ No match /abc$/Im,newline=lf Capturing subpattern count = 0 Options: multiline +Forced newline is LF First code unit = 'a' Last code unit = 'c' Subject length lower bound = 3 @@ -6672,6 +6673,7 @@ No match /^abc/Im,newline=lf Capturing subpattern count = 0 Options: multiline +Forced newline is LF First code unit at start or follows newline Last code unit = 'c' Subject length lower bound = 3 @@ -6718,6 +6720,7 @@ Subject length lower bound = 3 Capturing subpattern count = 0 May match empty string No options +Forced newline is LF First code unit at start or follows newline No last code unit Subject length lower bound = 0 @@ -9094,6 +9097,7 @@ No match /a\Rb/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 @@ -9137,6 +9141,7 @@ No match /a\R?b/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 @@ -9180,6 +9185,7 @@ No match /a\R{2,4}b/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 4 @@ -9217,6 +9223,7 @@ Subject length lower bound = 3 /(*BSR_UNICODE)a\Rb/I Capturing subpattern count = 0 No options +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 @@ -9239,6 +9246,7 @@ Subject length lower bound = 3 /(*CRLF)(*BSR_UNICODE)a\Rb/I Capturing subpattern count = 0 No options +\R matches any Unicode newline Forced newline is CRLF First code unit = 'a' Last code unit = 'b' diff --git 
a/testdata/testoutput5 b/testdata/testoutput5 index 2ddd11f..a269b44 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -791,6 +791,7 @@ No match /a\Rb/I,bsr=unicode,utf Capturing subpattern count = 0 Options: utf +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 @@ -834,6 +835,7 @@ No match /a\R?b/I,bsr=unicode,utf Capturing subpattern count = 0 Options: utf +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 7400d94..28e5e1e 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -6706,6 +6706,7 @@ No match /a\Rb/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 @@ -6749,6 +6750,7 @@ No match /a\R?b/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 @@ -6792,6 +6794,7 @@ No match /a\R{2,4}b/I,bsr=unicode Capturing subpattern count = 0 No options +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 4 diff --git a/testdata/testoutput7 b/testdata/testoutput7 index a9511bc..a380569 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -1125,6 +1125,7 @@ No match /a\Rb/I,bsr=unicode,utf Capturing subpattern count = 0 Options: utf +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 @@ -1168,6 +1169,7 @@ No match /a\R?b/I,bsr=unicode,utf Capturing subpattern count = 0 Options: utf +\R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2