Basic pcre2_compile() is working (no study, no auto-possess yet).
This commit is contained in:
parent
2801d5d132
commit
1abd5a7f8d
|
@ -275,19 +275,19 @@ COMMON_SOURCES = \
|
|||
src/pcre2_jit_misc.c \
|
||||
src/pcre2_maketables.c \
|
||||
src/pcre2_match_data.c \
|
||||
src/pcre2_newline.c \
|
||||
src/pcre2_ord2utf.c \
|
||||
src/pcre2_pattern_info.c \
|
||||
src/pcre2_string_utils.c \
|
||||
src/pcre2_substring.c \
|
||||
src/pcre2_tables.c \
|
||||
src/pcre2_ucd.c \
|
||||
src/pcre2_ucp.h \
|
||||
src/pcre2_valid_utf.c \
|
||||
src/pcre2_version.c
|
||||
|
||||
# src/pcre2_newline.c \
|
||||
# src/pcre2_ord2utf8.c \
|
||||
# src/pcre2_refcount.c \
|
||||
# src/pcre2_study.c \
|
||||
# src/pcre2_valid_utf8.c \
|
||||
# src/pcre2_xclass.c
|
||||
|
||||
|
||||
|
|
228
configure.ac
228
configure.ac
|
@ -2,13 +2,13 @@ dnl Process this file with autoconf to produce a configure script.
|
|||
|
||||
dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
|
||||
dnl the leading zeros may cause them to be treated as invalid octal constants
|
||||
dnl if a PCRE user writes code that uses PCRE_MINOR as a number. There is now
|
||||
dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
|
||||
dnl a check further down that throws an error if 08 or 09 are used.
|
||||
|
||||
dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [9])
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [00])
|
||||
m4_define(pcre2_prerelease, [-DEV])
|
||||
m4_define(pcre2_date, [2014-99-99])
|
||||
|
@ -125,11 +125,11 @@ AC_ARG_ENABLE(jit,
|
|||
[enable Just-In-Time compiling support]),
|
||||
, enable_jit=no)
|
||||
|
||||
# Handle --disable-pcregrep-jit (enabled by default)
|
||||
AC_ARG_ENABLE(pcregrep-jit,
|
||||
AS_HELP_STRING([--disable-pcregrep-jit],
|
||||
[disable JIT support in pcregrep]),
|
||||
, enable_pcregrep_jit=yes)
|
||||
# Handle --disable-pcre2grep-jit (enabled by default)
|
||||
AC_ARG_ENABLE(pcre2grep-jit,
|
||||
AS_HELP_STRING([--disable-pcre2grep-jit],
|
||||
[disable JIT support in pcre2grep]),
|
||||
, enable_pcre2grep_jit=yes)
|
||||
|
||||
# Handle --enable-rebuild-chartables
|
||||
AC_ARG_ENABLE(rebuild-chartables,
|
||||
|
@ -144,28 +144,28 @@ AC_ARG_ENABLE(utf,
|
|||
, enable_utf=unset)
|
||||
|
||||
# Handle newline options
|
||||
ac_pcre_newline=lf
|
||||
ac_pcre2_newline=lf
|
||||
AC_ARG_ENABLE(newline-is-cr,
|
||||
AS_HELP_STRING([--enable-newline-is-cr],
|
||||
[use CR as newline character]),
|
||||
ac_pcre_newline=cr)
|
||||
ac_pcre2_newline=cr)
|
||||
AC_ARG_ENABLE(newline-is-lf,
|
||||
AS_HELP_STRING([--enable-newline-is-lf],
|
||||
[use LF as newline character (default)]),
|
||||
ac_pcre_newline=lf)
|
||||
ac_pcre2_newline=lf)
|
||||
AC_ARG_ENABLE(newline-is-crlf,
|
||||
AS_HELP_STRING([--enable-newline-is-crlf],
|
||||
[use CRLF as newline sequence]),
|
||||
ac_pcre_newline=crlf)
|
||||
ac_pcre2_newline=crlf)
|
||||
AC_ARG_ENABLE(newline-is-anycrlf,
|
||||
AS_HELP_STRING([--enable-newline-is-anycrlf],
|
||||
[use CR, LF, or CRLF as newline sequence]),
|
||||
ac_pcre_newline=anycrlf)
|
||||
ac_pcre2_newline=anycrlf)
|
||||
AC_ARG_ENABLE(newline-is-any,
|
||||
AS_HELP_STRING([--enable-newline-is-any],
|
||||
[use any valid Unicode newline sequence]),
|
||||
ac_pcre_newline=any)
|
||||
enable_newline="$ac_pcre_newline"
|
||||
ac_pcre2_newline=any)
|
||||
enable_newline="$ac_pcre2_newline"
|
||||
|
||||
# Handle --enable-bsr-anycrlf
|
||||
AC_ARG_ENABLE(bsr-anycrlf,
|
||||
|
@ -191,35 +191,35 @@ AC_ARG_ENABLE(stack-for-recursion,
|
|||
[don't use stack recursion when matching]),
|
||||
, enable_stack_for_recursion=yes)
|
||||
|
||||
# Handle --enable-pcregrep-libz
|
||||
AC_ARG_ENABLE(pcregrep-libz,
|
||||
AS_HELP_STRING([--enable-pcregrep-libz],
|
||||
[link pcregrep with libz to handle .gz files]),
|
||||
, enable_pcregrep_libz=no)
|
||||
# Handle --enable-pcre2grep-libz
|
||||
AC_ARG_ENABLE(pcre2grep-libz,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libz],
|
||||
[link pcre2grep with libz to handle .gz files]),
|
||||
, enable_pcre2grep_libz=no)
|
||||
|
||||
# Handle --enable-pcregrep-libbz2
|
||||
AC_ARG_ENABLE(pcregrep-libbz2,
|
||||
AS_HELP_STRING([--enable-pcregrep-libbz2],
|
||||
[link pcregrep with libbz2 to handle .bz2 files]),
|
||||
, enable_pcregrep_libbz2=no)
|
||||
# Handle --enable-pcre2grep-libbz2
|
||||
AC_ARG_ENABLE(pcre2grep-libbz2,
|
||||
AS_HELP_STRING([--enable-pcre2grep-libbz2],
|
||||
[link pcre2grep with libbz2 to handle .bz2 files]),
|
||||
, enable_pcre2grep_libbz2=no)
|
||||
|
||||
# Handle --with-pcregrep-bufsize=N
|
||||
AC_ARG_WITH(pcregrep-bufsize,
|
||||
AS_HELP_STRING([--with-pcregrep-bufsize=N],
|
||||
[pcregrep buffer size (default=20480, minimum=8192)]),
|
||||
, with_pcregrep_bufsize=20480)
|
||||
# Handle --with-pcre2grep-bufsize=N
|
||||
AC_ARG_WITH(pcre2grep-bufsize,
|
||||
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
||||
[pcre2grep buffer size (default=20480, minimum=8192)]),
|
||||
, with_pcre2grep_bufsize=20480)
|
||||
|
||||
# Handle --enable-pcretest-libedit
|
||||
AC_ARG_ENABLE(pcretest-libedit,
|
||||
AS_HELP_STRING([--enable-pcretest-libedit],
|
||||
[link pcretest with libedit]),
|
||||
, enable_pcretest_libedit=no)
|
||||
# Handle --enable-pcre2test-libedit
|
||||
AC_ARG_ENABLE(pcre2test-libedit,
|
||||
AS_HELP_STRING([--enable-pcre2test-libedit],
|
||||
[link pcre2test with libedit]),
|
||||
, enable_pcre2test_libedit=no)
|
||||
|
||||
# Handle --enable-pcretest-libreadline
|
||||
AC_ARG_ENABLE(pcretest-libreadline,
|
||||
AS_HELP_STRING([--enable-pcretest-libreadline],
|
||||
[link pcretest with libreadline]),
|
||||
, enable_pcretest_libreadline=no)
|
||||
# Handle --enable-pcre2test-libreadline
|
||||
AC_ARG_ENABLE(pcre2test-libreadline,
|
||||
AS_HELP_STRING([--enable-pcre2test-libreadline],
|
||||
[link pcre2test with libreadline]),
|
||||
, enable_pcre2test_libreadline=no)
|
||||
|
||||
# Handle --with-link-size=N
|
||||
AC_ARG_WITH(link-size,
|
||||
|
@ -298,11 +298,11 @@ fi
|
|||
# agree with the PCRE2_NEWLINE_xxx values in pcre2.h.
|
||||
|
||||
case "$enable_newline" in
|
||||
cr) ac_pcre_newline_value=0 ;;
|
||||
lf) ac_pcre_newline_value=1 ;;
|
||||
crlf) ac_pcre_newline_value=2 ;;
|
||||
any) ac_pcre_newline_value=3 ;;
|
||||
anycrlf) ac_pcre_newline_value=4 ;;
|
||||
cr) ac_pcre2_newline_value=1 ;;
|
||||
lf) ac_pcre2_newline_value=2 ;;
|
||||
crlf) ac_pcre2_newline_value=3 ;;
|
||||
any) ac_pcre2_newline_value=4 ;;
|
||||
anycrlf) ac_pcre2_newline_value=5 ;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
|
||||
;;
|
||||
|
@ -314,7 +314,7 @@ if test "x$enable_ebcdic_nl25" = "xyes"; then
|
|||
fi
|
||||
|
||||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||
# Also check that UTF support is not requested, because PCRE cannot handle
|
||||
# Also check that UTF support is not requested, because PCRE2 cannot handle
|
||||
# EBCDIC and UTF in the same build. To do so it would need to use different
|
||||
# character constants depending on the mode.
|
||||
#
|
||||
|
@ -334,13 +334,13 @@ case "$with_link_size" in
|
|||
esac
|
||||
|
||||
AH_TOP([
|
||||
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE "by
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
|
@ -357,7 +357,7 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
|
|||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */])
|
||||
|
||||
|
@ -370,7 +370,7 @@ AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
|||
AM_CONDITIONAL(WITH_PCRE8, test "x$enable_pcre8" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
|
||||
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
|
||||
#AM_CONDITIONAL(WITH_PCRE2_CPP, test "x$enable_cpp" = "xyes")
|
||||
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
|
||||
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
|
||||
AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
|
||||
|
@ -400,7 +400,7 @@ AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
|
|||
# therefore missing the function definition.
|
||||
# - The compiler thus generates a "C" signature for the test function.
|
||||
# - The linker fails to find the "C" function.
|
||||
# - PCRE fails to configure if asked to do so against libbz2.
|
||||
# - PCRE2 fails to configure if asked to do so against libbz2.
|
||||
#
|
||||
# Solution:
|
||||
#
|
||||
|
@ -426,7 +426,7 @@ LIBS="$OLD_LIBS"
|
|||
|
||||
# Check for the availabiity of libreadline
|
||||
|
||||
if test "$enable_pcretest_libreadline" = "yes"; then
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
|
||||
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
|
||||
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
|
||||
|
@ -459,7 +459,7 @@ fi
|
|||
# Check for the availability of libedit. Different distributions put its
|
||||
# headers in different places. Try to cover the most common ones.
|
||||
|
||||
if test "$enable_pcretest_libedit" = "yes"; then
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
|
||||
[AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
|
||||
[AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
|
||||
|
@ -477,21 +477,21 @@ if test "x$enable_shared" = "xno" ; then
|
|||
fi
|
||||
AC_SUBST(PCRE2_STATIC_CFLAG)
|
||||
|
||||
# Here is where pcre specific defines are handled
|
||||
# Here is where PCRE2-specific defines are handled
|
||||
|
||||
if test "$enable_pcre8" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE8], [], [
|
||||
Define to any value to enable the 8 bit PCRE library.])
|
||||
Define to any value to enable the 8 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre16" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE16], [], [
|
||||
Define to any value to enable the 16 bit PCRE library.])
|
||||
Define to any value to enable the 16 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcre32" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE32], [], [
|
||||
Define to any value to enable the 32 bit PCRE library.])
|
||||
Define to any value to enable the 32 bit PCRE2 library.])
|
||||
fi
|
||||
|
||||
# Unless running under Windows, JIT support requires pthreads.
|
||||
|
@ -506,87 +506,87 @@ if test "$enable_jit" = "yes"; then
|
|||
AC_DEFINE([SUPPORT_JIT], [], [
|
||||
Define to any value to enable support for Just-In-Time compiling.])
|
||||
else
|
||||
enable_pcregrep_jit="no"
|
||||
enable_pcre2grep_jit="no"
|
||||
fi
|
||||
|
||||
if test "$enable_pcregrep_jit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [
|
||||
Define to any value to enable JIT support in pcregrep.])
|
||||
if test "$enable_pcre2grep_jit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
|
||||
Define to any value to enable JIT support in pcre2grep.])
|
||||
fi
|
||||
|
||||
if test "$enable_utf" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UTF], [], [
|
||||
Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible
|
||||
with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
|
||||
with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
|
||||
code *or* ASCII/UTF-8/16/32, but not both at once.])
|
||||
fi
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
AC_DEFINE([NO_RECURSE], [], [
|
||||
PCRE uses recursive function calls to handle backtracking while
|
||||
PCRE2 uses recursive function calls to handle backtracking while
|
||||
matching. This can sometimes be a problem on systems that have
|
||||
stacks of limited size. Define NO_RECURSE to any value to get a
|
||||
version that doesn't use recursion in the match() function; instead
|
||||
it creates its own stack by steam using pcre_recurse_malloc() to obtain
|
||||
memory from the heap. For more detail, see the comments and other stuff
|
||||
just above the match() function.])
|
||||
it creates its own stack by steam using memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcregrep_libz" = "yes"; then
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBZ], [], [
|
||||
Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||
Define to any value to allow pcre2grep to be linked with libz, so that it is
|
||||
able to handle .gz files.])
|
||||
fi
|
||||
|
||||
if test "$enable_pcregrep_libbz2" = "yes"; then
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBBZ2], [], [
|
||||
Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||
Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files.])
|
||||
fi
|
||||
|
||||
if test $with_pcregrep_bufsize -lt 8192 ; then
|
||||
AC_MSG_WARN([$with_pcregrep_bufsize is too small for --with-pcregrep-bufsize; using 8192])
|
||||
with_pcregrep_bufsize="8192"
|
||||
if test $with_pcre2grep_bufsize -lt 8192 ; then
|
||||
AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
|
||||
with_pcre2grep_bufsize="8192"
|
||||
else
|
||||
if test $? -gt 1 ; then
|
||||
AC_MSG_ERROR([Bad value for --with-pcregrep-bufsize])
|
||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
|
||||
The value of PCREGREP_BUFSIZE determines the size of buffer used by pcregrep
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||
The value of PCRE2GREP_BUFSIZE determines the size of buffer used by pcre2grep
|
||||
to hold parts of the file it is searching. This is also the minimum value.
|
||||
The actual amount of memory used by pcregrep is three times this number,
|
||||
The actual amount of memory used by pcre2grep is three times this number,
|
||||
because it allows for the buffering of "before" and "after" lines.])
|
||||
|
||||
if test "$enable_pcretest_libedit" = "yes"; then
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||
Define to any value to allow pcretest to be linked with libedit.])
|
||||
Define to any value to allow pcre2test to be linked with libedit.])
|
||||
LIBREADLINE="$LIBEDIT"
|
||||
elif test "$enable_pcretest_libreadline" = "yes"; then
|
||||
elif test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_LIBREADLINE], [], [
|
||||
Define to any value to allow pcretest to be linked with libreadline.])
|
||||
Define to any value to allow pcre2test to be linked with libreadline.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([NEWLINE], [$ac_pcre_newline_value], [
|
||||
The value of NEWLINE determines the default newline character sequence. PCRE
|
||||
client programs can override this by selecting other values at run time. The
|
||||
valid values are 0 (CR), 1 (LF), 2 (CRLF), 3 (ANY), and 4 (ANYCRLF).])
|
||||
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
and 5 (ANYCRLF).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
AC_DEFINE([BSR_ANYCRLF], [], [
|
||||
By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE at runtime.])
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||
The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which
|
||||
allows for compiled patterns up to 64K long. This covers the vast
|
||||
majority of cases. However, PCRE can also be compiled to use 3 or 4
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4
|
||||
bytes instead. This allows for longer patterns in extreme cases.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||
|
@ -597,7 +597,7 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
|||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different
|
||||
pcre2_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular
|
||||
expressions that take for ever to determine that they do not match.
|
||||
The default is set very large so that it does not accidentally catch
|
||||
|
@ -639,10 +639,10 @@ AH_VERBATIM([PCRE2_EXP_DEFN], [
|
|||
if test "$enable_ebcdic" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([EBCDIC], [], [
|
||||
If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE that supports both EBCDIC and
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32.])
|
||||
fi
|
||||
|
||||
|
@ -695,65 +695,65 @@ AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
|||
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre16 --enable-pcre32 --enable-jit --enable-utf"
|
||||
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||
|
||||
# Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is
|
||||
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
|
||||
# specified, the relevant library is available.
|
||||
|
||||
if test "$enable_pcregrep_libz" = "yes"; then
|
||||
if test "$enable_pcre2grep_libz" = "yes"; then
|
||||
if test "$HAVE_ZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcregrep-libz because zlib.h was not found"
|
||||
echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBZ" != "1"; then
|
||||
echo "** Cannot --enable-pcregrep-libz because libz was not found"
|
||||
echo "** Cannot --enable-pcre2grep-libz because libz was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBZ="-lz"
|
||||
fi
|
||||
AC_SUBST(LIBZ)
|
||||
|
||||
if test "$enable_pcregrep_libbz2" = "yes"; then
|
||||
if test "$enable_pcre2grep_libbz2" = "yes"; then
|
||||
if test "$HAVE_BZLIB_H" != "1"; then
|
||||
echo "** Cannot --enable-pcregrep-libbz2 because bzlib.h was not found"
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_LIBBZ2" != "1"; then
|
||||
echo "** Cannot --enable-pcregrep-libbz2 because libbz2 was not found"
|
||||
echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
|
||||
exit 1
|
||||
fi
|
||||
LIBBZ2="-lbz2"
|
||||
fi
|
||||
AC_SUBST(LIBBZ2)
|
||||
|
||||
# Similarly for --enable-pcretest-readline
|
||||
# Similarly for --enable-pcre2test-readline
|
||||
|
||||
if test "$enable_pcretest_libedit" = "yes"; then
|
||||
if test "$enable_pcretest_libreadline" = "yes"; then
|
||||
echo "** Cannot use both --enable-pcretest-libedit and --enable-pcretest-readline"
|
||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
|
||||
"$HAVE_READLINE_READLINE_H" != "1"; then
|
||||
echo "** Cannot --enable-pcretest-libedit because neither editline/readline.h"
|
||||
echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
|
||||
echo "** nor readline/readline.h was found."
|
||||
exit 1
|
||||
fi
|
||||
if test -z "$LIBEDIT"; then
|
||||
echo "** Cannot --enable-pcretest-libedit because libedit library was not found."
|
||||
echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$enable_pcretest_libreadline" = "yes"; then
|
||||
if test "$enable_pcre2test_libreadline" = "yes"; then
|
||||
if test "$HAVE_READLINE_H" != "1"; then
|
||||
echo "** Cannot --enable-pcretest-readline because readline/readline.h was not found."
|
||||
echo "** Cannot --enable-pcre2test-readline because readline/readline.h was not found."
|
||||
exit 1
|
||||
fi
|
||||
if test "$HAVE_HISTORY_H" != "1"; then
|
||||
echo "** Cannot --enable-pcretest-readline because readline/history.h was not found."
|
||||
echo "** Cannot --enable-pcre2test-readline because readline/history.h was not found."
|
||||
exit 1
|
||||
fi
|
||||
if test -z "$LIBREADLINE"; then
|
||||
echo "** Cannot --enable-pcretest-readline because readline library was not found."
|
||||
echo "** Cannot --enable-pcre2test-readline because readline library was not found."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
@ -868,12 +868,12 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||
Build shared libs ............... : ${enable_shared}
|
||||
Build static libs ............... : ${enable_static}
|
||||
Use JIT in pcregrep ............. : ${enable_pcregrep_jit}
|
||||
Buffer size for pcregrep ........ : ${with_pcregrep_bufsize}
|
||||
Link pcregrep with libz ......... : ${enable_pcregrep_libz}
|
||||
Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
|
||||
Link pcretest with libedit ...... : ${enable_pcretest_libedit}
|
||||
Link pcretest with libreadline .. : ${enable_pcretest_libreadline}
|
||||
Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
|
||||
Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
|
||||
Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
|
||||
Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
|
||||
Link pcre2test with libedit ..... : ${enable_pcre2test_libedit}
|
||||
Link pcre2test with libreadline . : ${enable_pcre2test_libreadline}
|
||||
Valgrind support ................ : ${enable_valgrind}
|
||||
Code coverage ................... : ${enable_coverage}
|
||||
|
||||
|
|
|
@ -420,6 +420,7 @@ about the pattern:
|
|||
flipbytes flip endianness
|
||||
/BB fullbincode show binary code with lengths
|
||||
/I info show info about compiled pattern
|
||||
hex pattern is coded in hexadecimal
|
||||
jit[=<number>] use JIT
|
||||
locale=<name> use this locale
|
||||
memory show memory used
|
||||
|
@ -430,6 +431,7 @@ about the pattern:
|
|||
save=<file name> save compiled pattern
|
||||
stackguard=<number> test the stackguard feature
|
||||
tables=[0|1|2] select internal tables
|
||||
use_length use the pattern's length
|
||||
.sp
|
||||
The effects of these modifiers are described in the following sections.
|
||||
FIXME: Give more examples.
|
||||
|
@ -481,6 +483,27 @@ specified. See also the section about saving and reloading compiled patterns
|
|||
below.
|
||||
.
|
||||
.
|
||||
.SS "Specifying a pattern in hex"
|
||||
.rs
|
||||
.sp
|
||||
The \fBhex\fP modifier specifies that the characters of the pattern are to be
|
||||
interpreted as pairs of hexadecimal digits. White space is permitted between
|
||||
pairs. For example:
|
||||
.sp
|
||||
/ab 32 59/hex
|
||||
.sp
|
||||
This feature is provided as a way of creating patterns that contain binary zero
|
||||
characters. When \fBhex\fP is set, it implies \fBuse_length\fP.
|
||||
.
|
||||
.
|
||||
.SS "Using the pattern's length"
|
||||
.rs
|
||||
.sp
|
||||
By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
|
||||
\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the
|
||||
length of the pattern is passed. This is implied if \fBhex\fP is set.
|
||||
.
|
||||
.
|
||||
.SS "JIT compilation"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -595,38 +618,6 @@ letters, digits, spaces, etc. Setting alternate character tables and a locale
|
|||
are mutually exclusive.
|
||||
.
|
||||
.
|
||||
.SS "Locking out certain modifiers"
|
||||
.rs
|
||||
.sp
|
||||
FIXME FIXME
|
||||
PCRE can be compiled with or without support for certain features such as
|
||||
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
|
||||
into a number of different files that are selected for running depending on
|
||||
which features are available. When updating the tests, it is all too easy to
|
||||
put a new test into the wrong file by mistake; for example, to put a test that
|
||||
requires UTF support into a file that is used when it is not available. To help
|
||||
detect such mistakes as early as possible, there is a facility for locking out
|
||||
specific modifiers. If an input line for \fBpcre2test\fP starts with the string
|
||||
"< forbid " the following sequence of characters is taken as a list of
|
||||
forbidden modifiers. For example, in the test files that must not use UTF or
|
||||
Unicode property support, this line appears:
|
||||
.sp
|
||||
< forbid 8W
|
||||
.sp
|
||||
This locks out the /8 and /W modifiers. An immediate error is given if they are
|
||||
subsequently encountered. If the character string contains < but not >, all the
|
||||
multi-character modifiers that begin with < are locked out. Otherwise, such
|
||||
modifiers must be explicitly listed, for example:
|
||||
.sp
|
||||
< forbid <JS><cr>
|
||||
.sp
|
||||
There must be a single space between < and "forbid" for this feature to be
|
||||
recognised. If there is not, the line is interpreted either as a request to
|
||||
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
|
||||
below) or, if there is a another < character, as a pattern that uses < as its
|
||||
delimiter.
|
||||
.
|
||||
.
|
||||
.SS "Setting certain match controls"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -653,6 +644,7 @@ defaults, set them in a \fB#subject\fP command.
|
|||
The modifiers that can appear in subject lines and the \fB#subject\fP
|
||||
command are of two types.
|
||||
.
|
||||
.
|
||||
.SS "Setting match options"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -1199,6 +1191,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 13 May 2014
|
||||
Last updated: 08 June 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -7,7 +7,7 @@ includedir=${prefix}/include
|
|||
|
||||
Name: libpcre2-posix
|
||||
Description: Posix compatible interface to libpcre2-8
|
||||
Version: 9.00-DEV
|
||||
Version: 10.00-DEV
|
||||
Libs: -L${libdir} -lpcre2-posix
|
||||
Cflags: -I${includedir} @PCRE_STATIC_CFLAG@
|
||||
Requires.private: libpcre2-8
|
||||
|
|
80
src/config.h
80
src/config.h
|
@ -2,13 +2,13 @@
|
|||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE "by
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
|
@ -25,21 +25,22 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
|
|||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE at runtime. */
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE that supports both EBCDIC and
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
|
@ -126,8 +127,8 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. */
|
||||
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
|
||||
allows for longer patterns in extreme cases. */
|
||||
#define LINK_SIZE 2
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
|
@ -136,7 +137,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
pcre2_match(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. */
|
||||
|
@ -162,19 +163,18 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
overflow caused by enormously large patterns. */
|
||||
#define MAX_NAME_SIZE 32
|
||||
|
||||
/* The value of NEWLINE determines the default newline character sequence.
|
||||
PCRE client programs can override this by selecting other values at run
|
||||
time. The valid values are 0 (CR), 1 (LF), 2 (CRLF), 3 (ANY), and 4
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||
(ANYCRLF). */
|
||||
#define NEWLINE 1
|
||||
#define NEWLINE_DEFAULT 2
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
/* PCRE2 uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||
recursion in the match() function; instead it creates its own stack by
|
||||
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.
|
||||
*/
|
||||
steam using memory from the heap. For more detail, see the comments and
|
||||
other stuff just above the match() function. */
|
||||
/* #undef NO_RECURSE */
|
||||
|
||||
/* Name of package */
|
||||
|
@ -187,7 +187,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 9.00-DEV"
|
||||
#define PACKAGE_STRING "PCRE2 10.00-DEV"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -196,13 +196,20 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "9.00-DEV"
|
||||
#define PACKAGE_VERSION "10.00-DEV"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcre2grep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
#define PCRE2GREP_BUFSIZE 20480
|
||||
|
||||
/* to make a symbol visible */
|
||||
#define PCRE2POSIX_EXP_DECL extern __attribute__ ((visibility ("default")))
|
||||
|
||||
|
@ -227,13 +234,6 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
#define PCRE2_STATIC 1
|
||||
|
||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||
pcregrep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcregrep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
#define PCREGREP_BUFSIZE 20480
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||
|
@ -244,35 +244,35 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
/* #undef SUPPORT_JIT */
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
#define SUPPORT_LIBBZ2 /**/
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libedit. */
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
/* #undef SUPPORT_LIBEDIT */
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libreadline. */
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
#define SUPPORT_LIBREADLINE /**/
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||
able to handle .gz files. */
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
#define SUPPORT_LIBZ /**/
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE library. */
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
#define SUPPORT_PCRE16 /**/
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE library. */
|
||||
/* Define to any value to enable JIT support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
#define SUPPORT_PCRE32 /**/
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE library. */
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
#define SUPPORT_PCRE8 /**/
|
||||
|
||||
/* Define to any value to enable JIT support in pcregrep. */
|
||||
/* #undef SUPPORT_PCREGREP_JIT */
|
||||
|
||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible with
|
||||
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
|
||||
the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/UTF-8/16/32, but not both at once. */
|
||||
#define SUPPORT_UTF /**/
|
||||
|
||||
|
@ -280,7 +280,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define SUPPORT_VALGRIND /**/
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "9.00-DEV"
|
||||
#define VERSION "10.00-DEV"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE "by
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
|
@ -24,21 +24,22 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
|
|||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE at runtime. */
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
#undef BSR_ANYCRLF
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE that supports both EBCDIC and
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
#undef EBCDIC
|
||||
|
||||
|
@ -125,8 +126,8 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. */
|
||||
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
|
||||
allows for longer patterns in extreme cases. */
|
||||
#undef LINK_SIZE
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
|
@ -135,7 +136,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
pcre2_match(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. */
|
||||
|
@ -161,19 +162,18 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_SIZE
|
||||
|
||||
/* The value of NEWLINE determines the default newline character sequence.
|
||||
PCRE client programs can override this by selecting other values at run
|
||||
time. The valid values are 0 (CR), 1 (LF), 2 (CRLF), 3 (ANY), and 4
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||
(ANYCRLF). */
|
||||
#undef NEWLINE
|
||||
#undef NEWLINE_DEFAULT
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
/* PCRE2 uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||
recursion in the match() function; instead it creates its own stack by
|
||||
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.
|
||||
*/
|
||||
steam using memory from the heap. For more detail, see the comments and
|
||||
other stuff just above the match() function. */
|
||||
#undef NO_RECURSE
|
||||
|
||||
/* Name of package */
|
||||
|
@ -202,6 +202,13 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
stack that is used while compiling a pattern. */
|
||||
#undef PARENS_NEST_LIMIT
|
||||
|
||||
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcre2grep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
#undef PCRE2GREP_BUFSIZE
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRE2POSIX_EXP_DECL
|
||||
|
||||
|
@ -226,13 +233,6 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
#undef PCRE2_STATIC
|
||||
|
||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||
pcregrep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcregrep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
#undef PCREGREP_BUFSIZE
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
#undef PTHREAD_CREATE_JOINABLE
|
||||
|
@ -243,35 +243,35 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#undef SUPPORT_JIT
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
#undef SUPPORT_LIBBZ2
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libedit. */
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
#undef SUPPORT_LIBEDIT
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libreadline. */
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
#undef SUPPORT_LIBREADLINE
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||
able to handle .gz files. */
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
#undef SUPPORT_LIBZ
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE library. */
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE16
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE library. */
|
||||
/* Define to any value to enable JIT support in pcre2grep. */
|
||||
#undef SUPPORT_PCRE2GREP_JIT
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE32
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE library. */
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
#undef SUPPORT_PCRE8
|
||||
|
||||
/* Define to any value to enable JIT support in pcregrep. */
|
||||
#undef SUPPORT_PCREGREP_JIT
|
||||
|
||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible with
|
||||
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
|
||||
the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/UTF-8/16/32, but not both at once. */
|
||||
#undef SUPPORT_UTF
|
||||
|
||||
|
|
121
src/pcre2.h
121
src/pcre2.h
|
@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 9
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 00
|
||||
#define PCRE2_PRERELEASE -DEV
|
||||
#define PCRE2_DATE 2014-99-99
|
||||
|
@ -138,86 +138,83 @@ D is inspected during pcre2_dfa_exec() execution
|
|||
|
||||
/* Newline and \R settings, for use in the compile context. */
|
||||
|
||||
#define PCRE2_NEWLINE_DEFAULT 0
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
|
||||
#define PCRE2_BSR_DEFAULT 0
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Match-time and get/set-time error codes */
|
||||
/* Error codes: no match and partial match are "expected" errors. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
#define PCRE2_ERROR_BADCOUNT (-2)
|
||||
#define PCRE2_ERROR_BADENDIANNESS (-3)
|
||||
#define PCRE2_ERROR_BADLENGTH (-4)
|
||||
#define PCRE2_ERROR_BADMAGIC (-5)
|
||||
#define PCRE2_ERROR_BADMODE (-6)
|
||||
#define PCRE2_ERROR_BADOFFSET (-7)
|
||||
#define PCRE2_ERROR_BADOPTION (-8)
|
||||
#define PCRE2_ERROR_BADUTF (-9)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-10)
|
||||
#define PCRE2_ERROR_CALLOUT (-11) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_INTERNAL (-12)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-13)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-14)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-15)
|
||||
#define PCRE2_ERROR_NOMEMORY (-16)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-17)
|
||||
#define PCRE2_ERROR_NULL (-18)
|
||||
#define PCRE2_ERROR_PARTIAL (-19)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-20)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-22)
|
||||
#define PCRE2_ERROR_UNSET (-23)
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-30)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-31)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-32)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-33)
|
||||
#define PCRE2_ERROR_DFA_UMLIMIT (-34)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-35)
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
/* Specific error codes for UTF-8 validity checks */
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-41)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-42)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-43)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-44)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-45)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-46)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-47)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-48)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-49)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-50)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-51)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-52)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-53)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-54)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-55)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-56)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-57)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-58)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-59)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-60)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-61)
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
/* Specific error codes for UTF-16 validity checks */
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-62)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-63)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-64)
|
||||
/* Error codes for pcre2[_dfa]_match() */
|
||||
|
||||
/* Specific error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-65)
|
||||
#define PCRE2_ERROR_UTF32_ERR3 (-66)
|
||||
#define PCRE2_ERROR_BADCOUNT (-29)
|
||||
#define PCRE2_ERROR_BADENDIANNESS (-30)
|
||||
#define PCRE2_ERROR_BADLENGTH (-31)
|
||||
#define PCRE2_ERROR_BADMAGIC (-32)
|
||||
#define PCRE2_ERROR_BADMODE (-33)
|
||||
#define PCRE2_ERROR_BADOFFSET (-34)
|
||||
#define PCRE2_ERROR_BADOPTION (-35)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
||||
#define PCRE2_ERROR_DFA_UMLIMIT (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-53)
|
||||
#define PCRE2_ERROR_UNSET (-54)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
|
119
src/pcre2.h.in
119
src/pcre2.h.in
|
@ -138,86 +138,83 @@ D is inspected during pcre2_dfa_exec() execution
|
|||
|
||||
/* Newline and \R settings, for use in the compile context. */
|
||||
|
||||
#define PCRE2_NEWLINE_DEFAULT 0
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
|
||||
#define PCRE2_BSR_DEFAULT 0
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Match-time and get/set-time error codes */
|
||||
/* Error codes: no match and partial match are "expected" errors. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
#define PCRE2_ERROR_BADCOUNT (-2)
|
||||
#define PCRE2_ERROR_BADENDIANNESS (-3)
|
||||
#define PCRE2_ERROR_BADLENGTH (-4)
|
||||
#define PCRE2_ERROR_BADMAGIC (-5)
|
||||
#define PCRE2_ERROR_BADMODE (-6)
|
||||
#define PCRE2_ERROR_BADOFFSET (-7)
|
||||
#define PCRE2_ERROR_BADOPTION (-8)
|
||||
#define PCRE2_ERROR_BADUTF (-9)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-10)
|
||||
#define PCRE2_ERROR_CALLOUT (-11) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_INTERNAL (-12)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-13)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-14)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-15)
|
||||
#define PCRE2_ERROR_NOMEMORY (-16)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-17)
|
||||
#define PCRE2_ERROR_NULL (-18)
|
||||
#define PCRE2_ERROR_PARTIAL (-19)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-20)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-22)
|
||||
#define PCRE2_ERROR_UNSET (-23)
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-30)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-31)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-32)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-33)
|
||||
#define PCRE2_ERROR_DFA_UMLIMIT (-34)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-35)
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
/* Specific error codes for UTF-8 validity checks */
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-41)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-42)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-43)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-44)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-45)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-46)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-47)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-48)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-49)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-50)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-51)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-52)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-53)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-54)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-55)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-56)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-57)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-58)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-59)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-60)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-61)
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
/* Specific error codes for UTF-16 validity checks */
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-62)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-63)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-64)
|
||||
/* Error codes for pcre2[_dfa]_match() */
|
||||
|
||||
/* Specific error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-65)
|
||||
#define PCRE2_ERROR_UTF32_ERR3 (-66)
|
||||
#define PCRE2_ERROR_BADCOUNT (-29)
|
||||
#define PCRE2_ERROR_BADENDIANNESS (-30)
|
||||
#define PCRE2_ERROR_BADLENGTH (-31)
|
||||
#define PCRE2_ERROR_BADMAGIC (-32)
|
||||
#define PCRE2_ERROR_BADMODE (-33)
|
||||
#define PCRE2_ERROR_BADOFFSET (-34)
|
||||
#define PCRE2_ERROR_BADOPTION (-35)
|
||||
#define PCRE2_ERROR_BADUTF_OFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
||||
#define PCRE2_ERROR_DFA_UMLIMIT (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_UNKNOWN_OPCODE (-53)
|
||||
#define PCRE2_ERROR_UNSET (-54)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
|
7889
src/pcre2_compile.c
7889
src/pcre2_compile.c
File diff suppressed because it is too large
Load Diff
|
@ -102,7 +102,7 @@ switch (what)
|
|||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
*((int *)where) = NEWLINE;
|
||||
*((int *)where) = NEWLINE_DEFAULT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_PARENS_LIMIT:
|
||||
|
|
|
@ -139,9 +139,9 @@ if (defmemctl)
|
|||
}
|
||||
ccontext->stack_guard = NULL;
|
||||
ccontext->tables = PRIV(default_tables);
|
||||
ccontext->bsr_convention = PCRE2_BSR_DEFAULT;
|
||||
ccontext->newline_convention = PCRE2_NEWLINE_DEFAULT;
|
||||
ccontext->parens_nest_limit = PARENS_NEST_LIMIT;
|
||||
ccontext->newline_convention = NEWLINE_DEFAULT;
|
||||
ccontext->bsr_convention = BSR_DEFAULT;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -51,7 +51,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Do not ever re-use any error
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
Do not ever re-use any error
|
||||
number, because they are documented. Always add a new error instead. Messages
|
||||
marked DEAD below are no longer used. This used to be a table of strings, but
|
||||
in order to reduce the number of relocations needed when a shared library is
|
||||
|
@ -85,7 +88,7 @@ static const char compile_error_texts[] =
|
|||
"missing )\0"
|
||||
/* 15 */
|
||||
"reference to non-existent subpattern\0"
|
||||
"erroffset passed as NULL\0"
|
||||
"pattern or erroffset passed as NULL\0"
|
||||
"unknown option bit(s) set\0"
|
||||
"missing ) after comment\0"
|
||||
"parentheses nested too deeply\0" /** DEAD **/
|
||||
|
@ -104,7 +107,7 @@ static const char compile_error_texts[] =
|
|||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
"this version of PCRE is compiled without UTF support\0"
|
||||
"this version of PCRE does not have UTF or Unicode property support\0"
|
||||
"spare error\0" /** DEAD **/
|
||||
"character value in \\x{} or \\o{} is too large\0"
|
||||
/* 35 */
|
||||
|
@ -133,7 +136,7 @@ static const char compile_error_texts[] =
|
|||
"DEFINE group contains more than one branch\0"
|
||||
/* 55 */
|
||||
"repeating a DEFINE group is not allowed\0" /** DEAD **/
|
||||
"inconsistent NEWLINE options\0"
|
||||
"internal error: unknown newline setting\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"a numbered reference must not be zero\0"
|
||||
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
|
||||
|
@ -171,58 +174,74 @@ static const char compile_error_texts[] =
|
|||
"parentheses are too deeply nested (stack check)\0"
|
||||
;
|
||||
|
||||
/* Match-time error texts are in the same format. */
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
||||
static const char match_error_texts[] =
|
||||
"no error\0"
|
||||
"no match\0"
|
||||
"partial match\0"
|
||||
"UTF-8 error: 1 byte missing at end\0"
|
||||
"UTF-8 error: 2 bytes missing at end\0"
|
||||
/* 5 */
|
||||
"UTF-8 error: 3 bytes missing at end\0"
|
||||
"UTF-8 error: 4 bytes missing at end\0"
|
||||
"UTF-8 error: 5 bytes missing at end\0"
|
||||
"UTF-8 error: byte 2 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 3 top bits not 0x80\0"
|
||||
/* 10 */
|
||||
"UTF-8 error: byte 4 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 5 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 6 top bits not 0x80\0"
|
||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||
/* 15 */
|
||||
"UTF-8 error: code point > 0x10ffff is not defined\0"
|
||||
"UTF-8 error: code points 0xd000-0xdfff are not defined\0"
|
||||
"UTF-8 error: overlong 2-byte sequence\0"
|
||||
"UTF-8 error: overlong 3-byte sequence\0"
|
||||
"UTF-8 error: overlong 4-byte sequence\0"
|
||||
/* 20 */
|
||||
"UTF-8 error: overlong 5-byte sequence\0"
|
||||
"UTF-8 error: overlong 6-byte sequence\0"
|
||||
"UTF-8 error: isolated 0x80 byte\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
"UTF-16 error: invalid low surrogate\0"
|
||||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-32 error: surrogate character not allowed\0"
|
||||
"UTF-32 error: code point > 0x10ffff is not defined\0"
|
||||
"bad count value\0"
|
||||
/* 30 */
|
||||
"pattern compiled with other endianness\0"
|
||||
"bad length\0"
|
||||
/* -5 */
|
||||
"magic number missing\0"
|
||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||
"bad offset value\0"
|
||||
/* 35 */
|
||||
"bad option value\0"
|
||||
"bad UTF string\0"
|
||||
/* -10 */
|
||||
"bad offset into UTF string\0"
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
/* 40 */
|
||||
"backreference condition or recursion test not supported for DFA matching\0"
|
||||
"item unsupported for DFA matching\0"
|
||||
"match limit not supported for DFA matching\0"
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"internal error - pattern overwritten?\0"
|
||||
/* 45 */
|
||||
"bad JIT option\0"
|
||||
"JIT stack limit reached\0"
|
||||
/* -15 */
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown substring\0"
|
||||
/* 50 */
|
||||
"NULL argument passed\0"
|
||||
"partial match\0"
|
||||
/* -20 */
|
||||
"nested recursion at the same subject position\0"
|
||||
"recursion limit exceeded\0"
|
||||
"unknown opcode - pattern overwritten?\0"
|
||||
"value unset\0" /* Used by pcre2_pattern_info() */
|
||||
"spare -24\0"
|
||||
/* -25 */
|
||||
"spare -25\0"
|
||||
"spare -26\0"
|
||||
"spare -27\0"
|
||||
"spare -28\0"
|
||||
"spare -29\0"
|
||||
/* -30 */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
"backreference condition or recursion test not supported for DFA matching\0"
|
||||
"item unsupported for DFA matching\0"
|
||||
"match limit not supported for DFA matching\0"
|
||||
/* -35 */
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"spare -36\0"
|
||||
"spare -37\0"
|
||||
"spare -38\0"
|
||||
"spare -39\0"
|
||||
/* -40 */
|
||||
"spare -39\0"
|
||||
;
|
||||
|
||||
|
||||
|
@ -232,7 +251,8 @@ static const char match_error_texts[] =
|
|||
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for exec-time errors.
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
distinct.
|
||||
|
||||
Arguments:
|
||||
enumber error number
|
||||
|
@ -253,13 +273,12 @@ uint32_t n;
|
|||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber > 0) /* Compile-time error */
|
||||
if (enumber > COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
|
||||
else /* Match-time error */
|
||||
else /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
|
|
|
@ -40,78 +40,235 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* FIXME: this file is incomplete, being gradually built. */
|
||||
|
||||
/* We do not support both EBCDIC and UTF at the same time. The "configure"
|
||||
script prevents both being selected, but not everybody uses "configure". */
|
||||
|
||||
#if defined EBCDIC && defined SUPPORT_UTF
|
||||
#error The use of both EBCDIC and SUPPORT_UTF is not supported.
|
||||
#endif
|
||||
|
||||
/* Standard C headers */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "pcre2.h"
|
||||
#include "pcre2_ucp.h"
|
||||
|
||||
#define PUBL(name) pcre2_##name
|
||||
|
||||
#ifndef PRIV
|
||||
#define PRIV(name) _pcre2_##name
|
||||
#endif
|
||||
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
|
||||
extern const uint8_t PRIV(default_tables)[];
|
||||
|
||||
|
||||
|
||||
/* Macros to make boolean values more obvious. The #ifndef is to pacify
|
||||
compiler warnings in environments where these macros are defined elsewhere.
|
||||
Unfortunately, there is no way to do the same for the typedef. */
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
|
||||
/* Valgrind (memcheck) support */
|
||||
|
||||
#ifdef SUPPORT_VALGRIND
|
||||
#include <valgrind/memcheck.h>
|
||||
#endif
|
||||
|
||||
/* This is an unsigned int value that no character can ever have, as
|
||||
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||
using some MS magic. I found some useful information on this web page:
|
||||
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||
information there, using __declspec(dllexport) without "extern" we have a
|
||||
definition; with "extern" we have a declaration. The settings here override the
|
||||
setting in pcre.h (which is included below); it defines only PCRE2_EXP_DECL,
|
||||
which is all that is needed for applications (they just import the symbols). We
|
||||
use:
|
||||
|
||||
PCRE2_EXP_DECL for declarations
|
||||
PCRE2_EXP_DEFN for definitions of exported functions
|
||||
PCRE2_EXP_DATA_DEFN for definitions of exported variables
|
||||
|
||||
The reason for the two DEFN macros is that in non-Windows environments, one
|
||||
does not want to have "extern" before variable definitions because it leads to
|
||||
compiler warnings. So we distinguish between functions and variables. In
|
||||
Windows, the two should always be the same.
|
||||
|
||||
The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcretest,
|
||||
which is an application, but needs to import this file in order to "peek" at
|
||||
internals, can #include pcre.h first to get an application's-eye view.
|
||||
|
||||
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
|
||||
special-purpose environments) might want to stick other stuff in front of
|
||||
exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN and
|
||||
PCRE2_EXP_DATA_DEFN only if they are not already set. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifndef PCRE2_STATIC
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE2_EXP_DEFN __declspec(dllexport)
|
||||
# define PCRE2_EXP_DATA_DEFN __declspec(dllexport)
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# define PCRE2_EXP_DEFN
|
||||
# define PCRE2_EXP_DATA_DEFN
|
||||
# endif
|
||||
# else
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRE2_EXP_DEFN
|
||||
# define PCRE2_EXP_DEFN PCRE2_EXP_DECL
|
||||
# endif
|
||||
# ifndef PCRE2_EXP_DATA_DEFN
|
||||
# define PCRE2_EXP_DATA_DEFN
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Include the public PCRE2 header and the definitions of UCP character
|
||||
property values. This must follow the setting of PCRE2_EXP_DECL above. */
|
||||
|
||||
#include "pcre2.h"
|
||||
#include "pcre2_ucp.h"
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer can be replaced
|
||||
with a custom type. This makes it possible, for example, to allow pcre2_match()
|
||||
to process subject strings that are discontinuous by using a smart pointer
|
||||
class. It must always be possible to inspect all of the subject string in
|
||||
pcre2_match() because of the way it backtracks. */
|
||||
|
||||
/* WARNING: This is as yet untested for PCRE2. */
|
||||
|
||||
#ifdef CUSTOM_SUBJECT_PTR
|
||||
#undef PCRE2_SPTR
|
||||
#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order so make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* When checking for integer overflow in pcre2_compile(), we need to handle
|
||||
large integers. If a 64-bit integer type is available, we can use that.
|
||||
Otherwise we have to cast to double, which of course requires floating point
|
||||
arithmetic. Handle this by defining a macro for the appropriate type. If
|
||||
stdint.h is available, include it; it may define INT64_MAX. Systems that do not
|
||||
have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
|
||||
by "configure". */
|
||||
|
||||
#if defined HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
#elif defined HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
#if defined INT64_MAX || defined int64_t
|
||||
#define INT64_OR_DOUBLE int64_t
|
||||
#else
|
||||
#define INT64_OR_DOUBLE double
|
||||
#endif
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strlen(s) _strlen(s)
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||
is set. Otherwise, include an emulating function for those systems that have
|
||||
neither (there some non-Unix environments where this is the case). */
|
||||
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#undef memmove /* some systems may have a macro */
|
||||
#ifdef HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
static void *
|
||||
pcre_memmove(void *d, const void *s, size_t n)
|
||||
{
|
||||
size_t i;
|
||||
unsigned char *dest = (unsigned char *)d;
|
||||
const unsigned char *src = (const unsigned char *)s;
|
||||
if (dest > src)
|
||||
{
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return (void *)dest;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||
return (void *)(dest - n);
|
||||
}
|
||||
}
|
||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
/* External (in the C sense) functions and macros that are private to the
|
||||
libraries are always referenced using the PRIV macro. This makes it possible
|
||||
for pcre2test.c to include some of the source files from the libraries using a
|
||||
different PRIV definition to avoid name clashes. */
|
||||
|
||||
#ifndef PRIV
|
||||
#define PRIV(name) _pcre2_##name
|
||||
#endif
|
||||
|
||||
/* This is an unsigned int value that no UTF character can ever have, as
|
||||
Unicode doesn't go beyond 0x0010ffff. */
|
||||
|
||||
#define NOTACHAR 0xffffffff
|
||||
|
||||
/* When UTF encoding is being used, a character is no longer just a single
|
||||
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
|
||||
handling generate simple sequences when used in the basic mode, and more
|
||||
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||
used when UTF is not supported. To make sure they can never even appear when
|
||||
UTF support is omitted, we don't even define them. */
|
||||
/* Compile-time errors are added to this value. As they are documented, it
|
||||
should probably never be changed. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
#define COMPILE_ERROR_BASE 100
|
||||
|
||||
/* #define MAX_VALUE_FOR_SINGLE_CHAR */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
/* #define GET_EXTRALEN(c) */
|
||||
/* #define NOT_FIRSTCHAR(c) */
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
/* #define GETCHARLENTEST(c, eptr, len) */
|
||||
/* #define BACKCHAR(eptr) */
|
||||
/* #define FORWARDCHAR(eptr) */
|
||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||
/* Define the default BSR convention. */
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
#ifdef BSR_ANYCRLF
|
||||
#define BSR_DEFAULT PCRE2_BSR_ANYCRLF
|
||||
#else
|
||||
#define BSR_DEFAULT PCRE2_BSR_UNICODE
|
||||
#endif
|
||||
|
||||
|
||||
/* ---------------- Basic UTF-8 macros ---------------- */
|
||||
|
||||
/* These UTF-8 macros are always defined because they are used in pcre2test for
|
||||
handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library
|
||||
is not supported. */
|
||||
|
||||
/* Tests whether a UTF-8 code point needs extra bytes to decode. */
|
||||
|
||||
#define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
|
||||
|
||||
/* The following macros were originally written in the form of loops that used
|
||||
data from the tables whose names start with PRIV(utf8_table). They were
|
||||
rewritten by a user so as not to use loops, because in some environments this
|
||||
gives a significant performance advantage, and it seems never to do any harm.
|
||||
*/
|
||||
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
|
||||
advancing the pointer. */
|
||||
|
||||
|
@ -168,8 +325,44 @@ the pointer. */
|
|||
} \
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
|
||||
advancing the pointer, incrementing the length. */
|
||||
|
||||
#define GETUTF8LEN(c, eptr, len) \
|
||||
{ \
|
||||
if ((c & 0x20) == 0) \
|
||||
{ \
|
||||
c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
|
||||
len++; \
|
||||
} \
|
||||
else if ((c & 0x10) == 0) \
|
||||
{ \
|
||||
c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
|
||||
len += 2; \
|
||||
} \
|
||||
else if ((c & 0x08) == 0) \
|
||||
{\
|
||||
c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
|
||||
((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
|
||||
len += 3; \
|
||||
} \
|
||||
else if ((c & 0x04) == 0) \
|
||||
{ \
|
||||
c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
|
||||
((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
|
||||
(eptr[4] & 0x3f); \
|
||||
len += 4; \
|
||||
} \
|
||||
else \
|
||||
{\
|
||||
c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
|
||||
((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
|
||||
((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
|
||||
len += 5; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* --------------- Whitespace macros ---------------- */
|
||||
|
||||
/* Tests for Unicode horizontal and vertical whitespace characters must check a
|
||||
number of different values. Using a switch statement for this generates the
|
||||
|
@ -187,7 +380,7 @@ NOTACHAR (which is 0xffffffff).
|
|||
Any changes should ensure that the various macros are kept in step with each
|
||||
other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
||||
|
||||
/* ------ ASCII/Unicode environments ------ */
|
||||
/* -------------- ASCII/Unicode environments -------------- */
|
||||
|
||||
#ifndef EBCDIC
|
||||
|
||||
|
@ -242,7 +435,7 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
|||
VSPACE_BYTE_CASES: \
|
||||
VSPACE_MULTIBYTE_CASES
|
||||
|
||||
/* ------ EBCDIC environments ------ */
|
||||
/* -------------- EBCDIC environments -------------- */
|
||||
|
||||
#else
|
||||
#define HSPACE_LIST CHAR_HT, CHAR_SPACE
|
||||
|
@ -271,9 +464,47 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
|||
#define VSPACE_CASES VSPACE_BYTE_CASES
|
||||
#endif /* EBCDIC */
|
||||
|
||||
/* ------ End of whitespace macros ------ */
|
||||
/* -------------- End of whitespace macros -------------- */
|
||||
|
||||
|
||||
/* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF,
|
||||
"any" and "anycrlf" at present). The following macros are used to package up
|
||||
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
|
||||
modules to indicate in which datablock the parameters exist, and what the
|
||||
start/end of string field names are. */
|
||||
|
||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
|
||||
|
||||
/* This macro checks for a newline at the given position */
|
||||
|
||||
#define IS_NEWLINE(p) \
|
||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||
((p) < NLBLOCK->PSEND && \
|
||||
PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
|
||||
&(NLBLOCK->nllen), utf)) \
|
||||
: \
|
||||
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
|
||||
UCHAR21TEST(p) == NLBLOCK->nl[0] && \
|
||||
(NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \
|
||||
) \
|
||||
)
|
||||
|
||||
/* This macro checks for a newline immediately preceding the given position */
|
||||
|
||||
#define WAS_NEWLINE(p) \
|
||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||
((p) > NLBLOCK->PSSTART && \
|
||||
PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
|
||||
&(NLBLOCK->nllen), utf)) \
|
||||
: \
|
||||
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
|
||||
UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
|
||||
(NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
|
||||
) \
|
||||
)
|
||||
|
||||
/* Private flags containing information about the compiled pattern. The first
|
||||
three must not be changed, because whichever is set is actually the number of
|
||||
bytes in a code unit in that mode. */
|
||||
|
@ -296,16 +527,55 @@ bytes in a code unit in that mode. */
|
|||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||
|
||||
/* This variable is used to detect a loaded regular expression
|
||||
in different endianness. */
|
||||
/* This value is used to detect a loaded regular expression in different
|
||||
endianness. */
|
||||
|
||||
#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */
|
||||
|
||||
/* The maximum remaining length of subject we are prepared to search for a
|
||||
req_unit match. */
|
||||
|
||||
#define REQ_UNIT_MAX 1000
|
||||
|
||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_letter 0x02
|
||||
#define ctype_digit 0x04
|
||||
#define ctype_xdigit 0x08
|
||||
#define ctype_word 0x10 /* alphanumeric or '_' */
|
||||
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
||||
|
||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
||||
of bits for a class map. Some classes are built by combining these tables. */
|
||||
|
||||
#define cbit_space 0 /* [:space:] or \s */
|
||||
#define cbit_xdigit 32 /* [:xdigit:] */
|
||||
#define cbit_digit 64 /* [:digit:] or \d */
|
||||
#define cbit_upper 96 /* [:upper:] */
|
||||
#define cbit_lower 128 /* [:lower:] */
|
||||
#define cbit_word 160 /* [:word:] or \w */
|
||||
#define cbit_graph 192 /* [:graph:] */
|
||||
#define cbit_print 224 /* [:print:] */
|
||||
#define cbit_punct 256 /* [:punct:] */
|
||||
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||
#define cbit_length 320 /* Length of the cbits table */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length. */
|
||||
|
||||
#define lcc_offset 0
|
||||
#define fcc_offset 256
|
||||
#define cbits_offset 512
|
||||
#define ctypes_offset (cbits_offset + cbit_length)
|
||||
#define tables_length (ctypes_offset + 256)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* -------------------- Character and string names ------------------------ */
|
||||
|
@ -1432,6 +1702,17 @@ typedef struct pcre2_memctl {
|
|||
void *memory_data;
|
||||
} pcre2_memctl;
|
||||
|
||||
/* Structure for building a chain of open capturing subpatterns during
|
||||
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
|
||||
encountered. This is also used to identify subpatterns that contain recursive
|
||||
back references to themselves, so that they can be made atomic. */
|
||||
|
||||
typedef struct open_capitem {
|
||||
struct open_capitem *next; /* Chain link */
|
||||
uint16_t number; /* Capture number */
|
||||
uint16_t flag; /* Set TRUE if recursive back ref */
|
||||
} open_capitem;
|
||||
|
||||
/* Layout of the UCP type table that translates property names into types and
|
||||
codes. Each entry used to point directly to a name, but to reduce the number of
|
||||
relocations in shared libraries, it now has an offset into a single string
|
||||
|
@ -1481,13 +1762,52 @@ extern const int PRIV(ucp_typerange)[];
|
|||
|
||||
/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */
|
||||
|
||||
/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
|
||||
defined, so the following items are omitted. */
|
||||
|
||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
||||
|
||||
/* Mode-dependent macros and private structures are defined in a separate file.
|
||||
When compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we
|
||||
include them at the appropriate width. When compiling pcre2test, however, that
|
||||
macro is not set at this point because pcre2test needs to include them at all
|
||||
supported widths. */
|
||||
/* This is the largest non-UTF code point. */
|
||||
|
||||
#define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH))
|
||||
|
||||
|
||||
/* Internal shared data tables. These are tables that are used by more than one
|
||||
of the exported public functions. They have to be "external" in the C sense,
|
||||
but are not part of the PCRE2 public API. The data for these tables is in the
|
||||
pcre2_tables.c module. Even though some of them are identical in each library,
|
||||
they must have different names so that more than one library can be linked with
|
||||
an application. UTF-8 tables are needed only when compiling the 8-bit library.
|
||||
*/
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
extern const int PRIV(utf8_table1)[];
|
||||
extern const int PRIV(utf8_table1_size);
|
||||
extern const int PRIV(utf8_table2)[];
|
||||
extern const int PRIV(utf8_table3)[];
|
||||
extern const uint8_t PRIV(utf8_table4)[];
|
||||
#endif
|
||||
|
||||
extern const uint8_t PRIV(default_tables)[];
|
||||
extern const uint8_t PRIV(OP_lengths)[];
|
||||
|
||||
extern const uint32_t PRIV(hspace_list)[];
|
||||
extern const uint32_t PRIV(vspace_list)[];
|
||||
|
||||
extern const ucp_type_table PRIV(utt)[];
|
||||
extern const char PRIV(utt_names)[];
|
||||
extern const size_t PRIV(utt_size);
|
||||
|
||||
|
||||
/* Mode-dependent macros and hidden and private structures are defined in a
|
||||
separate file so that pcre2test can include them at all supported widths. When
|
||||
compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
|
||||
include them at the appropriate width, after setting up suffix macros for the
|
||||
private structures. */
|
||||
|
||||
#define compile_data PCRE2_SUFFIX(compile_data_)
|
||||
#define branch_chain PCRE2_SUFFIX(branch_chain_)
|
||||
#define named_group PCRE2_SUFFIX(named_group_)
|
||||
|
||||
#include "pcre2_intmodedep.h"
|
||||
|
||||
|
@ -1498,14 +1818,32 @@ from pcre2test, and must not be defined when no code unit width is available.
|
|||
*/
|
||||
|
||||
#define _pcre2_compile_context_init PCRE2_SUFFIX(_pcre2_compile_context_init_)
|
||||
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
||||
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
||||
#define _pcre2_match_context_init PCRE2_SUFFIX(_pcre2_match_context_init_)
|
||||
#define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_)
|
||||
#define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_)
|
||||
#define _pcre2_strcmp PCRE2_SUFFIX(_pcre_strcmp_)
|
||||
#define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre_strcmp_c8_)
|
||||
#define _pcre2_strlen PCRE2_SUFFIX(_pcre_strlen_)
|
||||
#define _pcre2_strncmp PCRE2_SUFFIX(_pcre_strncmp_)
|
||||
#define _pcre2_strncmp_c8 PCRE2_SUFFIX(_pcre_strncmp_c8_)
|
||||
#define _pcre2_valid_utf PCRE2_SUFFIX(_pcre_valid_utf_)
|
||||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||
|
||||
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
|
||||
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
|
||||
extern void *_pcre2_memctl_malloc(size_t, size_t, pcre2_memctl *);
|
||||
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
|
||||
extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
|
||||
#endif
|
||||
extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
|
||||
extern int _pcre2_strlen(PCRE2_SPTR);
|
||||
extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
||||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||
extern int _pcre2_valid_utf(PCRE2_SPTR, int, size_t *);
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||
|
||||
/* End of pcre2_internal.h */
|
||||
|
|
|
@ -42,21 +42,44 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* This module contains mode-dependent macro and structure definitions. The
|
||||
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||
These mode-dependent items are kept in a separate file so that they can also be
|
||||
#included multiple times for different code unit widths by pcre2test. Start by
|
||||
undefining all the new macros defined herein so that they can be redefined for
|
||||
multiple inclusions. */
|
||||
#included multiple times for different code unit widths by pcre2test in order
|
||||
to have access to the hidden structures at all supported widths.
|
||||
|
||||
Some of the mode-dependent macros are required at different widths for
|
||||
different parts of the pcre2test code (in particular, the included
|
||||
pcre_printint.c file). We undefine them here so that they can be re-defined for
|
||||
multiple inclusions. Not all of these are used in pcretest, but it's easier
|
||||
just to undefine them all. */
|
||||
|
||||
#undef ACROSSCHAR
|
||||
#undef BACKCHAR
|
||||
#undef CU2BYTES
|
||||
#undef FORWARDCHAR
|
||||
#undef GET
|
||||
#undef GET2
|
||||
#undef GETCHAR
|
||||
#undef GETCHARINC
|
||||
#undef GETCHARINCTEST
|
||||
#undef GETCHARLEN
|
||||
#undef GETCHARLENTEST
|
||||
#undef GETCHARTEST
|
||||
#undef GET_EXTRALEN
|
||||
#undef HAS_EXTRALEN
|
||||
#undef IMM2_SIZE
|
||||
#undef MAX_255
|
||||
#undef MAX_MARK
|
||||
#undef MAX_PATTERN_SIZE
|
||||
#undef MAX_UTF_SINGLE_CU
|
||||
#undef NOT_FIRSTCHAR
|
||||
#undef PUT
|
||||
#undef PUT2
|
||||
#undef PUT2INC
|
||||
#undef PUTCHAR
|
||||
#undef PUTINC
|
||||
|
||||
|
||||
/* ---------------------------MACROS ----------------------------- */
|
||||
|
||||
/* -------------------------- MACROS ----------------------------- */
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
|
||||
(always stored in big-endian order in 8-bit mode) by default. These are used,
|
||||
|
@ -70,7 +93,6 @@ unit string is now handled by the macros that are defined here.
|
|||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||
values of 2 or 4 are also supported. */
|
||||
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
@ -150,8 +172,8 @@ values of 2 or 4 are also supported. */
|
|||
#error Unsupported compiling mode
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------------------*/
|
||||
|
||||
/* --------------- Other mode-specific macros ----------------- */
|
||||
|
||||
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||
the size of offsets changes. There are used for repeat counts and for other
|
||||
|
@ -166,7 +188,7 @@ arithmetic results in a signed value. Hence the cast. */
|
|||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define IMM2_SIZE 2
|
||||
#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
|
||||
#define PUT2(a,n,d) { a[n] = (d) >> 8; a[(n)+1] = (d) & 255; }
|
||||
#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define IMM2_SIZE 1
|
||||
|
@ -174,14 +196,338 @@ arithmetic results in a signed value. Hence the cast. */
|
|||
#define PUT2(a,n,d) a[n] = d
|
||||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The maximum length of a MARK
|
||||
name must fit in one code unit; currently it is set to 255 or 65535. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#ifdef SUPPORT_UTF
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define MAX_MARK ((1u << 16) - 1)
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* ----------------- Character-handling macros ----------------- */
|
||||
|
||||
/* There is a proposed future special "UTF-21" mode, in which only the lowest
|
||||
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
|
||||
high-order bits available to the application for other uses. In preparation for
|
||||
the future implementation of this mode, there are macros that load a data item
|
||||
and, if in this special mode, mask it to 21 bits. These macros all have names
|
||||
starting with UCHAR21. In all other modes, including the normal 32-bit
|
||||
library, the macros all have the same simple definitions. When the new mode is
|
||||
implemented, it is expected that these definitions will be varied appropriately
|
||||
using #ifdef when compiling the library that supports the special mode. */
|
||||
|
||||
#define UCHAR21(eptr) (*(eptr))
|
||||
#define UCHAR21TEST(eptr) (*(eptr))
|
||||
#define UCHAR21INC(eptr) (*(eptr)++)
|
||||
#define UCHAR21INCTEST(eptr) (*(eptr)++)
|
||||
|
||||
/* When UTF encoding is being used, a character is no longer just a single
|
||||
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
|
||||
handling generate simple sequences when used in the basic mode, and more
|
||||
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||
used when UTF is not supported. To make sure they can never even appear when
|
||||
UTF support is omitted, we don't even define them. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
|
||||
/* #define MAX_UTF_SINGLE_CU */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
/* #define GET_EXTRALEN(c) */
|
||||
/* #define NOT_FIRSTCHAR(c) */
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
/* #define GETCHARLENTEST(c, eptr, len) */
|
||||
/* #define BACKCHAR(eptr) */
|
||||
/* #define FORWARDCHAR(eptr) */
|
||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 127
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
|
||||
|
||||
/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
|
||||
|
||||
/* Returns TRUE, if the given character is not the first character
|
||||
of a UTF sequence. */
|
||||
|
||||
#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (c >= 0xc0) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && c >= 0xc0) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
||||
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
|
||||
pointer, incrementing length if there are extra bytes. This is called when we
|
||||
do not know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-8 mode - we don't put a test within the macro
|
||||
because almost all calls are already within a block of UTF-8 only code. */
|
||||
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
while((condition) && ((eptr) & 0xc0) == 0x80) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 65535
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800)
|
||||
|
||||
/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) 1
|
||||
|
||||
/* Returns TRUE, if the given character is not the first character
|
||||
of a UTF sequence. */
|
||||
|
||||
#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer. */
|
||||
|
||||
#define GETUTF16(c, eptr) \
|
||||
{ c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
|
||||
the pointer. */
|
||||
|
||||
#define GETUTF16INC(c, eptr) \
|
||||
{ c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; }
|
||||
|
||||
/* Get the next UTF-16 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer, incrementing the length. */
|
||||
|
||||
#define GETUTF16LEN(c, eptr, len) \
|
||||
{ c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer, incrementing
|
||||
length if there is a low surrogate. This is called when we know we are in
|
||||
UTF-16 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
|
||||
pointer, incrementing length if there is a low surrogate. This is called when
|
||||
we do not know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-16 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-16 only
|
||||
code. */
|
||||
|
||||
#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#else
|
||||
|
||||
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
|
||||
into one PCRE_UCHAR unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
||||
#define HAS_EXTRALEN(c) (0)
|
||||
#define GET_EXTRALEN(c) (0)
|
||||
#define NOT_FIRSTCHAR(c) (0)
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
|
||||
length (since all UTF-32 is of length 1). This is called when we know we are in
|
||||
UTF-32 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
GETCHAR(c, eptr)
|
||||
|
||||
/* Get the next UTF-32character, testing for UTF-32 mode, not advancing the
|
||||
pointer, not incrementing the length (since all UTF-32 is of length 1).
|
||||
This is called when we do not know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
GETCHARTEST(c, eptr)
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-32 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-32 only
|
||||
code.
|
||||
|
||||
These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
|
||||
|
||||
#define BACKCHAR(eptr) do { } while (0)
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
|
||||
#define FORWARDCHAR(eptr) do { } while (0)
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
|
||||
#define ACROSSCHAR(condition, eptr, action) do { } while (0)
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
|
||||
#endif /* UTF-32 character handling */
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
|
||||
/* Mode-dependent macros that have the same definition in all modes. */
|
||||
|
||||
#define CU2BYTES(x) (x)*((PCRE2_CODE_UNIT_WIDTH/8))
|
||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
|
||||
|
||||
|
||||
/* --------------------------- STRUCTURES ----------------------------- */
|
||||
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||
|
||||
/* The real general context structure. At present it hold only data for custom
|
||||
memory control. */
|
||||
|
@ -195,7 +541,7 @@ typedef struct pcre2_real_general_context {
|
|||
typedef struct pcre2_real_compile_context {
|
||||
pcre2_memctl memctl;
|
||||
int (*stack_guard)(uint32_t);
|
||||
const unsigned char *tables;
|
||||
const uint8_t *tables;
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t parens_nest_limit;
|
||||
|
@ -217,11 +563,12 @@ typedef struct pcre2_real_match_context {
|
|||
/* The real compiled code structure */
|
||||
|
||||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl;
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
void *executable_jit; /* Pointer to JIT code */
|
||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||
size_t blocksize; /* Total (bytes) that was malloc-ed */
|
||||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
uint32_t size; /* Total (bytes) that was malloc-ed */
|
||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t pattern_options; /* Options taken from the pattern */
|
||||
uint32_t flags; /* Various state flags */
|
||||
|
@ -239,14 +586,13 @@ typedef struct pcre2_real_code {
|
|||
uint16_t name_count; /* Number of name entries in the table */
|
||||
} pcre2_real_code;
|
||||
|
||||
/* The reat match data structure. */
|
||||
/* The real match data structure. */
|
||||
|
||||
typedef struct pcre2_real_match_data {
|
||||
pcre2_memctl memctl;
|
||||
const pcre2_real_code *code; /* The pattern used for the match */
|
||||
PCRE2_SPTR subject; /* The subject that was matched */
|
||||
int rc; /* The return code from the match */
|
||||
int utf_reason; /* Reason code for bad UTF */
|
||||
size_t leftchar; /* Offset to leftmost code unit */
|
||||
size_t rightchar; /* Offset to rightmost code unit */
|
||||
size_t startchar; /* Offset to starting code unit */
|
||||
|
@ -255,4 +601,71 @@ typedef struct pcre2_real_match_data {
|
|||
size_t ovector[1]; /* The first field */
|
||||
} pcre2_real_match_data;
|
||||
|
||||
|
||||
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
|
||||
|
||||
/* These structures are not needed for pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion while compiling. */
|
||||
|
||||
typedef struct branch_chain {
|
||||
struct branch_chain *outer;
|
||||
PCRE2_UCHAR *current_branch;
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for building a list of named groups during the first pass of
|
||||
compiling. */
|
||||
|
||||
typedef struct named_group {
|
||||
PCRE2_SPTR name; /* Points to the name in the pattern */
|
||||
int length; /* Length of the name */
|
||||
uint32_t number; /* Group number */
|
||||
} named_group;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
typedef struct compile_data {
|
||||
pcre2_real_compile_context *cx; /* Points to the compile context */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *cbits; /* Points to character type table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SPTR start_workspace; /* The start of working space */
|
||||
PCRE2_SPTR start_code; /* The start of the compiled code */
|
||||
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||
PCRE2_UCHAR *hwm; /* High watermark of workspace */
|
||||
open_capitem *open_caps; /* Chain of open capture items */
|
||||
named_group *named_groups; /* Points to vector in pre-compile */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
int name_entry_size; /* Size of each entry */
|
||||
int named_group_list_size; /* Number of entries in the list */
|
||||
int workspace_size; /* Size of workspace */
|
||||
unsigned int bracount; /* Count of capturing parens as we compile */
|
||||
int final_bracount; /* Saved value after first pass */
|
||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||
int top_backref; /* Maximum back reference */
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
unsigned int namedrefcount; /* Number of backreferences by name */
|
||||
int parens_depth; /* Depth of nested parentheses */
|
||||
int assert_depth; /* Depth of nested assertions */
|
||||
uint32_t external_options; /* External (initial) options */
|
||||
uint32_t external_flags; /* External flag bits to be set */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||
} compile_data;
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* End of pcre2_intmodedep.h */
|
||||
|
|
|
@ -0,0 +1,213 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE2 supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR endptr, int *lenptr,
|
||||
BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf) { GETCHAR(c, ptr); } else
|
||||
#else
|
||||
(void)utf;
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
c = *ptr;
|
||||
|
||||
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF: *lenptr = 1; return TRUE;
|
||||
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF: *lenptr = 1; return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf TRUE if in utf mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR startptr, int *lenptr,
|
||||
BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
ptr--;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
#else
|
||||
(void)utf;
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
c = *ptr;
|
||||
|
||||
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifdef EBCDIC
|
||||
case CHAR_NEL:
|
||||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_newline.c */
|
|
@ -0,0 +1,119 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a function that converts a Unicode character code point
|
||||
into a UTF string. The behaviour is different for each code unit width. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* If SUPPORT_UTF is not defined, this function will never be called. Supply a
|
||||
dummy function because some compilers do not like empty source modules. */
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
(void)(cvalue);
|
||||
(void)(buffer);
|
||||
return 0;
|
||||
}
|
||||
#else /* SUPPORT_UTF */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert code point to UTF *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result
|
||||
|
||||
Returns: number of code units placed in the buffer
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
/* Convert to UTF-8 */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
register int i, j;
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||
return i + 1;
|
||||
|
||||
/* Convert to UTF-16 */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (cvalue <= 0xffff)
|
||||
{
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
}
|
||||
cvalue -= 0x10000;
|
||||
*buffer++ = 0xd800 | (cvalue >> 10);
|
||||
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
||||
return 2;
|
||||
|
||||
/* Convert to UTF-32 */
|
||||
|
||||
#else
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* End of pcre_ord2utf.c */
|
|
@ -184,7 +184,7 @@ switch(what)
|
|||
break;
|
||||
|
||||
case PCRE2_INFO_SIZE:
|
||||
*((size_t *)where) = re->size;
|
||||
*((size_t *)where) = re->blocksize;
|
||||
break;
|
||||
|
||||
default: return PCRE2_ERROR_BADOPTION;
|
||||
|
|
|
@ -53,7 +53,7 @@ functions work only on 8-bit data. */
|
|||
|
||||
|
||||
/*************************************************
|
||||
* Compare two strings *
|
||||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
|
@ -77,4 +77,105 @@ while (*str1 != '\0' || *str2 != '\0')
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two PCRE2 strings, given a length *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (len-- > 0)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare PCRE2 string to 8-bit string by length *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
'const char *'.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (len-- > 0)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the length of a string *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the string
|
||||
Returns: the length
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strlen)(PCRE2_SPTR str)
|
||||
{
|
||||
int c = 0;
|
||||
while (*str++ != 0) c++;
|
||||
return c;
|
||||
}
|
||||
|
||||
/* End of pcre2_string_utils.c */
|
||||
|
|
|
@ -41,22 +41,22 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcre2test program,
|
||||
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
|
||||
avoiding name clashes with the library. In this case, PCRE2_INCLUDED is
|
||||
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
|
||||
defined. */
|
||||
|
||||
#ifndef PCRE2_INCLUDED /* We're compiling the library */
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_INCLUDED */
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
This is mode-dependent, so is skipped when this file is included by pcre2test. */
|
||||
|
||||
#ifndef PCRE2_INCLUDED
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
#endif
|
||||
|
||||
|
@ -71,14 +71,18 @@ const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
|
|||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
|
||||
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
||||
handling wide characters. */
|
||||
|
||||
#if defined PCRE2_PCRE2TEST || \
|
||||
(defined SUPPORT_UTF && \
|
||||
defined PCRE2_CODE_UNIT_WIDTH && \
|
||||
PCRE2_CODE_UNIT_WIDTH == 8)
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
|
||||
|| (defined PCRE2_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32))
|
||||
|
||||
/* These tables are also required by pcretest in 16- or 32-bit mode. */
|
||||
|
||||
const int PRIV(utf8_table1)[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
|
@ -99,7 +103,7 @@ const uint8_t PRIV(utf8_table4)[] = {
|
|||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE2_INCLUDED && SUPPORT_PCRE[16|32])*/
|
||||
#endif /* UTF-8 support needed */
|
||||
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
|
@ -653,7 +657,7 @@ const ucp_type_table PRIV(utt)[] = {
|
|||
{ 1042, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ table names from _pcre2_xxx to xxxx, thereby avoiding name clashes
|
|||
with the library. At present, just one of these tables is actually
|
||||
needed. */
|
||||
|
||||
#ifndef PCRE2_INCLUDED
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
|
@ -16,7 +16,7 @@ needed. */
|
|||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#endif /* PCRE2_INCLUDED */
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* Unicode character database. */
|
||||
/* This file was autogenerated by the MultiStage2.py script. */
|
||||
|
@ -78,7 +78,7 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {
|
|||
|
||||
/* When #included in pcre2test, we don't need this large table. */
|
||||
|
||||
#ifndef PCRE2_INCLUDED
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
const ucd_record PRIV(ucd_records)[] = { /* 5016 bytes, record size 8 */
|
||||
{ 9, 0, 2, 0, 0, }, /* 0 */
|
||||
|
@ -3295,4 +3295,4 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
|
|||
#endif
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
#endif /* PCRE2_INCLUDED */
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
|
|
@ -0,0 +1,399 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF character
|
||||
strings. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
/*************************************************
|
||||
* Dummy function when UTF not supported *
|
||||
*************************************************/
|
||||
|
||||
/* This function should never be called when UTF is not supported. */
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, int length, size_t *erroroffset)
|
||||
{
|
||||
(void)string;
|
||||
(void)length;
|
||||
(void)erroroffset);
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
check that a supposed UTF string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying an
|
||||
invalid string are then undefined.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
errp pointer to an error position offset variable
|
||||
|
||||
Returns: == 0 if the string is a valid UTF string
|
||||
!= 0 otherwise, setting the offset of the bad character
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, int length, size_t *erroroffset)
|
||||
{
|
||||
register PCRE2_SPTR p;
|
||||
register uint32_t c;
|
||||
|
||||
if (length < 0)
|
||||
{
|
||||
for (p = string; *p != 0; p++);
|
||||
length = (int)(p - string);
|
||||
}
|
||||
|
||||
/* ----------------- Check a UTF-8 string ----------------- */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
/* Originally, this function checked according to RFC 2279, allowing for values
|
||||
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
|
||||
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked. Error returns are as follows:
|
||||
|
||||
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
*/
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
{
|
||||
register uint32_t ab, d;
|
||||
|
||||
c = *p;
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR20;
|
||||
}
|
||||
|
||||
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR21;
|
||||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
|
||||
if (length < (int)ab) /* Missing bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
switch(ab - length)
|
||||
{
|
||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||
}
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
|
||||
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR6;
|
||||
}
|
||||
|
||||
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||
set and not the 0x40 bit. Then check for an overlong sequence, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* 2-byte character. No further bytes to check for 0x80. Check first byte
|
||||
for for xx00 000x (overlong sequence). */
|
||||
|
||||
case 1: if ((c & 0x3e) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE2_ERROR_UTF8_ERR15;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
|
||||
for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if (c == 0xe0 && (d & 0x20) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR16;
|
||||
}
|
||||
if (c == 0xed && d >= 0xa0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR14;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
|
||||
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
|
||||
character greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if (c == 0xf0 && (d & 0x30) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR17;
|
||||
}
|
||||
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR13;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
|
||||
rejected by the length test below. However, we do the appropriate tests
|
||||
here so that overlong sequences get diagnosed, and also in case there is
|
||||
ever an option for handling these larger code points. */
|
||||
|
||||
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
|
||||
1111 1000, xx00 0xxx */
|
||||
|
||||
case 4:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if (c == 0xf8 && (d & 0x38) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR18;
|
||||
}
|
||||
break;
|
||||
|
||||
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
|
||||
1111 1100, xx00 00xx. */
|
||||
|
||||
case 5:
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR10;
|
||||
}
|
||||
if (c == 0xfc && (d & 0x3c) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE2_ERROR_UTF8_ERR19;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
|
||||
excluded by RFC 3629. The pointer p is currently at the last byte of the
|
||||
character. */
|
||||
|
||||
if (ab > 3)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - ab;
|
||||
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-16 string ----------------- */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
/* There's not so much work, nor so many errors, for UTF-16.
|
||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
||||
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
|
||||
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||
*/
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
{
|
||||
c = *p;
|
||||
|
||||
if ((c & 0xf800) != 0xd800)
|
||||
{
|
||||
/* Normal UTF-16 code point. Neither high nor low surrogate. */
|
||||
}
|
||||
else if ((c & 0x0400) == 0)
|
||||
{
|
||||
/* High surrogate. Must be a followed by a low surrogate. */
|
||||
if (length == 0)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR1;
|
||||
}
|
||||
p++;
|
||||
length--;
|
||||
if ((*p & 0xfc00) != 0xdc00)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Isolated low surrogate. Always an error. */
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR3;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-32 string ----------------- */
|
||||
|
||||
#else
|
||||
|
||||
/* There is very little to do for a UTF-32 string.
|
||||
PCRE2_ERROR_UTF32_ERR1 Surrogate character
|
||||
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
|
||||
*/
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
{
|
||||
c = *p;
|
||||
if ((c & 0xfffff800u) != 0xd800u)
|
||||
{
|
||||
/* Normal UTF-32 code point. Neither high nor low surrogate. */
|
||||
if (c > 0x10ffffu)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF32_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A surrogate */
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF32_ERR1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
#endif /* SUPPORT_UTF */
|
||||
}
|
||||
|
||||
/* End of pcre2_valid_utf.c */
|
|
@ -108,7 +108,7 @@ static const int eint2[] = {
|
|||
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||
32, REG_INVARG, /* this version of PCRE2 is not compiled with PCRE2_UTF8 support */
|
||||
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N, \U, or \u */
|
||||
56, REG_INVARG, /* inconsistent NEWLINE options */
|
||||
56, REG_INVARG, /* internal error: unknown newline setting */
|
||||
67, REG_INVARG, /* this version of PCRE2 is not compiled with PCRE2_UCP support */
|
||||
};
|
||||
|
||||
|
@ -148,6 +148,8 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
|||
const char *message, *addmessage;
|
||||
size_t length, addlength;
|
||||
|
||||
errcode -= COMPILE_ERROR_BASE;
|
||||
|
||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
length = strlen(message) + 1;
|
||||
|
@ -224,6 +226,8 @@ preg->re_erroffset = erroffset;
|
|||
if (preg->re_pcre2_code == NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
if (errorcode < 0) return REG_BADPAT; /* UTF error */
|
||||
errorcode -= COMPILE_ERROR_BASE;
|
||||
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
||||
return eint1[errorcode];
|
||||
for (i = 0; i < sizeof(eint2)/(2*sizeof(const int)); i += 2)
|
||||
|
@ -307,13 +311,15 @@ if (rc >= 0)
|
|||
|
||||
/* Unsuccessful match */
|
||||
|
||||
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
|
||||
return REG_INVARG;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
default: return REG_ASSERT;
|
||||
case PCRE2_ERROR_BADMODE: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADMAGIC: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADOPTION: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADUTF: return REG_INVARG;
|
||||
case PCRE2_ERROR_BADUTF_OFFSET: return REG_INVARG;
|
||||
case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE;
|
||||
case PCRE2_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
|
|
182
src/pcre2test.c
182
src/pcre2test.c
|
@ -211,10 +211,10 @@ for building the library. */
|
|||
#include "pcre2_internal.h"
|
||||
|
||||
/* We need access to some of the data tables that PCRE uses. Defining
|
||||
PCRE2_INCLUDED makes some minor changes in the files. The previous definition
|
||||
PCRE2_PCRETEST makes some minor changes in the files. The previous definition
|
||||
of PRIV avoids name clashes. */
|
||||
|
||||
#define PCRE2_INCLUDED
|
||||
#define PCRE2_PCRE2TEST
|
||||
#include "pcre2_tables.c"
|
||||
#include "pcre2_ucd.c"
|
||||
|
||||
|
@ -340,12 +340,14 @@ either on a pattern or a data line, so they must all be distinct. */
|
|||
#define CTL_FULLBINCODE 0x00000200
|
||||
#define CTL_GETALL 0x00000400
|
||||
#define CTL_GLOBAL 0x00000800
|
||||
#define CTL_INFO 0x00001000
|
||||
#define CTL_JITVERIFY 0x00002000
|
||||
#define CTL_LIMITS 0x00004000
|
||||
#define CTL_MARK 0x00008000
|
||||
#define CTL_MEMORY 0x00010000
|
||||
#define CTL_POSIX 0x00020000
|
||||
#define CTL_HEXPAT 0x00001000
|
||||
#define CTL_INFO 0x00002000
|
||||
#define CTL_JITVERIFY 0x00004000
|
||||
#define CTL_LIMITS 0x00008000
|
||||
#define CTL_MARK 0x00010000
|
||||
#define CTL_MEMORY 0x00020000
|
||||
#define CTL_PATLEN 0x00040000
|
||||
#define CTL_POSIX 0x00080000
|
||||
|
||||
#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
|
||||
#define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE) /* For testing */
|
||||
|
@ -441,6 +443,7 @@ static modstruct modlist[] = {
|
|||
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
|
||||
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
|
||||
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
|
||||
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
|
||||
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
|
||||
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
|
||||
{ "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) },
|
||||
|
@ -475,6 +478,7 @@ static modstruct modlist[] = {
|
|||
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
|
||||
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
|
||||
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
|
||||
{ "use_length", MOD_PAT, MOD_CTL, CTL_PATLEN, PO(control) },
|
||||
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }
|
||||
};
|
||||
|
||||
|
@ -626,13 +630,8 @@ Pattern lines are always copied to pbuffer8 for use in callouts, even if they
|
|||
are actually compiled from pbuffer16 or pbuffer32. */
|
||||
|
||||
static int pbuffer8_size = 50000; /* Initial size, bytes */
|
||||
static int pbuffer16_size = 0; /* Only set once needed */
|
||||
static int pbuffer32_size = 0; /* Only set once needed */
|
||||
|
||||
static uint8_t *buffer = NULL;
|
||||
static uint8_t *pbuffer8 = NULL;
|
||||
static uint16_t *pbuffer16 = NULL;
|
||||
static uint32_t *pbuffer32 = NULL;
|
||||
static uint8_t *buffer = NULL;
|
||||
|
||||
/* The dbuffer is where all processed data lines are put. In non-8-bit modes it
|
||||
is cast as needed. For long data lines it grows as necessary. */
|
||||
|
@ -655,6 +654,8 @@ pcre2_code_16 *compiled_code16;
|
|||
pcre2_compile_context_16 *pat_context16, *default_pat_context16;
|
||||
pcre2_match_context_16 *dat_context16, *default_dat_context16;
|
||||
pcre2_match_data_16 *match_data16;
|
||||
static int pbuffer16_size = 0; /* Only set once needed */
|
||||
static uint16_t *pbuffer16 = NULL;
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE32
|
||||
|
@ -662,6 +663,8 @@ pcre2_code_32 *compiled_code32;
|
|||
pcre2_compile_context_32 *pat_context32, *default_pat_context32;
|
||||
pcre2_match_context_32 *dat_context32, *default_dat_context32;
|
||||
pcre2_match_data_32 *match_data32;
|
||||
static int pbuffer32_size = 0; /* Only set once needed */
|
||||
static uint32_t *pbuffer32 = NULL;
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -997,10 +1000,10 @@ the three different cases. */
|
|||
|
||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
a = G(pcre2_dfa-match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
|
||||
a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
|
||||
G(g,BITONE),G(h,BITONE),i,j); \
|
||||
else \
|
||||
a = G(pcre2_dfa-match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
|
||||
a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
|
||||
G(g,BITTWO),G(h,BITTWO),i,j)
|
||||
|
||||
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
|
||||
|
@ -1178,20 +1181,20 @@ the three different cases. */
|
|||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8))
|
||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||
G(a,8) = pcre2_dfa-match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8),i,j)
|
||||
a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8),i,j)
|
||||
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
|
||||
r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
|
||||
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b)
|
||||
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
|
||||
G(a,8) = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8))
|
||||
a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8))
|
||||
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
|
||||
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
|
||||
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(a)
|
||||
#define PCRE2_PATTERN_INFO(a,b,c,d) G(a,8) = pcre2_pattern_info_8(G(b,8),c,d)
|
||||
#define PCRE2_PRINTINT(a,b) pcre2_printint_8(compiled_code8,outfile,a)
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b) \
|
||||
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
|
||||
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
|
||||
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
|
||||
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
|
||||
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
|
||||
|
@ -1207,12 +1210,12 @@ the three different cases. */
|
|||
#define SETFLD(x,y,z) G(x,8)->y = z
|
||||
#define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
|
||||
#define SETOP(x,y,z) G(x,8) z y
|
||||
#define SETCASTPTR(x,y) G(x,8) = (uint8_t) *)y
|
||||
#define STRLEN(p) (int)strlen(p)
|
||||
#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)y
|
||||
#define STRLEN(p) (int)strlen((char *)p)
|
||||
#define SUB1(a,b) G(a,8)(G(b,8))
|
||||
#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
|
||||
#define TEST(x,r,y) (G(a,8) r (y))
|
||||
#define TESTFLD(x,f,r,y) (G(a,8)->f r (y))
|
||||
#define TEST(x,r,y) (G(x,8) r (y))
|
||||
#define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
|
||||
|
||||
|
||||
/* ----- Only 16-bit mode is supported ----- */
|
||||
|
@ -1231,20 +1234,20 @@ the three different cases. */
|
|||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,G(g,16))
|
||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||
G(a,16) = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16),i,j)
|
||||
a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16),i,j)
|
||||
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
|
||||
r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size))
|
||||
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b)
|
||||
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
|
||||
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
|
||||
G(a,16) = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16))
|
||||
a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16))
|
||||
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
|
||||
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(a)
|
||||
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
|
||||
#define PCRE2_PATTERN_INFO(a,b,c,d) G(a,16) = pcre2_pattern_info_16(G(b,16),c,d)
|
||||
#define PCRE2_PRINTINT(a,b) pcre2_printint_16(compiled_code16,outfile,a)
|
||||
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e);
|
||||
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e);
|
||||
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e);
|
||||
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
|
||||
|
@ -1260,12 +1263,12 @@ the three different cases. */
|
|||
#define SETFLD(x,y,z) G(x,16)->y = z
|
||||
#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
|
||||
#define SETOP(x,y,z) G(x,16) z y
|
||||
#define SETCASTPTR(x,y) G(x,16) = (uint16_t) *)y
|
||||
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)y
|
||||
#define STRLEN(p) (int)strlen16(p)
|
||||
#define SUB1(a,b) G(a,16)(G(b,16))
|
||||
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
|
||||
#define TEST(x,r,y) (G(a,16) r (y))
|
||||
#define TESTFLD(x,f,r,y) (G(a,16)->f r (y))
|
||||
#define TEST(x,r,y) (G(x,16) r (y))
|
||||
#define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
|
||||
|
||||
|
||||
/* ----- Only 32-bit mode is supported ----- */
|
||||
|
@ -1284,20 +1287,20 @@ the three different cases. */
|
|||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,G(g,32))
|
||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||
G(a,32) = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32),i,j)
|
||||
a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32),i,j)
|
||||
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
|
||||
r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size))
|
||||
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b)
|
||||
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
|
||||
G(a,32) = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),g(h,32))
|
||||
a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),g(h,32))
|
||||
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
|
||||
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
|
||||
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(a)
|
||||
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
|
||||
#define PCRE2_PATTERN_INFO(a,b,c,d) G(a,32) = pcre2_pattern_info_32(G(b,32),c,d)
|
||||
#define PCRE2_PRINTINT(a,b) pcre2_printint_32(compiled_code32,outfile,a)
|
||||
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e);
|
||||
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e);
|
||||
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
|
||||
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
|
||||
|
@ -1313,12 +1316,12 @@ define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d) \
|
|||
#define SETFLD(x,y,z) G(x,32)->y = z
|
||||
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
|
||||
#define SETOP(x,y,z) G(x,32) z y
|
||||
#define SETCASTPTR(x,y) G(x,32) = (uint32_t) *)y
|
||||
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)y
|
||||
#define STRLEN(p) (int)strle32(p)
|
||||
#define SUB1(a,b) G(a,32)(G(b,32))
|
||||
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
|
||||
#define TEST(x,r,y) (G(a,32) r (y))
|
||||
#define TESTFLD(x,f,r,y) (G(a,32)->f r (y))
|
||||
#define TEST(x,r,y) (G(x,32) r (y))
|
||||
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -2669,7 +2672,7 @@ Returns: nothing
|
|||
static void
|
||||
show_compile_controls(uint32_t controls, const char *before, const char *after)
|
||||
{
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
before,
|
||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||
|
@ -2679,9 +2682,11 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
|||
((controls & CTL_FLIPBYTES) != 0)? " flipbytes" : "",
|
||||
((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
|
||||
((controls & CTL_GLOBAL) != 0)? " global" : "",
|
||||
((controls & CTL_HEXPAT) != 0)? " hex" : "",
|
||||
((controls & CTL_INFO) != 0)? " info" : "",
|
||||
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
|
||||
((controls & CTL_MARK) != 0)? " mark" : "",
|
||||
((controls & CTL_PATLEN) != 0)? " use_length" : "",
|
||||
((controls & CTL_POSIX) != 0)? " posix" : "",
|
||||
after);
|
||||
}
|
||||
|
@ -2705,7 +2710,8 @@ Returns: nothing
|
|||
static void
|
||||
show_compile_options(uint32_t options, const char *before, const char *after)
|
||||
{
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
|
||||
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
before,
|
||||
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
|
||||
((options & PCRE2_CASELESS) != 0)? " caseless" : "",
|
||||
|
@ -2896,15 +2902,13 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
fprintf(outfile, "No options\n");
|
||||
else
|
||||
{
|
||||
if (compile_options != 0)
|
||||
show_compile_options(compile_options, "Compile options:", "\n");
|
||||
if (pattern_options != 0)
|
||||
show_compile_options(pattern_options, "Pattern options:", "\n");
|
||||
}
|
||||
|
||||
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
|
||||
|
||||
if (bsr_convention != PCRE2_BSR_DEFAULT)
|
||||
if (bsr_convention != BSR_DEFAULT)
|
||||
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
|
||||
"any Unicode newline" : "CR, LF, or CRLF");
|
||||
|
||||
|
@ -3272,25 +3276,63 @@ for(;;)
|
|||
if (infile != stdin) fprintf(outfile, "%s", (char *)p);
|
||||
}
|
||||
|
||||
/* If the first character after the delimiter is backslash, make
|
||||
the pattern end with backslash. This is purely to provide a way
|
||||
of testing for the error message when a pattern ends with backslash. */
|
||||
/* If the first character after the delimiter is backslash, make the pattern
|
||||
end with backslash. This is purely to provide a way of testing for the error
|
||||
message when a pattern ends with backslash. */
|
||||
|
||||
if (p[1] == '\\') *p++ = '\\';
|
||||
|
||||
/* Terminate the pattern at the delimiter, and save a copy of the pattern
|
||||
for callouts. */
|
||||
/* Terminate the pattern at the delimiter, and compute the length. */
|
||||
|
||||
*p++ = 0;
|
||||
patlen = p - buffer - 1;
|
||||
strncpy((char *)pbuffer8, (char *)(buffer+1), patlen);
|
||||
patlen = p - buffer - 2;
|
||||
|
||||
/* Look for modifiers and options after the final delimiter. If successful,
|
||||
compile the pattern. */
|
||||
/* Look for modifiers and options after the final delimiter. */
|
||||
|
||||
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
|
||||
utf = (pat_patctl.options & PCRE2_UTF) != 0;
|
||||
|
||||
/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
|
||||
in callouts. Convert to binary if required. */
|
||||
|
||||
if ((pat_patctl.control & CTL_HEXPAT) != 0)
|
||||
{
|
||||
uint8_t *pp, *pt;
|
||||
uint32_t c, d;
|
||||
|
||||
if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||
{
|
||||
fprintf(outfile, "** Hex patterns are not supported for the POSIX API\n");
|
||||
return PR_SKIP;
|
||||
}
|
||||
|
||||
pt = pbuffer8;
|
||||
for (pp = buffer + 1; *pp != 0; pp++)
|
||||
{
|
||||
if (isspace(*pp)) continue;
|
||||
c = toupper(*pp++);
|
||||
if (*pp == 0)
|
||||
{
|
||||
fprintf(outfile, "** Odd number of digits in hex pattern.\n");
|
||||
return PR_SKIP;
|
||||
}
|
||||
d = toupper(*pp);
|
||||
if (!isxdigit(c) || !isxdigit(d))
|
||||
{
|
||||
fprintf(outfile, "** Non-hex-digit in hex pattern.\n");
|
||||
return PR_SKIP;
|
||||
}
|
||||
*pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
|
||||
(isdigit(d)? (d - '0') : (d - 'A' + 10));
|
||||
}
|
||||
*pt = 0;
|
||||
patlen = pt - pbuffer8;
|
||||
}
|
||||
else
|
||||
{
|
||||
strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
|
||||
}
|
||||
|
||||
/* Sort out character tables */
|
||||
|
||||
if (pat_patctl.locale[0] != 0)
|
||||
|
@ -3394,12 +3436,12 @@ modes. */
|
|||
|
||||
#ifdef SUPPORT_PCRE16
|
||||
if (test_mode == PCRE16_MODE)
|
||||
patlen = to16(pbuffer8, utf, (int)strlen((char *)pbuffer8));
|
||||
patlen = to16(pbuffer8, utf, patlen);
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE32
|
||||
if (test_mode == PCRE32_MODE)
|
||||
patlen = to32(pbuffer8, utf, (int)strlen((char *)pbuffer8));
|
||||
patlen = to32(pbuffer8, utf, patlen);
|
||||
#endif
|
||||
|
||||
switch(patlen)
|
||||
|
@ -3423,8 +3465,13 @@ switch(patlen)
|
|||
break;
|
||||
}
|
||||
|
||||
/* The pattern in now in pbuffer[8|16|32], with the length in patlen. Compile
|
||||
many times when timing. */
|
||||
/* The pattern in now in pbuffer[8|16|32], with the length in patlen. By
|
||||
default, however, we pass a zero-terminated pattern. The length is passed only
|
||||
if we had a hex pattern or if use_length was set. */
|
||||
|
||||
if ((pat_patctl.control & (CTL_PATLEN|CTL_HEXPAT)) == 0) patlen = -1;
|
||||
|
||||
/* Compile many times when timing. */
|
||||
|
||||
if (timeit > 0)
|
||||
{
|
||||
|
@ -3960,9 +4007,9 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
|||
|
||||
if (dat_datctl.cfail[0] != 0 || dat_datctl.cfail[1] != 0)
|
||||
prmsg(&msg, "callout_fail");
|
||||
if (dat_datctl.copy_numbers[0] != 0 || dat_datctl.copy_names[0] != 0)
|
||||
if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
|
||||
prmsg(&msg, "copy");
|
||||
if (dat_datctl.get_numbers[0] != 0 || dat_datctl.get_names[0] != 0)
|
||||
if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
|
||||
prmsg(&msg, "get");
|
||||
if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
|
||||
|
||||
|
@ -4059,6 +4106,9 @@ for (gmatched = 0;; gmatched++)
|
|||
|
||||
#ifdef FIXME
|
||||
jit_was_used = FALSE;
|
||||
|
||||
Need to set newline and bsr in match context and allow them to be
|
||||
set in the datctl block.
|
||||
#endif
|
||||
|
||||
/* Adjust match_data according to size of offsets required. */
|
||||
|
@ -4502,12 +4552,6 @@ if ((dat_datctl.control & CTL_DFA) != 0)
|
|||
}
|
||||
break;
|
||||
|
||||
case PCRE2_ERROR_BADUTF:
|
||||
fprintf(outfile, "Error %d (bad UTF-%d string) offset=%d reason=%d\n",
|
||||
capcount, test_mode, CASTFLD(int, match_data, startchar),
|
||||
CASTFLD(int, match_data, utf_reason));
|
||||
break;
|
||||
|
||||
case PCRE2_ERROR_BADUTF_OFFSET:
|
||||
fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue