Basic pcre2_compile() is working (no study, no auto-possess yet).

2014-06-14 18:29:51 +00:00 · 2014-06-14 18:29:51 +00:00 · 1abd5a7f8d
parent 2801d5d132
commit 1abd5a7f8d
23 changed files with 10009 additions and 624 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -275,19 +275,19 @@ COMMON_SOURCES = \
  src/pcre2_jit_misc.c \
  src/pcre2_maketables.c \
  src/pcre2_match_data.c \
+  src/pcre2_newline.c \
+  src/pcre2_ord2utf.c \
  src/pcre2_pattern_info.c \
  src/pcre2_string_utils.c \
  src/pcre2_substring.c \
  src/pcre2_tables.c \
  src/pcre2_ucd.c \
  src/pcre2_ucp.h \
+  src/pcre2_valid_utf.c \
  src/pcre2_version.c

-#  src/pcre2_newline.c \
-#  src/pcre2_ord2utf8.c \
 #  src/pcre2_refcount.c \
 #  src/pcre2_study.c \
-#  src/pcre2_valid_utf8.c \
 #  src/pcre2_xclass.c


--- a/configure.ac
+++ b/configure.ac
@ -2,13 +2,13 @@ dnl Process this file with autoconf to produce a configure script.

 dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
 dnl the leading zeros may cause them to be treated as invalid octal constants
-dnl if a PCRE user writes code that uses PCRE_MINOR as a number. There is now
+dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now
 dnl a check further down that throws an error if 08 or 09 are used.

-dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
+dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
 dnl be defined as -RC2, for example. For real releases, it should be empty.

-m4_define(pcre2_major, [9])
+m4_define(pcre2_major, [10])
 m4_define(pcre2_minor, [00])
 m4_define(pcre2_prerelease, [-DEV])
 m4_define(pcre2_date, [2014-99-99])
@ -125,11 +125,11 @@ AC_ARG_ENABLE(jit,
                             [enable Just-In-Time compiling support]),
              , enable_jit=no)

-# Handle --disable-pcregrep-jit (enabled by default)
-AC_ARG_ENABLE(pcregrep-jit,
-              AS_HELP_STRING([--disable-pcregrep-jit],
-                             [disable JIT support in pcregrep]),
-              , enable_pcregrep_jit=yes)
+# Handle --disable-pcre2grep-jit (enabled by default)
+AC_ARG_ENABLE(pcre2grep-jit,
+              AS_HELP_STRING([--disable-pcre2grep-jit],
+                             [disable JIT support in pcre2grep]),
+              , enable_pcre2grep_jit=yes)

 # Handle --enable-rebuild-chartables
 AC_ARG_ENABLE(rebuild-chartables,
@ -144,28 +144,28 @@ AC_ARG_ENABLE(utf,
              , enable_utf=unset)

 # Handle newline options
-ac_pcre_newline=lf
+ac_pcre2_newline=lf
 AC_ARG_ENABLE(newline-is-cr,
              AS_HELP_STRING([--enable-newline-is-cr],
                             [use CR as newline character]),
-              ac_pcre_newline=cr)
+              ac_pcre2_newline=cr)
 AC_ARG_ENABLE(newline-is-lf,
              AS_HELP_STRING([--enable-newline-is-lf],
                             [use LF as newline character (default)]),
-              ac_pcre_newline=lf)
+              ac_pcre2_newline=lf)
 AC_ARG_ENABLE(newline-is-crlf,
              AS_HELP_STRING([--enable-newline-is-crlf],
                             [use CRLF as newline sequence]),
-              ac_pcre_newline=crlf)
+              ac_pcre2_newline=crlf)
 AC_ARG_ENABLE(newline-is-anycrlf,
              AS_HELP_STRING([--enable-newline-is-anycrlf],
                             [use CR, LF, or CRLF as newline sequence]),
-              ac_pcre_newline=anycrlf)
+              ac_pcre2_newline=anycrlf)
 AC_ARG_ENABLE(newline-is-any,
              AS_HELP_STRING([--enable-newline-is-any],
                             [use any valid Unicode newline sequence]),
-              ac_pcre_newline=any)
-enable_newline="$ac_pcre_newline"
+              ac_pcre2_newline=any)
+enable_newline="$ac_pcre2_newline"

 # Handle --enable-bsr-anycrlf
 AC_ARG_ENABLE(bsr-anycrlf,
@ -191,35 +191,35 @@ AC_ARG_ENABLE(stack-for-recursion,
                             [don't use stack recursion when matching]),
              , enable_stack_for_recursion=yes)

-# Handle --enable-pcregrep-libz
-AC_ARG_ENABLE(pcregrep-libz,
-              AS_HELP_STRING([--enable-pcregrep-libz],
-                             [link pcregrep with libz to handle .gz files]),
-              , enable_pcregrep_libz=no)
+# Handle --enable-pcre2grep-libz
+AC_ARG_ENABLE(pcre2grep-libz,
+              AS_HELP_STRING([--enable-pcre2grep-libz],
+                             [link pcre2grep with libz to handle .gz files]),
+              , enable_pcre2grep_libz=no)

-# Handle --enable-pcregrep-libbz2
-AC_ARG_ENABLE(pcregrep-libbz2,
-              AS_HELP_STRING([--enable-pcregrep-libbz2],
-                             [link pcregrep with libbz2 to handle .bz2 files]),
-              , enable_pcregrep_libbz2=no)
+# Handle --enable-pcre2grep-libbz2
+AC_ARG_ENABLE(pcre2grep-libbz2,
+              AS_HELP_STRING([--enable-pcre2grep-libbz2],
+                             [link pcre2grep with libbz2 to handle .bz2 files]),
+              , enable_pcre2grep_libbz2=no)

-# Handle --with-pcregrep-bufsize=N
-AC_ARG_WITH(pcregrep-bufsize,
-              AS_HELP_STRING([--with-pcregrep-bufsize=N],
-                             [pcregrep buffer size (default=20480, minimum=8192)]),
-              , with_pcregrep_bufsize=20480)
+# Handle --with-pcre2grep-bufsize=N
+AC_ARG_WITH(pcre2grep-bufsize,
+              AS_HELP_STRING([--with-pcre2grep-bufsize=N],
+                             [pcre2grep buffer size (default=20480, minimum=8192)]),
+              , with_pcre2grep_bufsize=20480)

-# Handle --enable-pcretest-libedit
-AC_ARG_ENABLE(pcretest-libedit,
-              AS_HELP_STRING([--enable-pcretest-libedit],
-                             [link pcretest with libedit]),
-              , enable_pcretest_libedit=no)
+# Handle --enable-pcre2test-libedit
+AC_ARG_ENABLE(pcre2test-libedit,
+              AS_HELP_STRING([--enable-pcre2test-libedit],
+                             [link pcre2test with libedit]),
+              , enable_pcre2test_libedit=no)

-# Handle --enable-pcretest-libreadline
-AC_ARG_ENABLE(pcretest-libreadline,
-              AS_HELP_STRING([--enable-pcretest-libreadline],
-                             [link pcretest with libreadline]),
-              , enable_pcretest_libreadline=no)
+# Handle --enable-pcre2test-libreadline
+AC_ARG_ENABLE(pcre2test-libreadline,
+              AS_HELP_STRING([--enable-pcre2test-libreadline],
+                             [link pcre2test with libreadline]),
+              , enable_pcre2test_libreadline=no)

 # Handle --with-link-size=N
 AC_ARG_WITH(link-size,
@ -298,11 +298,11 @@ fi
 # agree with the PCRE2_NEWLINE_xxx values in pcre2.h.

 case "$enable_newline" in
-  cr)      ac_pcre_newline_value=0 ;;
-  lf)      ac_pcre_newline_value=1 ;;
-  crlf)    ac_pcre_newline_value=2 ;;
-  any)     ac_pcre_newline_value=3 ;;
-  anycrlf) ac_pcre_newline_value=4 ;;
+  cr)      ac_pcre2_newline_value=1 ;;
+  lf)      ac_pcre2_newline_value=2 ;;
+  crlf)    ac_pcre2_newline_value=3 ;;
+  any)     ac_pcre2_newline_value=4 ;;
+  anycrlf) ac_pcre2_newline_value=5 ;;
  *)
  AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
  ;;
@ -314,7 +314,7 @@ if test "x$enable_ebcdic_nl25" = "xyes"; then
 fi

 # Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
-# Also check that UTF support is not requested, because PCRE cannot handle
+# Also check that UTF support is not requested, because PCRE2 cannot handle
 # EBCDIC and UTF in the same build. To do so it would need to use different
 # character constants depending on the mode.
 #
@ -334,13 +334,13 @@ case "$with_link_size" in
 esac

 AH_TOP([
-/* PCRE is written in Standard C, but there are a few non-standard things it
+/* PCRE2 is written in Standard C, but there are a few non-standard things it
 can cope with, allowing it to run on SunOS4 and other "close to standard"
 systems.

 In environments that support the GNU autotools, config.h.in is converted into
 config.h by the "configure" script. In environments that use CMake,
-config-cmake.in is converted into config.h. If you are going to build PCRE "by
+config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
 hand" without using "configure" or CMake, you should copy the distributed
 config.h.generic to config.h, and edit the macro definitions to be the way you
 need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
@ -357,7 +357,7 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
 MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
 surrounded by #ifndef/#endif lines so that the value can be overridden by -D.

-PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
+PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
 HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
 sure both macros are undefined; an emulation function will then be used. */])

@ -370,7 +370,7 @@ AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
 AM_CONDITIONAL(WITH_PCRE8, test "x$enable_pcre8" = "xyes")
 AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
 AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
-AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
+#AM_CONDITIONAL(WITH_PCRE2_CPP, test "x$enable_cpp" = "xyes")
 AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
 AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
 AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
@ -400,7 +400,7 @@ AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
 #     therefore missing the function definition.
 #   - The compiler thus generates a "C" signature for the test function.
 #   - The linker fails to find the "C" function.
-#   - PCRE fails to configure if asked to do so against libbz2.
+#   - PCRE2 fails to configure if asked to do so against libbz2.
 #
 # Solution:
 #
@ -426,7 +426,7 @@ LIBS="$OLD_LIBS"

 # Check for the availabiity of libreadline

-if test "$enable_pcretest_libreadline" = "yes"; then
+if test "$enable_pcre2test_libreadline" = "yes"; then
 AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
 AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
 AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
@ -459,7 +459,7 @@ fi
 # Check for the availability of libedit. Different distributions put its
 # headers in different places. Try to cover the most common ones.

-if test "$enable_pcretest_libedit" = "yes"; then
+if test "$enable_pcre2test_libedit" = "yes"; then
  AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
    [AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
      [AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
@ -477,21 +477,21 @@ if test "x$enable_shared" = "xno" ; then
 fi
 AC_SUBST(PCRE2_STATIC_CFLAG)

-# Here is where pcre specific defines are handled
+# Here is where PCRE2-specific defines are handled

 if test "$enable_pcre8" = "yes"; then
  AC_DEFINE([SUPPORT_PCRE8], [], [
-    Define to any value to enable the 8 bit PCRE library.])
+    Define to any value to enable the 8 bit PCRE2 library.])
 fi

 if test "$enable_pcre16" = "yes"; then
  AC_DEFINE([SUPPORT_PCRE16], [], [
-    Define to any value to enable the 16 bit PCRE library.])
+    Define to any value to enable the 16 bit PCRE2 library.])
 fi

 if test "$enable_pcre32" = "yes"; then
  AC_DEFINE([SUPPORT_PCRE32], [], [
-    Define to any value to enable the 32 bit PCRE library.])
+    Define to any value to enable the 32 bit PCRE2 library.])
 fi

 # Unless running under Windows, JIT support requires pthreads.
@ -506,87 +506,87 @@ if test "$enable_jit" = "yes"; then
  AC_DEFINE([SUPPORT_JIT], [], [
    Define to any value to enable support for Just-In-Time compiling.])
 else
-  enable_pcregrep_jit="no"
+  enable_pcre2grep_jit="no"
 fi

-if test "$enable_pcregrep_jit" = "yes"; then
-  AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [
-    Define to any value to enable JIT support in pcregrep.])
+if test "$enable_pcre2grep_jit" = "yes"; then
+  AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [
+    Define to any value to enable JIT support in pcre2grep.])
 fi

 if test "$enable_utf" = "yes"; then
  AC_DEFINE([SUPPORT_UTF], [], [
    Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
    This will work even in an EBCDIC environment, but it is incompatible
-    with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
+    with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
    code *or* ASCII/UTF-8/16/32, but not both at once.])
 fi

 if test "$enable_stack_for_recursion" = "no"; then
  AC_DEFINE([NO_RECURSE], [], [
-    PCRE uses recursive function calls to handle backtracking while
+    PCRE2 uses recursive function calls to handle backtracking while
    matching. This can sometimes be a problem on systems that have
    stacks of limited size. Define NO_RECURSE to any value to get a
    version that doesn't use recursion in the match() function; instead
-    it creates its own stack by steam using pcre_recurse_malloc() to obtain
-    memory from the heap. For more detail, see the comments and other stuff
-    just above the match() function.])
+    it creates its own stack by steam using memory from the heap. For more 
+    detail, see the comments and other stuff just above the match() function.])
 fi

-if test "$enable_pcregrep_libz" = "yes"; then
+if test "$enable_pcre2grep_libz" = "yes"; then
  AC_DEFINE([SUPPORT_LIBZ], [], [
-    Define to any value to allow pcregrep to be linked with libz, so that it is
+    Define to any value to allow pcre2grep to be linked with libz, so that it is
    able to handle .gz files.])
 fi

-if test "$enable_pcregrep_libbz2" = "yes"; then
+if test "$enable_pcre2grep_libbz2" = "yes"; then
  AC_DEFINE([SUPPORT_LIBBZ2], [], [
-    Define to any value to allow pcregrep to be linked with libbz2, so that it
+    Define to any value to allow pcre2grep to be linked with libbz2, so that it
    is able to handle .bz2 files.])
 fi

-if test $with_pcregrep_bufsize -lt 8192 ; then
-  AC_MSG_WARN([$with_pcregrep_bufsize is too small for --with-pcregrep-bufsize; using 8192])
-  with_pcregrep_bufsize="8192"
+if test $with_pcre2grep_bufsize -lt 8192 ; then
+  AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192])
+  with_pcre2grep_bufsize="8192"
 else
  if test $? -gt 1 ; then
-  AC_MSG_ERROR([Bad value for  --with-pcregrep-bufsize]) 
+  AC_MSG_ERROR([Bad value for  --with-pcre2grep-bufsize]) 
  fi    
 fi

-AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
-  The value of PCREGREP_BUFSIZE determines the size of buffer used by pcregrep
+AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
+  The value of PCRE2GREP_BUFSIZE determines the size of buffer used by pcre2grep
  to hold parts of the file it is searching. This is also the minimum value.
-  The actual amount of memory used by pcregrep is three times this number,
+  The actual amount of memory used by pcre2grep is three times this number,
  because it allows for the buffering of "before" and "after" lines.])

-if test "$enable_pcretest_libedit" = "yes"; then
+if test "$enable_pcre2test_libedit" = "yes"; then
  AC_DEFINE([SUPPORT_LIBEDIT], [], [
-    Define to any value to allow pcretest to be linked with libedit.])
+    Define to any value to allow pcre2test to be linked with libedit.])
  LIBREADLINE="$LIBEDIT"
-elif test "$enable_pcretest_libreadline" = "yes"; then
+elif test "$enable_pcre2test_libreadline" = "yes"; then
  AC_DEFINE([SUPPORT_LIBREADLINE], [], [
-    Define to any value to allow pcretest to be linked with libreadline.])
+    Define to any value to allow pcre2test to be linked with libreadline.])
 fi

-AC_DEFINE_UNQUOTED([NEWLINE], [$ac_pcre_newline_value], [
-  The value of NEWLINE determines the default newline character sequence. PCRE
-  client programs can override this by selecting other values at run time. The
-  valid values are 0 (CR), 1 (LF), 2 (CRLF), 3 (ANY), and 4 (ANYCRLF).])
+AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
+  The value of NEWLINE_DEFAULT determines the default newline character 
+  sequence. PCRE2 client programs can override this by selecting other values 
+  at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 
+  and 5 (ANYCRLF).])

 if test "$enable_bsr_anycrlf" = "yes"; then
  AC_DEFINE([BSR_ANYCRLF], [], [
    By default, the \R escape sequence matches any Unicode line ending
    character or sequence of characters. If BSR_ANYCRLF is defined (to any
    value), this is changed so that backslash-R matches only CR, LF, or CRLF.
-    The build-time default can be overridden by the user of PCRE at runtime.])
+    The build-time default can be overridden by the user of PCRE2 at runtime.])
 fi

 AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
  The value of LINK_SIZE determines the number of bytes used to store
  links as offsets within the compiled regex. The default is 2, which
  allows for compiled patterns up to 64K long. This covers the vast
-  majority of cases. However, PCRE can also be compiled to use 3 or 4
+  majority of cases. However, PCRE2 can also be compiled to use 3 or 4
  bytes instead. This allows for longer patterns in extreme cases.])

 AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
@ -597,7 +597,7 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
 AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
  The value of MATCH_LIMIT determines the default number of times the
  internal match() function can be called during a single execution of
-  pcre_exec(). There is a runtime interface for setting a different
+  pcre2_match(). There is a runtime interface for setting a different
  limit. The limit exists in order to catch runaway regular
  expressions that take for ever to determine that they do not match.
  The default is set very large so that it does not accidentally catch
@ -639,10 +639,10 @@ AH_VERBATIM([PCRE2_EXP_DEFN], [
 if test "$enable_ebcdic" = "yes"; then
  AC_DEFINE_UNQUOTED([EBCDIC], [], [
    If you are compiling for a system that uses EBCDIC instead of ASCII
-    character codes, define this macro to any value. When EBCDIC is set, PCRE
+    character codes, define this macro to any value. When EBCDIC is set, PCRE2
    assumes that all input strings are in EBCDIC. If you do not define this
-    macro, PCRE will assume input strings are ASCII or UTF-8/16/32 Unicode. It
-    is not possible to build a version of PCRE that supports both EBCDIC and
+    macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
+    is not possible to build a version of PCRE2 that supports both EBCDIC and
    UTF-8/16/32.])
 fi

@ -695,65 +695,65 @@ AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
 DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre16 --enable-pcre32 --enable-jit --enable-utf"
 AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)

-# Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is
+# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
 # specified, the relevant library is available.

-if test "$enable_pcregrep_libz" = "yes"; then
+if test "$enable_pcre2grep_libz" = "yes"; then
  if test "$HAVE_ZLIB_H" != "1"; then
-    echo "** Cannot --enable-pcregrep-libz because zlib.h was not found"
+    echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found"
    exit 1
  fi
  if test "$HAVE_LIBZ" != "1"; then
-    echo "** Cannot --enable-pcregrep-libz because libz was not found"
+    echo "** Cannot --enable-pcre2grep-libz because libz was not found"
    exit 1
  fi
  LIBZ="-lz"
 fi
 AC_SUBST(LIBZ)

-if test "$enable_pcregrep_libbz2" = "yes"; then
+if test "$enable_pcre2grep_libbz2" = "yes"; then
  if test "$HAVE_BZLIB_H" != "1"; then
-    echo "** Cannot --enable-pcregrep-libbz2 because bzlib.h was not found"
+    echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found"
    exit 1
  fi
  if test "$HAVE_LIBBZ2" != "1"; then
-    echo "** Cannot --enable-pcregrep-libbz2 because libbz2 was not found"
+    echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found"
    exit 1
  fi
  LIBBZ2="-lbz2"
 fi
 AC_SUBST(LIBBZ2)

-# Similarly for --enable-pcretest-readline
+# Similarly for --enable-pcre2test-libedit and --enable-pcre2test-libreadline

-if test "$enable_pcretest_libedit" = "yes"; then
-  if test "$enable_pcretest_libreadline" = "yes"; then
-    echo "** Cannot use both --enable-pcretest-libedit and --enable-pcretest-readline"
+if test "$enable_pcre2test_libedit" = "yes"; then
+  if test "$enable_pcre2test_libreadline" = "yes"; then
+    echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
    exit 1
  fi
  if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
          "$HAVE_READLINE_READLINE_H" != "1"; then
-    echo "** Cannot --enable-pcretest-libedit because neither editline/readline.h"
+    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
    echo "** nor readline/readline.h was found."
    exit 1
  fi
  if test -z "$LIBEDIT"; then
-    echo "** Cannot --enable-pcretest-libedit because libedit library was not found."
+    echo "** Cannot --enable-pcre2test-libedit because libedit library was not found."
    exit 1
  fi
 fi

-if test "$enable_pcretest_libreadline" = "yes"; then
+if test "$enable_pcre2test_libreadline" = "yes"; then
  if test "$HAVE_READLINE_H" != "1"; then
-    echo "** Cannot --enable-pcretest-readline because readline/readline.h was not found."
+    echo "** Cannot --enable-pcre2test-readline because readline/readline.h was not found."
    exit 1
  fi
  if test "$HAVE_HISTORY_H" != "1"; then
-    echo "** Cannot --enable-pcretest-readline because readline/history.h was not found."
+    echo "** Cannot --enable-pcre2test-readline because readline/history.h was not found."
    exit 1
  fi
  if test -z "$LIBREADLINE"; then
-    echo "** Cannot --enable-pcretest-readline because readline library was not found."
+    echo "** Cannot --enable-pcre2test-readline because readline library was not found."
    exit 1
  fi
 fi
@ -868,12 +868,12 @@ $PACKAGE-$VERSION configuration summary:
    Match limit recursion ........... : ${with_match_limit_recursion}
    Build shared libs ............... : ${enable_shared}
    Build static libs ............... : ${enable_static}
-    Use JIT in pcregrep ............. : ${enable_pcregrep_jit}
-    Buffer size for pcregrep ........ : ${with_pcregrep_bufsize}
-    Link pcregrep with libz ......... : ${enable_pcregrep_libz}
-    Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
-    Link pcretest with libedit ...... : ${enable_pcretest_libedit}
-    Link pcretest with libreadline .. : ${enable_pcretest_libreadline}
+    Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
+    Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
+    Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
+    Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
+    Link pcre2test with libedit ..... : ${enable_pcre2test_libedit}
+    Link pcre2test with libreadline . : ${enable_pcre2test_libreadline}
    Valgrind support ................ : ${enable_valgrind}
    Code coverage ................... : ${enable_coverage}

--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -420,6 +420,7 @@ about the pattern:
      flipbytes                 flip endianness
  /BB fullbincode               show binary code with lengths
  /I  info                      show info about compiled pattern
+      hex                       pattern is coded in hexadecimal 
      jit[=<number>]            use JIT
      locale=<name>             use this locale
      memory                    show memory used 
@ -430,6 +431,7 @@ about the pattern:
      save=<file name>          save compiled pattern
      stackguard=<number>       test the stackguard feature
      tables=[0|1|2]            select internal tables
+      use_length                pass the pattern's length explicitly
 .sp
 The effects of these modifiers are described in the following sections.
 FIXME: Give more examples.
@ -481,6 +483,27 @@ specified. See also the section about saving and reloading compiled patterns
 below.
 .
 .
+.SS "Specifying a pattern in hex"
+.rs
+.sp
+The \fBhex\fP modifier specifies that the characters of the pattern are to be 
+interpreted as pairs of hexadecimal digits. White space is permitted between
+pairs. For example:
+.sp
+  /ab 32 59/hex
+.sp
+This feature is provided as a way of creating patterns that contain binary zero 
+characters. When \fBhex\fP is set, it implies \fBuse_length\fP.
+.
+.
+.SS "Using the pattern's length"
+.rs
+.sp
+By default, \fBpcre2test\fP passes patterns as zero-terminated strings to 
+\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the 
+length of the pattern is passed. This is implied if \fBhex\fP is set.
+.
+.
 .SS "JIT compilation"
 .rs
 .sp
@ -595,38 +618,6 @@ letters, digits, spaces, etc. Setting alternate character tables and a locale
 are mutually exclusive.
 .
 .
-.SS "Locking out certain modifiers"
-.rs
-.sp
-FIXME FIXME
-PCRE can be compiled with or without support for certain features such as
-UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
-into a number of different files that are selected for running depending on
-which features are available. When updating the tests, it is all too easy to
-put a new test into the wrong file by mistake; for example, to put a test that
-requires UTF support into a file that is used when it is not available. To help
-detect such mistakes as early as possible, there is a facility for locking out
-specific modifiers. If an input line for \fBpcre2test\fP starts with the string
-"< forbid " the following sequence of characters is taken as a list of
-forbidden modifiers. For example, in the test files that must not use UTF or
-Unicode property support, this line appears:
-.sp
-  < forbid 8W
-.sp
-This locks out the /8 and /W modifiers. An immediate error is given if they are
-subsequently encountered. If the character string contains < but not >, all the
-multi-character modifiers that begin with < are locked out. Otherwise, such
-modifiers must be explicitly listed, for example:
-.sp
-  < forbid <JS><cr>
-.sp
-There must be a single space between < and "forbid" for this feature to be
-recognised. If there is not, the line is interpreted either as a request to
-re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
-below) or, if there is a another < character, as a pattern that uses < as its
-delimiter.
-.
-.
 .SS "Setting certain match controls"
 .rs
 .sp
@ -653,6 +644,7 @@ defaults, set them in a \fB#subject\fP command.
 The modifiers that can appear in subject lines and the \fB#subject\fP 
 command are of two types.
 .
+.
 .SS "Setting match options"
 .rs
 .sp
@ -1199,6 +1191,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 13 May 2014
+Last updated: 08 June 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi
--- a/libpcre2-posix.pc
+++ b/libpcre2-posix.pc
@ -7,7 +7,7 @@ includedir=${prefix}/include

 Name: libpcre2-posix
 Description: Posix compatible interface to libpcre2-8
-Version: 9.00-DEV
+Version: 10.00-DEV
 Libs: -L${libdir} -lpcre2-posix
 Cflags: -I${includedir} @PCRE_STATIC_CFLAG@
 Requires.private: libpcre2-8
--- a/src/config.h
+++ b/src/config.h
@ -2,13 +2,13 @@
 /* src/config.h.in.  Generated from configure.ac by autoheader.  */


-/* PCRE is written in Standard C, but there are a few non-standard things it
+/* PCRE2 is written in Standard C, but there are a few non-standard things it
 can cope with, allowing it to run on SunOS4 and other "close to standard"
 systems.

 In environments that support the GNU autotools, config.h.in is converted into
 config.h by the "configure" script. In environments that use CMake,
-config-cmake.in is converted into config.h. If you are going to build PCRE "by
+config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
 hand" without using "configure" or CMake, you should copy the distributed
 config.h.generic to config.h, and edit the macro definitions to be the way you
 need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
@ -25,21 +25,22 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
 MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
 surrounded by #ifndef/#endif lines so that the value can be overridden by -D.

-PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
+PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
 HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
 sure both macros are undefined; an emulation function will then be used. */

 /* By default, the \R escape sequence matches any Unicode line ending
   character or sequence of characters. If BSR_ANYCRLF is defined (to any
   value), this is changed so that backslash-R matches only CR, LF, or CRLF.
-   The build-time default can be overridden by the user of PCRE at runtime. */
+   The build-time default can be overridden by the user of PCRE2 at runtime.
+   */
 /* #undef BSR_ANYCRLF */

 /* If you are compiling for a system that uses EBCDIC instead of ASCII
-   character codes, define this macro to any value. When EBCDIC is set, PCRE
+   character codes, define this macro to any value. When EBCDIC is set, PCRE2
   assumes that all input strings are in EBCDIC. If you do not define this
-   macro, PCRE will assume input strings are ASCII or UTF-8/16/32 Unicode. It
-   is not possible to build a version of PCRE that supports both EBCDIC and
+   macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
+   is not possible to build a version of PCRE2 that supports both EBCDIC and
   UTF-8/16/32. */
 /* #undef EBCDIC */

@ -126,8 +127,8 @@ sure both macros are undefined; an emulation function will then be used. */
 /* The value of LINK_SIZE determines the number of bytes used to store links
   as offsets within the compiled regex. The default is 2, which allows for
   compiled patterns up to 64K long. This covers the vast majority of cases.
-   However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
-   for longer patterns in extreme cases. */
+   However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
+   allows for longer patterns in extreme cases. */
 #define LINK_SIZE 2

 /* Define to the sub-directory in which libtool stores uninstalled libraries.
@ -136,7 +137,7 @@ sure both macros are undefined; an emulation function will then be used. */

 /* The value of MATCH_LIMIT determines the default number of times the
   internal match() function can be called during a single execution of
-   pcre_exec(). There is a runtime interface for setting a different limit.
+   pcre2_match(). There is a runtime interface for setting a different limit.
   The limit exists in order to catch runaway regular expressions that take
   for ever to determine that they do not match. The default is set very large
   so that it does not accidentally catch legitimate cases. */
@ -162,19 +163,18 @@ sure both macros are undefined; an emulation function will then be used. */
   overflow caused by enormously large patterns. */
 #define MAX_NAME_SIZE 32

-/* The value of NEWLINE determines the default newline character sequence.
-   PCRE client programs can override this by selecting other values at run
-   time. The valid values are 0 (CR), 1 (LF), 2 (CRLF), 3 (ANY), and 4
+/* The value of NEWLINE_DEFAULT determines the default newline character
+   sequence. PCRE2 client programs can override this by selecting other values
+   at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
   (ANYCRLF). */
-#define NEWLINE 1
+#define NEWLINE_DEFAULT 2

-/* PCRE uses recursive function calls to handle backtracking while matching.
+/* PCRE2 uses recursive function calls to handle backtracking while matching.
   This can sometimes be a problem on systems that have stacks of limited
   size. Define NO_RECURSE to any value to get a version that doesn't use
   recursion in the match() function; instead it creates its own stack by
-   steam using pcre_recurse_malloc() to obtain memory from the heap. For more
-   detail, see the comments and other stuff just above the match() function.
-   */
+   steam using memory from the heap. For more detail, see the comments and
+   other stuff just above the match() function. */
 /* #undef NO_RECURSE */

 /* Name of package */
@ -187,7 +187,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 9.00-DEV"
+#define PACKAGE_STRING "PCRE2 10.00-DEV"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@ -196,13 +196,20 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "9.00-DEV"
+#define PACKAGE_VERSION "10.00-DEV"

 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
   parentheses (of any kind) in a pattern. This limits the amount of system
   stack that is used while compiling a pattern. */
 #define PARENS_NEST_LIMIT 250

+/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
+   pcre2grep to hold parts of the file it is searching. This is also the
+   minimum value. The actual amount of memory used by pcre2grep is three times
+   this number, because it allows for the buffering of "before" and "after"
+   lines. */
+#define PCRE2GREP_BUFSIZE 20480
+
 /* to make a symbol visible */
 #define PCRE2POSIX_EXP_DECL extern __attribute__ ((visibility ("default")))

@ -227,13 +234,6 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value if linking statically (TODO: make nice with Libtool) */
 #define PCRE2_STATIC 1

-/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
-   pcregrep to hold parts of the file it is searching. This is also the
-   minimum value. The actual amount of memory used by pcregrep is three times
-   this number, because it allows for the buffering of "before" and "after"
-   lines. */
-#define PCREGREP_BUFSIZE 20480
-
 /* Define to necessary symbol if this constant uses a non-standard name on
   your system. */
 /* #undef PTHREAD_CREATE_JOINABLE */
@ -244,35 +244,35 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value to enable support for Just-In-Time compiling. */
 /* #undef SUPPORT_JIT */

-/* Define to any value to allow pcregrep to be linked with libbz2, so that it
+/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
   is able to handle .bz2 files. */
 #define SUPPORT_LIBBZ2 /**/

-/* Define to any value to allow pcretest to be linked with libedit. */
+/* Define to any value to allow pcre2test to be linked with libedit. */
 /* #undef SUPPORT_LIBEDIT */

-/* Define to any value to allow pcretest to be linked with libreadline. */
+/* Define to any value to allow pcre2test to be linked with libreadline. */
 #define SUPPORT_LIBREADLINE /**/

-/* Define to any value to allow pcregrep to be linked with libz, so that it is
-   able to handle .gz files. */
+/* Define to any value to allow pcre2grep to be linked with libz, so that it
+   is able to handle .gz files. */
 #define SUPPORT_LIBZ /**/

-/* Define to any value to enable the 16 bit PCRE library. */
+/* Define to any value to enable the 16 bit PCRE2 library. */
 #define SUPPORT_PCRE16 /**/

-/* Define to any value to enable the 32 bit PCRE library. */
+/* Define to any value to enable JIT support in pcre2grep. */
+/* #undef SUPPORT_PCRE2GREP_JIT */
+
+/* Define to any value to enable the 32 bit PCRE2 library. */
 #define SUPPORT_PCRE32 /**/

-/* Define to any value to enable the 8 bit PCRE library. */
+/* Define to any value to enable the 8 bit PCRE2 library. */
 #define SUPPORT_PCRE8 /**/

-/* Define to any value to enable JIT support in pcregrep. */
-/* #undef SUPPORT_PCREGREP_JIT */
-
 /* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
   This will work even in an EBCDIC environment, but it is incompatible with
-   the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
+   the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
   ASCII/UTF-8/16/32, but not both at once. */
 #define SUPPORT_UTF /**/

@ -280,7 +280,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define SUPPORT_VALGRIND /**/

 /* Version number of package */
-#define VERSION "9.00-DEV"
+#define VERSION "10.00-DEV"

 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
--- a/src/config.h.in
+++ b/src/config.h.in
@ -1,13 +1,13 @@
 /* src/config.h.in.  Generated from configure.ac by autoheader.  */


-/* PCRE is written in Standard C, but there are a few non-standard things it
+/* PCRE2 is written in Standard C, but there are a few non-standard things it
 can cope with, allowing it to run on SunOS4 and other "close to standard"
 systems.

 In environments that support the GNU autotools, config.h.in is converted into
 config.h by the "configure" script. In environments that use CMake,
-config-cmake.in is converted into config.h. If you are going to build PCRE "by
+config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
 hand" without using "configure" or CMake, you should copy the distributed
 config.h.generic to config.h, and edit the macro definitions to be the way you
 need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
@ -24,21 +24,22 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
 MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
 surrounded by #ifndef/#endif lines so that the value can be overridden by -D.

-PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
+PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
 HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
 sure both macros are undefined; an emulation function will then be used. */

 /* By default, the \R escape sequence matches any Unicode line ending
   character or sequence of characters. If BSR_ANYCRLF is defined (to any
   value), this is changed so that backslash-R matches only CR, LF, or CRLF.
-   The build-time default can be overridden by the user of PCRE at runtime. */
+   The build-time default can be overridden by the user of PCRE2 at runtime.
+   */
 #undef BSR_ANYCRLF

 /* If you are compiling for a system that uses EBCDIC instead of ASCII
-   character codes, define this macro to any value. When EBCDIC is set, PCRE
+   character codes, define this macro to any value. When EBCDIC is set, PCRE2
   assumes that all input strings are in EBCDIC. If you do not define this
-   macro, PCRE will assume input strings are ASCII or UTF-8/16/32 Unicode. It
-   is not possible to build a version of PCRE that supports both EBCDIC and
+   macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
+   is not possible to build a version of PCRE2 that supports both EBCDIC and
   UTF-8/16/32. */
 #undef EBCDIC

@ -125,8 +126,8 @@ sure both macros are undefined; an emulation function will then be used. */
 /* The value of LINK_SIZE determines the number of bytes used to store links
   as offsets within the compiled regex. The default is 2, which allows for
   compiled patterns up to 64K long. This covers the vast majority of cases.
-   However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
-   for longer patterns in extreme cases. */
+   However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
+   allows for longer patterns in extreme cases. */
 #undef LINK_SIZE

 /* Define to the sub-directory in which libtool stores uninstalled libraries.
@ -135,7 +136,7 @@ sure both macros are undefined; an emulation function will then be used. */

 /* The value of MATCH_LIMIT determines the default number of times the
   internal match() function can be called during a single execution of
-   pcre_exec(). There is a runtime interface for setting a different limit.
+   pcre2_match(). There is a runtime interface for setting a different limit.
   The limit exists in order to catch runaway regular expressions that take
   for ever to determine that they do not match. The default is set very large
   so that it does not accidentally catch legitimate cases. */
@ -161,19 +162,18 @@ sure both macros are undefined; an emulation function will then be used. */
   overflow caused by enormously large patterns. */
 #undef MAX_NAME_SIZE

-/* The value of NEWLINE determines the default newline character sequence.
-   PCRE client programs can override this by selecting other values at run
-   time. The valid values are 0 (CR), 1 (LF), 2 (CRLF), 3 (ANY), and 4
+/* The value of NEWLINE_DEFAULT determines the default newline character
+   sequence. PCRE2 client programs can override this by selecting other values
+   at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
   (ANYCRLF). */
-#undef NEWLINE
+#undef NEWLINE_DEFAULT

-/* PCRE uses recursive function calls to handle backtracking while matching.
+/* PCRE2 uses recursive function calls to handle backtracking while matching.
   This can sometimes be a problem on systems that have stacks of limited
   size. Define NO_RECURSE to any value to get a version that doesn't use
   recursion in the match() function; instead it creates its own stack by
-   steam using pcre_recurse_malloc() to obtain memory from the heap. For more
-   detail, see the comments and other stuff just above the match() function.
-   */
+   steam using memory from the heap. For more detail, see the comments and
+   other stuff just above the match() function. */
 #undef NO_RECURSE

 /* Name of package */
@ -202,6 +202,13 @@ sure both macros are undefined; an emulation function will then be used. */
   stack that is used while compiling a pattern. */
 #undef PARENS_NEST_LIMIT

+/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
+   pcre2grep to hold parts of the file it is searching. This is also the
+   minimum value. The actual amount of memory used by pcre2grep is three times
+   this number, because it allows for the buffering of "before" and "after"
+   lines. */
+#undef PCRE2GREP_BUFSIZE
+
 /* to make a symbol visible */
 #undef PCRE2POSIX_EXP_DECL

@ -226,13 +233,6 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value if linking statically (TODO: make nice with Libtool) */
 #undef PCRE2_STATIC

-/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
-   pcregrep to hold parts of the file it is searching. This is also the
-   minimum value. The actual amount of memory used by pcregrep is three times
-   this number, because it allows for the buffering of "before" and "after"
-   lines. */
-#undef PCREGREP_BUFSIZE
-
 /* Define to necessary symbol if this constant uses a non-standard name on
   your system. */
 #undef PTHREAD_CREATE_JOINABLE
@ -243,35 +243,35 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value to enable support for Just-In-Time compiling. */
 #undef SUPPORT_JIT

-/* Define to any value to allow pcregrep to be linked with libbz2, so that it
+/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
   is able to handle .bz2 files. */
 #undef SUPPORT_LIBBZ2

-/* Define to any value to allow pcretest to be linked with libedit. */
+/* Define to any value to allow pcre2test to be linked with libedit. */
 #undef SUPPORT_LIBEDIT

-/* Define to any value to allow pcretest to be linked with libreadline. */
+/* Define to any value to allow pcre2test to be linked with libreadline. */
 #undef SUPPORT_LIBREADLINE

-/* Define to any value to allow pcregrep to be linked with libz, so that it is
-   able to handle .gz files. */
+/* Define to any value to allow pcre2grep to be linked with libz, so that it
+   is able to handle .gz files. */
 #undef SUPPORT_LIBZ

-/* Define to any value to enable the 16 bit PCRE library. */
+/* Define to any value to enable the 16 bit PCRE2 library. */
 #undef SUPPORT_PCRE16

-/* Define to any value to enable the 32 bit PCRE library. */
+/* Define to any value to enable JIT support in pcre2grep. */
+#undef SUPPORT_PCRE2GREP_JIT
+
+/* Define to any value to enable the 32 bit PCRE2 library. */
 #undef SUPPORT_PCRE32

-/* Define to any value to enable the 8 bit PCRE library. */
+/* Define to any value to enable the 8 bit PCRE2 library. */
 #undef SUPPORT_PCRE8

-/* Define to any value to enable JIT support in pcregrep. */
-#undef SUPPORT_PCREGREP_JIT
-
 /* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
   This will work even in an EBCDIC environment, but it is incompatible with
-   the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
+   the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
   ASCII/UTF-8/16/32, but not both at once. */
 #undef SUPPORT_UTF

--- a/src/pcre2.h
+++ b/src/pcre2.h
@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.

 /* The current PCRE version information. */

-#define PCRE2_MAJOR          9
+#define PCRE2_MAJOR          10
 #define PCRE2_MINOR          00
 #define PCRE2_PRERELEASE     -DEV
 #define PCRE2_DATE           2014-99-99
@ -138,86 +138,83 @@ D   is inspected during pcre2_dfa_exec() execution

 /* Newline and \R settings, for use in the compile context. */

-#define PCRE2_NEWLINE_DEFAULT     0
 #define PCRE2_NEWLINE_CR          1
 #define PCRE2_NEWLINE_LF          2
 #define PCRE2_NEWLINE_CRLF        3
 #define PCRE2_NEWLINE_ANY         4
 #define PCRE2_NEWLINE_ANYCRLF     5

-#define PCRE2_BSR_DEFAULT         0
 #define PCRE2_BSR_UNICODE         1
 #define PCRE2_BSR_ANYCRLF         2

-/* Match-time and get/set-time error codes */
+/* Error codes: no match and partial match are "expected" errors. */

 #define PCRE2_ERROR_NOMATCH          (-1)
+#define PCRE2_ERROR_PARTIAL          (-2)

-#define PCRE2_ERROR_BADCOUNT         (-2)
-#define PCRE2_ERROR_BADENDIANNESS    (-3)
-#define PCRE2_ERROR_BADLENGTH        (-4)
-#define PCRE2_ERROR_BADMAGIC         (-5)
-#define PCRE2_ERROR_BADMODE          (-6)
-#define PCRE2_ERROR_BADOFFSET        (-7)
-#define PCRE2_ERROR_BADOPTION        (-8)
-#define PCRE2_ERROR_BADUTF           (-9)
-#define PCRE2_ERROR_BADUTF_OFFSET   (-10)
-#define PCRE2_ERROR_CALLOUT         (-11)  /* Never used by PCRE2 itself */
-#define PCRE2_ERROR_INTERNAL        (-12)
-#define PCRE2_ERROR_JIT_BADOPTION   (-13)
-#define PCRE2_ERROR_JIT_STACKLIMIT  (-14)
-#define PCRE2_ERROR_MATCHLIMIT      (-15)
-#define PCRE2_ERROR_NOMEMORY        (-16)
-#define PCRE2_ERROR_NOSUBSTRING     (-17)
-#define PCRE2_ERROR_NULL            (-18)
-#define PCRE2_ERROR_PARTIAL         (-19)
-#define PCRE2_ERROR_RECURSELOOP     (-20)
-#define PCRE2_ERROR_RECURSIONLIMIT  (-21)
-#define PCRE2_ERROR_UNKNOWN_OPCODE  (-22)
-#define PCRE2_ERROR_UNSET           (-23)
+/* Error codes for UTF-8 validity checks */

-#define PCRE2_ERROR_DFA_BADRESTART  (-30)
-#define PCRE2_ERROR_DFA_RECURSE     (-31)
-#define PCRE2_ERROR_DFA_UCOND       (-32)
-#define PCRE2_ERROR_DFA_UITEM       (-33)
-#define PCRE2_ERROR_DFA_UMLIMIT     (-34)
-#define PCRE2_ERROR_DFA_WSSIZE      (-35)
+#define PCRE2_ERROR_UTF8_ERR1        (-3)
+#define PCRE2_ERROR_UTF8_ERR2        (-4)
+#define PCRE2_ERROR_UTF8_ERR3        (-5)
+#define PCRE2_ERROR_UTF8_ERR4        (-6)
+#define PCRE2_ERROR_UTF8_ERR5        (-7)
+#define PCRE2_ERROR_UTF8_ERR6        (-8)
+#define PCRE2_ERROR_UTF8_ERR7        (-9)
+#define PCRE2_ERROR_UTF8_ERR8       (-10)
+#define PCRE2_ERROR_UTF8_ERR9       (-11)
+#define PCRE2_ERROR_UTF8_ERR10      (-12)
+#define PCRE2_ERROR_UTF8_ERR11      (-13)
+#define PCRE2_ERROR_UTF8_ERR12      (-14)
+#define PCRE2_ERROR_UTF8_ERR13      (-15)
+#define PCRE2_ERROR_UTF8_ERR14      (-16)
+#define PCRE2_ERROR_UTF8_ERR15      (-17)
+#define PCRE2_ERROR_UTF8_ERR16      (-18)
+#define PCRE2_ERROR_UTF8_ERR17      (-19)
+#define PCRE2_ERROR_UTF8_ERR18      (-20)
+#define PCRE2_ERROR_UTF8_ERR19      (-21)
+#define PCRE2_ERROR_UTF8_ERR20      (-22)
+#define PCRE2_ERROR_UTF8_ERR21      (-23)

+/* Error codes for UTF-16 validity checks */

-/* Specific error codes for UTF-8 validity checks */
+#define PCRE2_ERROR_UTF16_ERR1      (-24)
+#define PCRE2_ERROR_UTF16_ERR2      (-25)
+#define PCRE2_ERROR_UTF16_ERR3      (-26)

-#define PCRE2_ERROR_UTF8_ERR1       (-41)
-#define PCRE2_ERROR_UTF8_ERR2       (-42)
-#define PCRE2_ERROR_UTF8_ERR3       (-43)
-#define PCRE2_ERROR_UTF8_ERR4       (-44)
-#define PCRE2_ERROR_UTF8_ERR5       (-45)
-#define PCRE2_ERROR_UTF8_ERR6       (-46)
-#define PCRE2_ERROR_UTF8_ERR7       (-47)
-#define PCRE2_ERROR_UTF8_ERR8       (-48)
-#define PCRE2_ERROR_UTF8_ERR9       (-49)
-#define PCRE2_ERROR_UTF8_ERR10      (-50)
-#define PCRE2_ERROR_UTF8_ERR11      (-51)
-#define PCRE2_ERROR_UTF8_ERR12      (-52)
-#define PCRE2_ERROR_UTF8_ERR13      (-53)
-#define PCRE2_ERROR_UTF8_ERR14      (-54)
-#define PCRE2_ERROR_UTF8_ERR15      (-55)
-#define PCRE2_ERROR_UTF8_ERR16      (-56)
-#define PCRE2_ERROR_UTF8_ERR17      (-57)
-#define PCRE2_ERROR_UTF8_ERR18      (-58)
-#define PCRE2_ERROR_UTF8_ERR19      (-59)
-#define PCRE2_ERROR_UTF8_ERR20      (-60)
-#define PCRE2_ERROR_UTF8_ERR21      (-61)
+/* Error codes for UTF-32 validity checks */

-/* Specific error codes for UTF-16 validity checks */
+#define PCRE2_ERROR_UTF32_ERR1      (-27)
+#define PCRE2_ERROR_UTF32_ERR2      (-28)

-#define PCRE2_ERROR_UTF16_ERR1      (-62)
-#define PCRE2_ERROR_UTF16_ERR2      (-63)
-#define PCRE2_ERROR_UTF16_ERR3      (-64)
+/* Error codes for pcre2[_dfa]_match() */

-/* Specific error codes for UTF-32 validity checks */
-
-#define PCRE2_ERROR_UTF32_ERR1      (-65)
-#define PCRE2_ERROR_UTF32_ERR3      (-66)
+#define PCRE2_ERROR_BADCOUNT        (-29)
+#define PCRE2_ERROR_BADENDIANNESS   (-30)
+#define PCRE2_ERROR_BADLENGTH       (-31)
+#define PCRE2_ERROR_BADMAGIC        (-32)
+#define PCRE2_ERROR_BADMODE         (-33)
+#define PCRE2_ERROR_BADOFFSET       (-34)
+#define PCRE2_ERROR_BADOPTION       (-35)
+#define PCRE2_ERROR_BADUTF_OFFSET   (-36)
+#define PCRE2_ERROR_CALLOUT         (-37)  /* Never used by PCRE2 itself */
+#define PCRE2_ERROR_DFA_BADRESTART  (-38)
+#define PCRE2_ERROR_DFA_RECURSE     (-39)
+#define PCRE2_ERROR_DFA_UCOND       (-40)
+#define PCRE2_ERROR_DFA_UITEM       (-41)
+#define PCRE2_ERROR_DFA_UMLIMIT     (-42)
+#define PCRE2_ERROR_DFA_WSSIZE      (-43)
+#define PCRE2_ERROR_INTERNAL        (-44)
+#define PCRE2_ERROR_JIT_BADOPTION   (-45)
+#define PCRE2_ERROR_JIT_STACKLIMIT  (-46)
+#define PCRE2_ERROR_MATCHLIMIT      (-47)
+#define PCRE2_ERROR_NOMEMORY        (-48)
+#define PCRE2_ERROR_NOSUBSTRING     (-49)
+#define PCRE2_ERROR_NULL            (-50)
+#define PCRE2_ERROR_RECURSELOOP     (-51)
+#define PCRE2_ERROR_RECURSIONLIMIT  (-52)
+#define PCRE2_ERROR_UNKNOWN_OPCODE  (-53)
+#define PCRE2_ERROR_UNSET           (-54)

 /* Request types for pcre2_pattern_info() */

--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@ -138,86 +138,83 @@ D   is inspected during pcre2_dfa_exec() execution

 /* Newline and \R settings, for use in the compile context. */

-#define PCRE2_NEWLINE_DEFAULT     0
 #define PCRE2_NEWLINE_CR          1
 #define PCRE2_NEWLINE_LF          2
 #define PCRE2_NEWLINE_CRLF        3
 #define PCRE2_NEWLINE_ANY         4
 #define PCRE2_NEWLINE_ANYCRLF     5

-#define PCRE2_BSR_DEFAULT         0
 #define PCRE2_BSR_UNICODE         1
 #define PCRE2_BSR_ANYCRLF         2

-/* Match-time and get/set-time error codes */
+/* Error codes: no match and partial match are "expected" errors. */

 #define PCRE2_ERROR_NOMATCH          (-1)
+#define PCRE2_ERROR_PARTIAL          (-2)

-#define PCRE2_ERROR_BADCOUNT         (-2)
-#define PCRE2_ERROR_BADENDIANNESS    (-3)
-#define PCRE2_ERROR_BADLENGTH        (-4)
-#define PCRE2_ERROR_BADMAGIC         (-5)
-#define PCRE2_ERROR_BADMODE          (-6)
-#define PCRE2_ERROR_BADOFFSET        (-7)
-#define PCRE2_ERROR_BADOPTION        (-8)
-#define PCRE2_ERROR_BADUTF           (-9)
-#define PCRE2_ERROR_BADUTF_OFFSET   (-10)
-#define PCRE2_ERROR_CALLOUT         (-11)  /* Never used by PCRE2 itself */
-#define PCRE2_ERROR_INTERNAL        (-12)
-#define PCRE2_ERROR_JIT_BADOPTION   (-13)
-#define PCRE2_ERROR_JIT_STACKLIMIT  (-14)
-#define PCRE2_ERROR_MATCHLIMIT      (-15)
-#define PCRE2_ERROR_NOMEMORY        (-16)
-#define PCRE2_ERROR_NOSUBSTRING     (-17)
-#define PCRE2_ERROR_NULL            (-18)
-#define PCRE2_ERROR_PARTIAL         (-19)
-#define PCRE2_ERROR_RECURSELOOP     (-20)
-#define PCRE2_ERROR_RECURSIONLIMIT  (-21)
-#define PCRE2_ERROR_UNKNOWN_OPCODE  (-22)
-#define PCRE2_ERROR_UNSET           (-23)
+/* Error codes for UTF-8 validity checks */

-#define PCRE2_ERROR_DFA_BADRESTART  (-30)
-#define PCRE2_ERROR_DFA_RECURSE     (-31)
-#define PCRE2_ERROR_DFA_UCOND       (-32)
-#define PCRE2_ERROR_DFA_UITEM       (-33)
-#define PCRE2_ERROR_DFA_UMLIMIT     (-34)
-#define PCRE2_ERROR_DFA_WSSIZE      (-35)
+#define PCRE2_ERROR_UTF8_ERR1        (-3)
+#define PCRE2_ERROR_UTF8_ERR2        (-4)
+#define PCRE2_ERROR_UTF8_ERR3        (-5)
+#define PCRE2_ERROR_UTF8_ERR4        (-6)
+#define PCRE2_ERROR_UTF8_ERR5        (-7)
+#define PCRE2_ERROR_UTF8_ERR6        (-8)
+#define PCRE2_ERROR_UTF8_ERR7        (-9)
+#define PCRE2_ERROR_UTF8_ERR8       (-10)
+#define PCRE2_ERROR_UTF8_ERR9       (-11)
+#define PCRE2_ERROR_UTF8_ERR10      (-12)
+#define PCRE2_ERROR_UTF8_ERR11      (-13)
+#define PCRE2_ERROR_UTF8_ERR12      (-14)
+#define PCRE2_ERROR_UTF8_ERR13      (-15)
+#define PCRE2_ERROR_UTF8_ERR14      (-16)
+#define PCRE2_ERROR_UTF8_ERR15      (-17)
+#define PCRE2_ERROR_UTF8_ERR16      (-18)
+#define PCRE2_ERROR_UTF8_ERR17      (-19)
+#define PCRE2_ERROR_UTF8_ERR18      (-20)
+#define PCRE2_ERROR_UTF8_ERR19      (-21)
+#define PCRE2_ERROR_UTF8_ERR20      (-22)
+#define PCRE2_ERROR_UTF8_ERR21      (-23)

+/* Error codes for UTF-16 validity checks */

-/* Specific error codes for UTF-8 validity checks */
+#define PCRE2_ERROR_UTF16_ERR1      (-24)
+#define PCRE2_ERROR_UTF16_ERR2      (-25)
+#define PCRE2_ERROR_UTF16_ERR3      (-26)

-#define PCRE2_ERROR_UTF8_ERR1       (-41)
-#define PCRE2_ERROR_UTF8_ERR2       (-42)
-#define PCRE2_ERROR_UTF8_ERR3       (-43)
-#define PCRE2_ERROR_UTF8_ERR4       (-44)
-#define PCRE2_ERROR_UTF8_ERR5       (-45)
-#define PCRE2_ERROR_UTF8_ERR6       (-46)
-#define PCRE2_ERROR_UTF8_ERR7       (-47)
-#define PCRE2_ERROR_UTF8_ERR8       (-48)
-#define PCRE2_ERROR_UTF8_ERR9       (-49)
-#define PCRE2_ERROR_UTF8_ERR10      (-50)
-#define PCRE2_ERROR_UTF8_ERR11      (-51)
-#define PCRE2_ERROR_UTF8_ERR12      (-52)
-#define PCRE2_ERROR_UTF8_ERR13      (-53)
-#define PCRE2_ERROR_UTF8_ERR14      (-54)
-#define PCRE2_ERROR_UTF8_ERR15      (-55)
-#define PCRE2_ERROR_UTF8_ERR16      (-56)
-#define PCRE2_ERROR_UTF8_ERR17      (-57)
-#define PCRE2_ERROR_UTF8_ERR18      (-58)
-#define PCRE2_ERROR_UTF8_ERR19      (-59)
-#define PCRE2_ERROR_UTF8_ERR20      (-60)
-#define PCRE2_ERROR_UTF8_ERR21      (-61)
+/* Error codes for UTF-32 validity checks */

-/* Specific error codes for UTF-16 validity checks */
+#define PCRE2_ERROR_UTF32_ERR1      (-27)
+#define PCRE2_ERROR_UTF32_ERR2      (-28)

-#define PCRE2_ERROR_UTF16_ERR1      (-62)
-#define PCRE2_ERROR_UTF16_ERR2      (-63)
-#define PCRE2_ERROR_UTF16_ERR3      (-64)
+/* Error codes for pcre2[_dfa]_match() */

-/* Specific error codes for UTF-32 validity checks */
-
-#define PCRE2_ERROR_UTF32_ERR1      (-65)
-#define PCRE2_ERROR_UTF32_ERR3      (-66)
+#define PCRE2_ERROR_BADCOUNT        (-29)
+#define PCRE2_ERROR_BADENDIANNESS   (-30)
+#define PCRE2_ERROR_BADLENGTH       (-31)
+#define PCRE2_ERROR_BADMAGIC        (-32)
+#define PCRE2_ERROR_BADMODE         (-33)
+#define PCRE2_ERROR_BADOFFSET       (-34)
+#define PCRE2_ERROR_BADOPTION       (-35)
+#define PCRE2_ERROR_BADUTF_OFFSET   (-36)
+#define PCRE2_ERROR_CALLOUT         (-37)  /* Never used by PCRE2 itself */
+#define PCRE2_ERROR_DFA_BADRESTART  (-38)
+#define PCRE2_ERROR_DFA_RECURSE     (-39)
+#define PCRE2_ERROR_DFA_UCOND       (-40)
+#define PCRE2_ERROR_DFA_UITEM       (-41)
+#define PCRE2_ERROR_DFA_UMLIMIT     (-42)
+#define PCRE2_ERROR_DFA_WSSIZE      (-43)
+#define PCRE2_ERROR_INTERNAL        (-44)
+#define PCRE2_ERROR_JIT_BADOPTION   (-45)
+#define PCRE2_ERROR_JIT_STACKLIMIT  (-46)
+#define PCRE2_ERROR_MATCHLIMIT      (-47)
+#define PCRE2_ERROR_NOMEMORY        (-48)
+#define PCRE2_ERROR_NOSUBSTRING     (-49)
+#define PCRE2_ERROR_NULL            (-50)
+#define PCRE2_ERROR_RECURSELOOP     (-51)
+#define PCRE2_ERROR_RECURSIONLIMIT  (-52)
+#define PCRE2_ERROR_UNKNOWN_OPCODE  (-53)
+#define PCRE2_ERROR_UNSET           (-54)

 /* Request types for pcre2_pattern_info() */

--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
--- a/src/pcre2_config.c
+++ b/src/pcre2_config.c
@ -102,7 +102,7 @@ switch (what)
  break;

  case PCRE2_CONFIG_NEWLINE:
-  *((int *)where) = NEWLINE;
+  *((int *)where) = NEWLINE_DEFAULT;
  break;

  case PCRE2_CONFIG_PARENS_LIMIT:
--- a/src/pcre2_context.c
+++ b/src/pcre2_context.c
@ -139,9 +139,9 @@ if (defmemctl)
  } 
 ccontext->stack_guard = NULL;
 ccontext->tables = PRIV(default_tables);
-ccontext->bsr_convention = PCRE2_BSR_DEFAULT;
-ccontext->newline_convention = PCRE2_NEWLINE_DEFAULT;
 ccontext->parens_nest_limit = PARENS_NEST_LIMIT;
+ccontext->newline_convention = NEWLINE_DEFAULT;
+ccontext->bsr_convention = BSR_DEFAULT;
 }


--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@ -51,7 +51,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #define STRING(a)  # a
 #define XSTRING(s) STRING(s)

-/* The texts of compile-time error messages. Do not ever re-use any error
+/* The texts of compile-time error messages. Compile-time error numbers start 
+at COMPILE_ERROR_BASE (100).
+
+Do not ever re-use any error
 number, because they are documented. Always add a new error instead. Messages
 marked DEAD below are no longer used. This used to be a table of strings, but
 in order to reduce the number of relocations needed when a shared library is
@ -85,7 +88,7 @@ static const char compile_error_texts[] =
  "missing )\0"
  /* 15 */
  "reference to non-existent subpattern\0"
-  "erroffset passed as NULL\0"
+  "pattern or erroffset passed as NULL\0"
  "unknown option bit(s) set\0"
  "missing ) after comment\0"
  "parentheses nested too deeply\0"  /** DEAD **/
@ -104,7 +107,7 @@ static const char compile_error_texts[] =
  /* 30 */
  "unknown POSIX class name\0"
  "POSIX collating elements are not supported\0"
-  "this version of PCRE is compiled without UTF support\0"
+  "this version of PCRE does not have UTF or Unicode property support\0"
  "spare error\0"  /** DEAD **/
  "character value in \\x{} or \\o{} is too large\0"
  /* 35 */
@ -133,7 +136,7 @@ static const char compile_error_texts[] =
  "DEFINE group contains more than one branch\0"
  /* 55 */
  "repeating a DEFINE group is not allowed\0"  /** DEAD **/
-  "inconsistent NEWLINE options\0"
+  "internal error: unknown newline setting\0"
  "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
  "a numbered reference must not be zero\0"
  "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
@ -171,58 +174,74 @@ static const char compile_error_texts[] =
  "parentheses are too deeply nested (stack check)\0"
  ;

-/* Match-time error texts are in the same format. */
+/* Match-time and UTF error texts are in the same format. */

 static const char match_error_texts[] =
  "no error\0"
  "no match\0" 
+  "partial match\0"
+  "UTF-8 error: 1 byte missing at end\0"
+  "UTF-8 error: 2 bytes missing at end\0"
+  /* 5 */ 
+  "UTF-8 error: 3 bytes missing at end\0"
+  "UTF-8 error: 4 bytes missing at end\0"
+  "UTF-8 error: 5 bytes missing at end\0"
+  "UTF-8 error: byte 2 top bits not 0x80\0" 
+  "UTF-8 error: byte 3 top bits not 0x80\0" 
+  /* 10 */ 
+  "UTF-8 error: byte 4 top bits not 0x80\0" 
+  "UTF-8 error: byte 5 top bits not 0x80\0" 
+  "UTF-8 error: byte 6 top bits not 0x80\0"
+  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"  
+  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
+  /* 15 */ 
+  "UTF-8 error: code point > 0x10ffff is not defined\0"
+  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
+  "UTF-8 error: overlong 2-byte sequence\0" 
+  "UTF-8 error: overlong 3-byte sequence\0" 
+  "UTF-8 error: overlong 4-byte sequence\0"
+  /* 20 */ 
+  "UTF-8 error: overlong 5-byte sequence\0"
+  "UTF-8 error: overlong 6-byte sequence\0"
+  "UTF-8 error: isolated 0x80 byte\0"
+  "UTF-8 error: illegal byte (0xfe or 0xff)\0" 
+  "UTF-16 error: missing low surrogate at end\0" 
+  /* 25 */ 
+  "UTF-16 error: invalid low surrogate\0" 
+  "UTF-16 error: isolated low surrogate\0" 
+  "UTF-32 error: surrogate character not allowed\0"
+  "UTF-32 error: code point > 0x10ffff is not defined\0" 
  "bad count value\0"
+  /* 30 */ 
  "pattern compiled with other endianness\0"
  "bad length\0"
-  /* -5 */ 
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
+  /* 35 */ 
  "bad option value\0"
-  "bad UTF string\0"
-  /* -10 */
  "bad offset into UTF string\0"
  "callout error code\0"              /* Never returned by PCRE2 itself */   
+  "invalid data in workspace for DFA restart\0"
+  "too much recursion for DFA matching\0"
+  /* 40 */ 
+  "backreference condition or recursion test not supported for DFA matching\0"
+  "item unsupported for DFA matching\0"
+  "match limit not supported for DFA matching\0"
+  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
+  /* 45 */ 
  "bad JIT option\0"
  "JIT stack limit reached\0"
-  /* -15 */
  "match limit exceeded\0"
  "no more memory\0"
  "unknown substring\0" 
+  /* 50 */ 
  "NULL argument passed\0"
-  "partial match\0"
-  /* -20 */ 
  "nested recursion at the same subject position\0"
  "recursion limit exceeded\0"
  "unknown opcode - pattern overwritten?\0"
  "value unset\0"                     /* Used by pcre2_pattern_info() */
-  "spare -24\0"
-  /* -25 */  
-  "spare -25\0" 
-  "spare -26\0" 
-  "spare -27\0" 
-  "spare -28\0" 
-  "spare -29\0" 
-  /* -30 */ 
-  "invalid data in workspace for DFA restart\0"
-  "too much recursion for DFA matching\0"
-  "backreference condition or recursion test not supported for DFA matching\0"
-  "item unsupported for DFA matching\0"
-  "match limit not supported for DFA matching\0"
-  /* -35 */ 
-  "workspace size exceeded in DFA matching\0"
-  "spare -36\0"
-  "spare -37\0"
-  "spare -38\0"
-  "spare -39\0"
-  /* -40 */
-  "spare -39\0"
  ; 


@ -232,7 +251,8 @@ static const char match_error_texts[] =

 /* This function copies an error message into a buffer whose units are of an 
 appropriate width. Error numbers are positive for compile-time errors, and 
-negative for exec-time errors.
+negative for match-time errors (except for UTF errors), but the numbers are all 
+distinct.

 Arguments:
  enumber       error number
@ -253,13 +273,12 @@ uint32_t n;

 if (size == 0) return PCRE2_ERROR_NOMEMORY;

-if (enumber > 0)    /* Compile-time error */
+if (enumber > COMPILE_ERROR_BASE)  /* Compile error */
  {
  message = compile_error_texts;
-  n = enumber;
+  n = enumber - COMPILE_ERROR_BASE; 
  }  
-  
-else                /* Match-time error */
+else                               /* Match or UTF error */
  {
  message = match_error_texts;
  n = -enumber; 
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@ -40,78 +40,235 @@ POSSIBILITY OF SUCH DAMAGE.

 /* FIXME: this file is incomplete, being gradually built. */

+/* We do not support both EBCDIC and UTF at the same time. The "configure"
+script prevents both being selected, but not everybody uses "configure". */
+
+#if defined EBCDIC && defined SUPPORT_UTF
+#error The use of both EBCDIC and SUPPORT_UTF is not supported.
+#endif
+
+/* Standard C headers */
+
 #include <ctype.h>
 #include <limits.h>
 #include <stddef.h>
-#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

-#include "pcre2.h"
-#include "pcre2_ucp.h"
-
-#define PUBL(name) pcre2_##name
-
-#ifndef PRIV
-#define PRIV(name) _pcre2_##name
-#endif
-
-#define PCRE2_CALL_CONVENTION
-
-extern const uint8_t PRIV(default_tables)[];
-
-
-
+/* Macros to make boolean values more obvious. The #ifndef is to pacify
+compiler warnings in environments where these macros are defined elsewhere.
+Unfortunately, there is no way to do the same for the typedef. */

 typedef int BOOL;
-
 #ifndef FALSE
 #define FALSE   0
 #define TRUE    1
 #endif

-
 /* Valgrind (memcheck) support */

 #ifdef SUPPORT_VALGRIND
 #include <valgrind/memcheck.h>
 #endif

-/* This is an unsigned int value that no character can ever have, as
+/* When compiling a DLL for Windows, the exported symbols have to be declared
+using some MS magic. I found some useful information on this web page:
+http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
+information there, using __declspec(dllexport) without "extern" we have a
+definition; with "extern" we have a declaration. The settings here override the
+setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL,
+which is all that is needed for applications (they just import the symbols). We
+use:
+
+  PCRE2_EXP_DECL       for declarations
+  PCRE2_EXP_DEFN       for definitions of exported functions
+  PCRE2_EXP_DATA_DEFN  for definitions of exported variables
+
+The reason for the two DEFN macros is that in non-Windows environments, one
+does not want to have "extern" before variable definitions because it leads to
+compiler warnings. So we distinguish between functions and variables. In
+Windows, the two should always be the same.
+
+The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test,
+which is an application, but needs to import this file in order to "peek" at
+internals, can #include pcre2.h first to get an application's-eye view.
+
+In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
+special-purpose environments) might want to stick other stuff in front of
+exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN and
+PCRE2_EXP_DATA_DEFN only if they are not already set. */
+
+#ifndef PCRE2_EXP_DECL
+#  ifdef _WIN32
+#    ifndef PCRE2_STATIC
+#      define PCRE2_EXP_DECL       extern __declspec(dllexport)
+#      define PCRE2_EXP_DEFN       __declspec(dllexport)
+#      define PCRE2_EXP_DATA_DEFN  __declspec(dllexport)
+#    else
+#      define PCRE2_EXP_DECL       extern
+#      define PCRE2_EXP_DEFN
+#      define PCRE2_EXP_DATA_DEFN
+#    endif
+#  else
+#    ifdef __cplusplus
+#      define PCRE2_EXP_DECL       extern "C"
+#    else
+#      define PCRE2_EXP_DECL       extern
+#    endif
+#    ifndef PCRE2_EXP_DEFN
+#      define PCRE2_EXP_DEFN       PCRE2_EXP_DECL
+#    endif
+#    ifndef PCRE2_EXP_DATA_DEFN
+#      define PCRE2_EXP_DATA_DEFN
+#    endif
+#  endif
+#endif
+
+/* Include the public PCRE2 header and the definitions of UCP character
+property values. This must follow the setting of PCRE2_EXP_DECL above. */
+
+#include "pcre2.h"
+#include "pcre2_ucp.h"
+
+/* When PCRE is compiled as a C++ library, the subject pointer can be replaced
+with a custom type. This makes it possible, for example, to allow pcre2_match()
+to process subject strings that are discontinuous by using a smart pointer
+class. It must always be possible to inspect all of the subject string in
+pcre2_match() because of the way it backtracks. */
+
+/* WARNING: This is as yet untested for PCRE2. */
+
+#ifdef CUSTOM_SUBJECT_PTR
+#undef PCRE2_SPTR
+#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
+#endif
+
+/* When compiling with the MSVC compiler, it is sometimes necessary to include
+a "calling convention" before exported function names. (This is secondhand
+information; I know nothing about MSVC myself). For example, something like
+
+  void __cdecl function(....)
+
+might be needed. In order to make this easy, all the exported functions have
+PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
+set, we ensure here that it has no effect. */
+
+#ifndef PCRE2_CALL_CONVENTION
+#define PCRE2_CALL_CONVENTION
+#endif
+
+/* When checking for integer overflow in pcre2_compile(), we need to handle
+large integers. If a 64-bit integer type is available, we can use that.
+Otherwise we have to cast to double, which of course requires floating point
+arithmetic. Handle this by defining a macro for the appropriate type. If
+stdint.h is available, include it; it may define INT64_MAX. Systems that do not
+have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
+by "configure". */
+
+#if defined HAVE_STDINT_H
+#include <stdint.h>
+#elif defined HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#if defined INT64_MAX || defined int64_t
+#define INT64_OR_DOUBLE int64_t
+#else
+#define INT64_OR_DOUBLE double
+#endif
+
+/* When compiling for use with the Virtual Pascal compiler, these functions
+need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
+option on the command line. */
+
+#ifdef VPCOMPAT
+#define strlen(s)        _strlen(s)
+#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
+#define memcmp(s,c,n)    _memcmp(s,c,n)
+#define memcpy(d,s,n)    _memcpy(d,s,n)
+#define memmove(d,s,n)   _memmove(d,s,n)
+#define memset(s,c,n)    _memset(s,c,n)
+#else  /* VPCOMPAT */
+
+/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
+define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
+is set. Otherwise, include an emulating function for those systems that have
+neither (there are some non-Unix environments where this is the case). */
+
+#ifndef HAVE_MEMMOVE
+#undef  memmove        /* some systems may have a macro */
+#ifdef HAVE_BCOPY
+#define memmove(a, b, c) bcopy(b, a, c)
+#else  /* HAVE_BCOPY */
+static void *
+pcre_memmove(void *d, const void *s, size_t n)  /* Emulation of memmove(): copies n bytes from s to d, returns d */
+{
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src)  /* Destination is above source: copy from the top down so overlapping source bytes are read before being overwritten */
+  {
+  dest += n;
+  src += n;
+  for (i = 0; i < n; ++i) *(--dest) = *(--src);
+  return (void *)dest;  /* dest has been stepped back down to d */
+  }
+else  /* Destination is at or below source: a plain bottom-up copy is safe */
+  {
+  for (i = 0; i < n; ++i) *dest++ = *src++;
+  return (void *)(dest - n);  /* dest is n bytes past d, so this is d again */
+  }
+}
+#define memmove(a, b, c) pcre_memmove(a, b, c)
+#endif   /* not HAVE_BCOPY */
+#endif   /* not HAVE_MEMMOVE */
+#endif   /* not VPCOMPAT */
+
+/* External (in the C sense) functions and macros that are private to the 
+libraries are always referenced using the PRIV macro. This makes it possible
+for pcre2test.c to include some of the source files from the libraries using a
+different PRIV definition to avoid name clashes. */
+
+#ifndef PRIV
+#define PRIV(name) _pcre2_##name
+#endif
+
+/* This is an unsigned int value that no UTF character can ever have, as
 Unicode doesn't go beyond 0x0010ffff. */

 #define NOTACHAR 0xffffffff

-/* When UTF encoding is being used, a character is no longer just a single
-byte in 8-bit mode or a single short in 16-bit mode. The macros for character
-handling generate simple sequences when used in the basic mode, and more
-complicated ones for UTF characters. GETCHARLENTEST and other macros are not
-used when UTF is not supported. To make sure they can never even appear when
-UTF support is omitted, we don't even define them. */
+/* Compile-time errors are added to this value. As they are documented, it
+should probably never be changed. */

-#ifndef SUPPORT_UTF
+#define COMPILE_ERROR_BASE 100

-/* #define MAX_VALUE_FOR_SINGLE_CHAR */
-/* #define HAS_EXTRALEN(c) */
-/* #define GET_EXTRALEN(c) */
-/* #define NOT_FIRSTCHAR(c) */
-#define GETCHAR(c, eptr) c = *eptr;
-#define GETCHARTEST(c, eptr) c = *eptr;
-#define GETCHARINC(c, eptr) c = *eptr++;
-#define GETCHARINCTEST(c, eptr) c = *eptr++;
-#define GETCHARLEN(c, eptr, len) c = *eptr;
-/* #define GETCHARLENTEST(c, eptr, len) */
-/* #define BACKCHAR(eptr) */
-/* #define FORWARDCHAR(eptr) */
-/* #define ACROSSCHAR(condition, eptr, action) */
+/* Define the default BSR convention. */

-#else   /* SUPPORT_UTF */
+#ifdef BSR_ANYCRLF
+#define BSR_DEFAULT PCRE2_BSR_ANYCRLF
+#else
+#define BSR_DEFAULT PCRE2_BSR_UNICODE
+#endif
+
+
+/* ---------------- Basic UTF-8 macros ---------------- */
+
+/* These UTF-8 macros are always defined because they are used in pcre2test for
+handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library
+is not supported. */

 /* Tests whether a UTF-8 code point needs extra bytes to decode. */

 #define HASUTF8EXTRALEN(c) ((c) >= 0xc0)

+/* The following macros were originally written in the form of loops that used
+data from the tables whose names start with PRIV(utf8_table). They were
+rewritten by a user so as not to use loops, because in some environments this
+gives a significant performance advantage, and it seems never to do any harm.
+*/
+
 /* Base macro to pick up the remaining bytes of a UTF-8 character, not
 advancing the pointer. */

@ -168,8 +325,44 @@ the pointer. */
      } \
    }

-#endif  /* SUPPORT_UTF */
+/* Base macro to pick up the remaining bytes of a UTF-8 character, not
+advancing the pointer, incrementing the length. */

+#define GETUTF8LEN(c, eptr, len) \
+    {  /* On entry c holds the first byte, already known to be >= 0xc0 by the callers; len is increased by the number of following bytes used */ \
+    if ((c & 0x20) == 0)  /* One following byte */ \
+      { \
+      c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
+      len++; \
+      } \
+    else if ((c & 0x10)  == 0)  /* Two following bytes */ \
+      { \
+      c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
+      len += 2; \
+      } \
+    else if ((c & 0x08)  == 0)  /* Three following bytes */ \
+      {\
+      c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
+          ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
+      len += 3; \
+      } \
+    else if ((c & 0x04)  == 0)  /* Four following bytes */ \
+      { \
+      c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
+          ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
+          (eptr[4] & 0x3f); \
+      len += 4; \
+      } \
+    else  /* Five following bytes */ \
+      {\
+      c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
+          ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
+          ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
+      len += 5; \
+      } \
+    }
+
+/* --------------- Whitespace macros ---------------- */

 /* Tests for Unicode horizontal and vertical whitespace characters must check a
 number of different values. Using a switch statement for this generates the
@ -187,7 +380,7 @@ NOTACHAR (which is 0xffffffff).
 Any changes should ensure that the various macros are kept in step with each
 other. NOTE: The values also appear in pcre2_jit_compile.c. */

-/* ------ ASCII/Unicode environments ------ */
+/* -------------- ASCII/Unicode environments -------------- */

 #ifndef EBCDIC

@ -242,7 +435,7 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
  VSPACE_BYTE_CASES: \
  VSPACE_MULTIBYTE_CASES

-/* ------ EBCDIC environments ------ */
+/* -------------- EBCDIC environments -------------- */

 #else
 #define HSPACE_LIST CHAR_HT, CHAR_SPACE
@ -271,9 +464,47 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
 #define VSPACE_CASES VSPACE_BYTE_CASES
 #endif  /* EBCDIC */

-/* ------ End of whitespace macros ------ */
+/* -------------- End of whitespace macros -------------- */


+/* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF,
+"any" and "anycrlf" at present). The following macros are used to package up
+testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
+modules to indicate in which datablock the parameters exist, and what the
+start/end of string field names are. */
+
+#define NLTYPE_FIXED    0     /* Newline is a fixed length string */
+#define NLTYPE_ANY      1     /* Newline is any Unicode line ending */
+#define NLTYPE_ANYCRLF  2     /* Newline is CR, LF, or CRLF */
+
+/* This macro checks for a newline at the given position. It expects NLBLOCK,
+PSEND, and a variable called utf to be available where it is expanded. When the
+newline type is not NLTYPE_FIXED, the test is handed to PRIV(is_newline)(),
+which is also given the address of NLBLOCK->nllen. Otherwise, after checking
+that nllen code units remain before PSEND, the first code unit is compared with
+NLBLOCK->nl[0] and, unless nllen is 1, the second with NLBLOCK->nl[1]. */
+
+#define IS_NEWLINE(p) \
+  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
+    ((p) < NLBLOCK->PSEND && \
+     PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
+       &(NLBLOCK->nllen), utf)) \
+    : \
+    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
+     UCHAR21TEST(p) == NLBLOCK->nl[0] && \
+     (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1])       \
+    ) \
+  )
+
+/* This macro checks for a newline immediately preceding the given position. It
+expects NLBLOCK, PSSTART, and a variable called utf to be available where it is
+expanded. When the newline type is not NLTYPE_FIXED, the test is handed to
+PRIV(was_newline)(), which is also given the address of NLBLOCK->nllen.
+Otherwise, after checking that at least nllen code units lie between PSSTART
+and the position, the code unit nllen places back is compared with
+NLBLOCK->nl[0] and, unless nllen is 1, the one after it with NLBLOCK->nl[1]. */
+
+#define WAS_NEWLINE(p) \
+  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
+    ((p) > NLBLOCK->PSSTART && \
+     PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
+       &(NLBLOCK->nllen), utf)) \
+    : \
+    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
+     UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
+     (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
+    ) \
+  )
+
 /* Private flags containing information about the compiled pattern. The first
 three must not be changed, because whichever is set is actually the number of
 bytes in a code unit in that mode. */
@ -296,16 +527,55 @@ bytes in a code unit in that mode. */

 #define PCRE2_MODE_MASK     (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)

-
 /* Magic number to provide a small check against being handed junk. */

 #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */

-/* This variable is used to detect a loaded regular expression
-in different endianness. */
+/* This value is used to detect a loaded regular expression in different
+endianness. */

 #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */

+/* The maximum remaining length of subject we are prepared to search for a
+req_unit match. */
+
+#define REQ_UNIT_MAX 1000
+
+/* Bit definitions for entries in the pcre_ctypes table. */
+
+#define ctype_space   0x01
+#define ctype_letter  0x02
+#define ctype_digit   0x04
+#define ctype_xdigit  0x08
+#define ctype_word    0x10    /* alphanumeric or '_' */
+#define ctype_meta    0x80    /* regexp meta char or zero (end pattern) */
+
+/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
+of bits for a class map. Some classes are built by combining these tables. */
+
+#define cbit_space     0      /* [:space:] or \s */
+#define cbit_xdigit   32      /* [:xdigit:] */
+#define cbit_digit    64      /* [:digit:] or \d */
+#define cbit_upper    96      /* [:upper:] */
+#define cbit_lower   128      /* [:lower:] */
+#define cbit_word    160      /* [:word:] or \w */
+#define cbit_graph   192      /* [:graph:] */
+#define cbit_print   224      /* [:print:] */
+#define cbit_punct   256      /* [:punct:] */
+#define cbit_cntrl   288      /* [:cntrl:] */
+#define cbit_length  320      /* Length of the cbits table */
+
+/* Offsets of the various tables from the base tables pointer, and
+total length. */
+
+#define lcc_offset      0
+#define fcc_offset    256
+#define cbits_offset  512
+#define ctypes_offset (cbits_offset + cbit_length)
+#define tables_length (ctypes_offset + 256)
+
+
+


 /* -------------------- Character and string names ------------------------ */
@ -1432,6 +1702,17 @@ typedef struct pcre2_memctl {
  void      *memory_data;
 } pcre2_memctl;

+/* Structure for building a chain of open capturing subpatterns during
+compiling, so that instructions to close them can be compiled when (*ACCEPT) is
+encountered. This is also used to identify subpatterns that contain recursive
+back references to themselves, so that they can be made atomic. */
+
+typedef struct open_capitem {
+  struct open_capitem *next;    /* Chain link to another open_capitem */
+  uint16_t number;              /* Capture number of the open subpattern */
+  uint16_t flag;                /* Set TRUE if a recursive back reference is seen */
+} open_capitem;
+
 /* Layout of the UCP type table that translates property names into types and
 codes. Each entry used to point directly to a name, but to reduce the number of
 relocations in shared libraries, it now has an offset into a single string
@ -1481,13 +1762,52 @@ extern const int         PRIV(ucp_typerange)[];

 /* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */

+/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not 
+defined, so the following items are omitted. */
+
 #ifdef PCRE2_CODE_UNIT_WIDTH

-/* Mode-dependent macros and private structures are defined in a separate file.
-When compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we
-include them at the appropriate width. When compiling pcre2test, however, that
-macro is not set at this point because pcre2test needs to include them at all
-supported widths. */
+/* This is the largest non-UTF code point. */
+
+#define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH))
+
+
+/* Internal shared data tables. These are tables that are used by more than one
+of the exported public functions. They have to be "external" in the C sense,
+but are not part of the PCRE2 public API. The data for these tables is in the
+pcre2_tables.c module. Even though some of them are identical in each library, 
+they must have different names so that more than one library can be linked with 
+an application. UTF-8 tables are needed only when compiling the 8-bit library.
+*/
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+extern const int              PRIV(utf8_table1)[];
+extern const int              PRIV(utf8_table1_size);                            
+extern const int              PRIV(utf8_table2)[];
+extern const int              PRIV(utf8_table3)[];
+extern const uint8_t          PRIV(utf8_table4)[];       
+#endif                        
+                              
+extern const uint8_t          PRIV(default_tables)[];
+extern const uint8_t          PRIV(OP_lengths)[];
+
+extern const uint32_t         PRIV(hspace_list)[];
+extern const uint32_t         PRIV(vspace_list)[];
+                              
+extern const ucp_type_table   PRIV(utt)[];
+extern const char             PRIV(utt_names)[];
+extern const size_t           PRIV(utt_size);
+
+
+/* Mode-dependent macros and hidden and private structures are defined in a
+separate file so that pcre2test can include them at all supported widths. When
+compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
+include them at the appropriate width, after setting up suffix macros for the
+private structures. */
+
+#define compile_data                 PCRE2_SUFFIX(compile_data_)
+#define branch_chain                 PCRE2_SUFFIX(branch_chain_)
+#define named_group                  PCRE2_SUFFIX(named_group_)

 #include "pcre2_intmodedep.h"

@ -1498,14 +1818,32 @@ from pcre2test, and must not be defined when no code unit width is available.
 */

 #define _pcre2_compile_context_init  PCRE2_SUFFIX(_pcre2_compile_context_init_)
+#define _pcre2_find_bracket          PCRE2_SUFFIX(_pcre2_find_bracket_)
+#define _pcre2_is_newline            PCRE2_SUFFIX(_pcre2_is_newline_)
 #define _pcre2_match_context_init    PCRE2_SUFFIX(_pcre2_match_context_init_)
 #define _pcre2_memctl_malloc         PCRE2_SUFFIX(_pcre2_memctl_malloc_)
+#define _pcre2_ord2utf               PCRE2_SUFFIX(_pcre2_ord2utf_)
 #define _pcre2_strcmp                PCRE2_SUFFIX(_pcre_strcmp_)
+#define _pcre2_strcmp_c8             PCRE2_SUFFIX(_pcre_strcmp_c8_)
+#define _pcre2_strlen                PCRE2_SUFFIX(_pcre_strlen_)
+#define _pcre2_strncmp               PCRE2_SUFFIX(_pcre_strncmp_)
+#define _pcre2_strncmp_c8            PCRE2_SUFFIX(_pcre_strncmp_c8_)
+#define _pcre2_valid_utf             PCRE2_SUFFIX(_pcre_valid_utf_)
+#define _pcre2_was_newline           PCRE2_SUFFIX(_pcre2_was_newline_)

 extern void  _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
+extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
+extern BOOL  _pcre2_is_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
 extern void  _pcre2_match_context_init(pcre2_match_context *, BOOL);
 extern void  *_pcre2_memctl_malloc(size_t, size_t, pcre2_memctl *);
+extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
 extern int   _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
-#endif
+extern int   _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
+extern int   _pcre2_strlen(PCRE2_SPTR);
+extern int   _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
+extern int   _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
+extern int   _pcre2_valid_utf(PCRE2_SPTR, int, size_t *);
+extern BOOL  _pcre2_was_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
+#endif  /* PCRE2_CODE_UNIT_WIDTH */

 /* End of pcre2_internal.h */
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@ -42,21 +42,44 @@ POSSIBILITY OF SUCH DAMAGE.
 /* This module contains mode-dependent macro and structure definitions. The 
 file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
 These mode-dependent items are kept in a separate file so that they can also be
-#included multiple times for different code unit widths by pcre2test. Start by 
-undefining all the new macros defined herein so that they can be redefined for 
-multiple inclusions. */
+#included multiple times for different code unit widths by pcre2test in order 
+to have access to the hidden structures at all supported widths. 

+Some of the mode-dependent macros are required at different widths for
+different parts of the pcre2test code (in particular, the included
+pcre_printint.c file). We undefine them here so that they can be re-defined for
+multiple inclusions. Not all of these are used in pcre2test, but it's easier 
+just to undefine them all. */
+
+#undef ACROSSCHAR
+#undef BACKCHAR
 #undef CU2BYTES
+#undef FORWARDCHAR
 #undef GET
 #undef GET2
+#undef GETCHAR
+#undef GETCHARINC
+#undef GETCHARINCTEST
+#undef GETCHARLEN
+#undef GETCHARLENTEST
+#undef GETCHARTEST
+#undef GET_EXTRALEN
+#undef HAS_EXTRALEN
 #undef IMM2_SIZE
+#undef MAX_255
+#undef MAX_MARK
 #undef MAX_PATTERN_SIZE
+#undef MAX_UTF_SINGLE_CU
+#undef NOT_FIRSTCHAR
 #undef PUT
 #undef PUT2
+#undef PUT2INC
+#undef PUTCHAR
 #undef PUTINC


-/* ---------------------------MACROS ----------------------------- */
+
+/* -------------------------- MACROS ----------------------------- */

 /* PCRE keeps offsets in its compiled code as at least 16-bit quantities
 (always stored in big-endian order in 8-bit mode) by default. These are used,
@ -70,7 +93,6 @@ unit string is now handled by the macros that are defined here.
 The macros are controlled by the value of LINK_SIZE. This defaults to 2, but 
 values of 2 or 4 are also supported. */

-
 /* ------------------- 8-bit support  ------------------ */

 #if PCRE2_CODE_UNIT_WIDTH == 8
@ -150,8 +172,8 @@ values of 2 or 4 are also supported. */
 #error Unsupported compiling mode
 #endif 

-/* -------------------------------------------------------*/

+/* --------------- Other mode-specific macros ----------------- */

 /* PCRE uses some other (at least) 16-bit quantities that do not change when
 the size of offsets changes. They are used for repeat counts and for other
@ -166,7 +188,7 @@ arithmetic results in a signed value. Hence the cast. */
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #define IMM2_SIZE 2
 #define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
-#define PUT2(a,n,d) { a[n] = (d) >> 8; a[(n)+1] = (d) & 255; }
+#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255  /* NOTE(review): expands to a comma expression with no outer parentheses; use only as a complete statement */

 #else  /* Code units are 16 or 32 bits */
 #define IMM2_SIZE 1
@ -174,14 +196,338 @@ arithmetic results in a signed value. Hence the cast. */
 #define PUT2(a,n,d) a[n] = d  
 #endif

+/* Other macros that are different for 8-bit mode. The maximum length of a MARK
+name must fit in one code unit; currently it is set to 255 or 65535. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define MAX_255(c) TRUE
+#define MAX_MARK ((1u << 8) - 1)
+#ifdef SUPPORT_UTF
+#define SUPPORT_WIDE_CHARS
+#endif  /* SUPPORT_UTF */
+
+#else  /* Code units are 16 or 32 bits */
+#define MAX_255(c) ((c) <= 255u)
+#define MAX_MARK ((1u << 16) - 1)
+#define SUPPORT_WIDE_CHARS
+#endif
+
+
+
+/* ----------------- Character-handling macros ----------------- */
+
+/* There is a proposed future special "UTF-21" mode, in which only the lowest
+21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
+high-order bits available to the application for other uses. In preparation for
+the future implementation of this mode, there are macros that load a data item
+and, if in this special mode, mask it to 21 bits. These macros all have names
+starting with UCHAR21. In all other modes, including the normal 32-bit
+library, the macros all have the same simple definitions. When the new mode is
+implemented, it is expected that these definitions will be varied appropriately
+using #ifdef when compiling the library that supports the special mode. */
+
+#define UCHAR21(eptr)        (*(eptr))
+#define UCHAR21TEST(eptr)    (*(eptr))
+#define UCHAR21INC(eptr)     (*(eptr)++)
+#define UCHAR21INCTEST(eptr) (*(eptr)++)
+
+/* When UTF encoding is being used, a character is no longer just a single
+byte in 8-bit mode or a single short in 16-bit mode. The macros for character
+handling generate simple sequences when used in the basic mode, and more
+complicated ones for UTF characters. GETCHARLENTEST and other macros are not
+used when UTF is not supported. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */
+
+#ifndef SUPPORT_UTF
+
+/* #define MAX_UTF_SINGLE_CU */
+/* #define HAS_EXTRALEN(c) */
+/* #define GET_EXTRALEN(c) */
+/* #define NOT_FIRSTCHAR(c) */
+#define GETCHAR(c, eptr) c = *eptr;
+#define GETCHARTEST(c, eptr) c = *eptr;
+#define GETCHARINC(c, eptr) c = *eptr++;
+#define GETCHARINCTEST(c, eptr) c = *eptr++;
+#define GETCHARLEN(c, eptr, len) c = *eptr;
+#define PUTCHAR(c, p) (*p = c, 1)
+/* #define GETCHARLENTEST(c, eptr, len) */
+/* #define BACKCHAR(eptr) */
+/* #define FORWARDCHAR(eptr) */
+/* #define ACROSSCHAR(condition, eptr, action) */
+
+#else   /* SUPPORT_UTF */
+
+/* ------------------- 8-bit support  ------------------ */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+
+/* The largest UTF code point that can be encoded as a single code unit. */
+
+#define MAX_UTF_SINGLE_CU 127
+
+/* Tests whether the code point needs extra characters to decode. */
+
+#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
+
+/* Returns the additional number of characters if HAS_EXTRALEN(c) is TRUE.
+Otherwise the result is undefined. */
+
+#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
+
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
+
+/* Get the next UTF-8 character, not advancing the pointer. This is called when
+we know we are in UTF-8 mode. */
+
+#define GETCHAR(c, eptr) \
+  c = *eptr; \
+  if (c >= 0xc0) GETUTF8(c, eptr);
+
+/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+  c = *eptr; \
+  if (utf && c >= 0xc0) GETUTF8(c, eptr);
+
+/* Get the next UTF-8 character, advancing the pointer. This is called when we
+know we are in UTF-8 mode. */
+
+#define GETCHARINC(c, eptr) \
+  c = *eptr++; \
+  if (c >= 0xc0) GETUTF8INC(c, eptr);
+
+/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-8 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+  c = *eptr++; \
+  if (utf && c >= 0xc0) GETUTF8INC(c, eptr);
+
+/* Get the next UTF-8 character, not advancing the pointer, incrementing length
+if there are extra bytes. This is called when we know we are in UTF-8 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+  c = *eptr; \
+  if (c >= 0xc0) GETUTF8LEN(c, eptr, len);
+
+/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
+pointer, incrementing length if there are extra bytes. This is called when we
+do not know if we are in UTF-8 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  c = *eptr; \
+  if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-8 mode - we don't put a test within the macro
+because almost all calls are already within a block of UTF-8 only code. */
+
+#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
+
+/* Same as above, just in the other direction. */
+#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++
+
+/* Same as above, but in a fully customizable form: action is repeated for as
+long as condition holds and (eptr & 0xc0) == 0x80. NOTE(review): the eptr
+argument is masked directly, not dereferenced, so it looks as if callers are
+expected to pass the code unit value (for example *ptr) rather than a pointer;
+confirm against the call sites. */
+#define ACROSSCHAR(condition, eptr, action) \
+  while((condition) && ((eptr) & 0xc0) == 0x80) action
+  
+/* Deposit a character into memory, returning the number of code units. */
+
+#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
+  PRIV(ord2utf)(c,p) : (*p = c, 1))
+
+
+/* ------------------- 16-bit support  ------------------ */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+
+/* The largest UTF code point that can be encoded as a single code unit. */
+
+#define MAX_UTF_SINGLE_CU 65535
+
+/* Tests whether the code point needs extra characters to decode. */
+
+#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800)
+
+/* Returns the additional number of characters if HAS_EXTRALEN(c) is TRUE.
+Otherwise the result is undefined. */
+
+#define GET_EXTRALEN(c) 1
+
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, not
+advancing the pointer. */
+
+#define GETUTF16(c, eptr) \
+   { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; }
+
+/* Get the next UTF-16 character, not advancing the pointer. This is called when
+we know we are in UTF-16 mode. */
+
+#define GETCHAR(c, eptr) \
+  c = *eptr; \
+  if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
+
+/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+  c = *eptr; \
+  if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
+the pointer. */
+
+#define GETUTF16INC(c, eptr) \
+   { c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; }
+
+/* Get the next UTF-16 character, advancing the pointer. This is called when we
+know we are in UTF-16 mode. */
+
+#define GETCHARINC(c, eptr) \
+  c = *eptr++; \
+  if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
+
+/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-16 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+  c = *eptr++; \
+  if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, not
+advancing the pointer, incrementing the length. */
+
+#define GETUTF16LEN(c, eptr, len) \
+   { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; }
+
+/* Get the next UTF-16 character, not advancing the pointer, incrementing
+length if there is a low surrogate. This is called when we know we are in
+UTF-16 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+  c = *eptr; \
+  if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
+
+/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
+pointer, incrementing length if there is a low surrogate. This is called when
+we do not know if we are in UTF-16 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  c = *eptr; \
+  if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-16 mode - we don't put a test within the
+macro because almost all calls are already within a block of UTF-16 only
+code. */
+
+#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr--
+
+/* Same as above, just in the other direction. */
+#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++
+
+/* Same as above, but it allows a fully customizable form. */
+#define ACROSSCHAR(condition, eptr, action) \
+  if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
+
+/* Deposit a character into memory, returning the number of code units. */
+
+#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
+  PRIV(ord2utf)(c,p) : (*p = c, 1))
+
+
+/* ------------------- 32-bit support  ------------------ */
+
+#else
+
+/* These are trivial for the 32-bit library, since all UTF-32 characters fit
+into one PCRE2_UCHAR unit. */
+
+#define MAX_UTF_SINGLE_CU (0x10ffffu)
+#define HAS_EXTRALEN(c) (0)
+#define GET_EXTRALEN(c) (0)
+#define NOT_FIRSTCHAR(c) (0)
+
+/* Get the next UTF-32 character, not advancing the pointer. This is called when
+we know we are in UTF-32 mode. */
+
+#define GETCHAR(c, eptr) \
+  c = *(eptr);
+
+/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
+pointer. No test is actually needed, because the result is the same code unit
+in either mode. */
+
+#define GETCHARTEST(c, eptr) \
+  c = *(eptr);
+
+/* Get the next UTF-32 character, advancing the pointer. This is called when we
+know we are in UTF-32 mode. */
+
+#define GETCHARINC(c, eptr) \
+  c = *((eptr)++);
+
+/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-32 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+  c = *((eptr)++);
+
+/* Get the next UTF-32 character, not advancing the pointer, not incrementing
+length (since all UTF-32 is of length 1). This is called when we know we are in
+UTF-32 mode. The len argument is not used. */
+
+#define GETCHARLEN(c, eptr, len) \
+  GETCHAR(c, eptr)
+
+/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
+pointer, not incrementing the length (since all UTF-32 is of length 1).
+This is called when we do not know if we are in UTF-32 mode. The len argument
+is not used. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  GETCHARTEST(c, eptr)
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-32 mode - we don't put a test within the
+macro because almost all calls are already within a block of UTF-32 only
+code.
+
+These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
+
+#define BACKCHAR(eptr) do { } while (0)
+
+/* Same as above, just in the other direction. */
+
+#define FORWARDCHAR(eptr) do { } while (0)
+
+/* Same as above, but it allows a fully customizable form. In this mode none of
+the arguments is evaluated. */
+
+#define ACROSSCHAR(condition, eptr, action) do { } while (0)
+
+/* Deposit a character into memory, returning the number of code units, which
+is always 1 in this mode. */
+
+#define PUTCHAR(c, p) (*p = c, 1)
+
+#endif  /* UTF-32 character handling */
+#endif  /* SUPPORT_UTF */
+

 /* Mode-dependent macros that have the same definition in all modes. */

+/* CU2BYTES multiplies a count of code units by the code unit width in bytes.
+PUTINC and PUT2INC invoke PUT or PUT2 with the same arguments and then advance
+a by LINK_SIZE or IMM2_SIZE respectively.
+
+NOTE(review): none of these expansions is wrapped in outer parentheses, and
+PUTINC/PUT2INC are comma expressions, so the result depends on the surrounding
+operators at the point of use. */
+
 #define CU2BYTES(x)     (x)*((PCRE2_CODE_UNIT_WIDTH/8))
 #define PUTINC(a,n,d)   PUT(a,n,d), a += LINK_SIZE
+#define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE


-/* --------------------------- STRUCTURES ----------------------------- */
+/* ----------------------- HIDDEN STRUCTURES ----------------------------- */

 /* The real general context structure. At present it hold only data for custom 
 memory control. */
@ -195,7 +541,7 @@ typedef struct pcre2_real_general_context {
 typedef struct pcre2_real_compile_context {
  pcre2_memctl    memctl;
  int       (*stack_guard)(uint32_t);
-  const unsigned char *tables;
+  const uint8_t *tables;
  uint16_t  bsr_convention;
  uint16_t  newline_convention;
  uint32_t  parens_nest_limit;
@ -217,11 +563,12 @@ typedef struct pcre2_real_match_context {
 /* The real compiled code structure */

 typedef struct pcre2_real_code {
-  pcre2_memctl   memctl;
+  pcre2_memctl   memctl;          /* Memory control fields */
+  const uint8_t *tables;          /* The character tables */
  void    *executable_jit;        /* Pointer to JIT code */  
  uint8_t  start_bitmap[32];      /* Bitmap for starting code unit < 256 */
+  size_t   blocksize;             /* Total (bytes) that was malloc-ed */ 
  uint32_t magic_number;          /* Paranoid and endianness check */
-  uint32_t size;                  /* Total (bytes) that was malloc-ed */ 
  uint32_t compile_options;       /* Options passed to pcre2_compile() */
  uint32_t pattern_options;       /* Options taken from the pattern */
  uint32_t flags;                 /* Various state flags */
@ -239,14 +586,13 @@ typedef struct pcre2_real_code {
  uint16_t name_count;            /* Number of name entries in the table */
 } pcre2_real_code;

-/* The reat match data structure. */
+/* The real match data structure. */

 typedef struct pcre2_real_match_data {
  pcre2_memctl     memctl;
  const pcre2_real_code *code;    /* The pattern used for the match */
  PCRE2_SPTR       subject;       /* The subject that was matched */
  int              rc;            /* The return code from the match */
-  int              utf_reason;    /* Reason code for bad UTF */  
  size_t           leftchar;      /* Offset to leftmost code unit */
  size_t           rightchar;     /* Offset to rightmost code unit */
  size_t           startchar;     /* Offset to starting code unit */  
@ -255,4 +601,71 @@ typedef struct pcre2_real_match_data {
  size_t           ovector[1];    /* The first field */ 
 } pcre2_real_match_data;

+
+/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
+
+/* These structures are not needed for pcre2test. */
+
+#ifndef PCRE2_PCRE2TEST
+
+/* Structure for maintaining a chain of pointers to the currently incomplete
+branches, for testing for left recursion while compiling. Each entry records
+one branch pointer and a link to another entry. */
+
+typedef struct branch_chain {
+  /* Link to the next entry in the chain; presumably the entry for the
+  enclosing branch - TODO confirm against the compiling code. */
+  struct branch_chain *outer;
+  /* The incomplete branch that this entry records. */
+  PCRE2_UCHAR *current_branch;
+} branch_chain;
+
+/* Structure for building a list of named groups during the first pass of
+compiling. The name field points into the pattern itself, so each entry also
+carries the length of the name. */
+
+typedef struct named_group {
+  PCRE2_SPTR   name;          /* Points to the name in the pattern */
+  int          length;        /* Length of the name (presumably code units) */
+  uint32_t     number;        /* Group number */
+} named_group;
+
+/* Structure for passing "static" information around between the functions
+doing the compiling, so that they are thread-safe. The state that would
+otherwise be static is held here and handed from function to function. */
+
+typedef struct compile_data {
+  pcre2_real_compile_context *cx;  /* Points to the compile context */
+
+  /* Pointers to the character tables */
+
+  const uint8_t *lcc;              /* Points to lower casing table */
+  const uint8_t *fcc;              /* Points to case-flipping table */
+  const uint8_t *cbits;            /* Points to character type table */
+  const uint8_t *ctypes;           /* Points to table of type maps */
+
+  /* Pointers into the workspace, the compiled code, and the pattern */
+
+  PCRE2_SPTR start_workspace;      /* The start of working space */
+  PCRE2_SPTR start_code;           /* The start of the compiled code */
+  PCRE2_SPTR start_pattern;        /* The start of the pattern */
+  PCRE2_SPTR end_pattern;          /* The end of the pattern */
+  PCRE2_UCHAR *hwm;                /* High watermark of workspace */
+
+  /* Capture groups and named groups */
+
+  open_capitem *open_caps;         /* Chain of open capture items */
+  named_group *named_groups;       /* Points to vector in pre-compile */
+  PCRE2_UCHAR *name_table;         /* The name/number table */
+  int  names_found;                /* Number of entries so far */
+  int  name_entry_size;            /* Size of each entry */
+  int  named_group_list_size;      /* Number of entries in the list */
+  int  workspace_size;             /* Size of workspace */
+  unsigned int bracount;           /* Count of capturing parens as we compile */
+  int  final_bracount;             /* Saved value after first pass */
+  int  max_lookbehind;             /* Maximum lookbehind (characters) */
+  int  top_backref;                /* Maximum back reference */
+  unsigned int backref_map;        /* Bitmap of low back refs */
+  unsigned int namedrefcount;      /* Number of backreferences by name */
+  int  parens_depth;               /* Depth of nested parentheses */
+  int  assert_depth;               /* Depth of nested assertions */
+
+  /* Options and flags */
+
+  uint32_t external_options;       /* External (initial) options */
+  uint32_t external_flags;         /* External flag bits to be set */
+  int  req_varyopt;                /* "After variable item" flag for reqbyte */
+  BOOL had_accept;                 /* (*ACCEPT) encountered */
+  BOOL had_pruneorskip;            /* (*PRUNE) or (*SKIP) encountered */
+  BOOL check_lookbehind;           /* Lookbehinds need later checking */
+  BOOL dupnames;                   /* Duplicate names exist */
+
+  /* Newline handling */
+
+  int  nltype;                     /* Newline type */
+  int  nllen;                      /* Newline string length */
+  PCRE2_UCHAR nl[4];               /* Newline string when fixed length */
+} compile_data;
+
+#endif  /* PCRE2_PCRE2TEST */
+
 /* End of pcre2_intmodedep.h */
--- a/src/pcre2_newline.c
+++ b/src/pcre2_newline.c
@ -0,0 +1,213 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+         New API code Copyright (c) 2014 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains internal functions for testing newlines when more than
+one kind of newline is to be recognized. When a newline is found, its length is
+returned. In principle, we could implement several newline "types", each
+referring to a different set of newline characters. At present, PCRE2 supports
+only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
+and NLTYPE_ANY. The full list of Unicode newline characters is taken from
+http://unicode.org/unicode/reports/tr18/. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+
+/*************************************************
+*      Check for newline at given position       *
+*************************************************/
+
+/* Tests whether the character at ptr is a newline for the given newline type
+and, if it is, reports how many code units the newline occupies. It is
+guaranteed that the initial value of ptr is less than the end of the string
+that is being processed. The value at lenptr is set only when the result is
+TRUE.
+
+Arguments:
+  ptr          pointer to possible newline
+  type         the newline type: NLTYPE_ANYCRLF is handled specially and any
+                 other value is treated as NLTYPE_ANY
+  endptr       pointer to the end of the string
+  lenptr       where to return the length
+  utf          TRUE if in utf mode
+
+Returns:       TRUE or FALSE
+*/
+
+BOOL
+PRIV(is_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR endptr, int *lenptr,
+  BOOL utf)
+{
+uint32_t c;
+
+/* Pick up the character to be tested. With UTF support, GETCHAR is used in UTF
+mode, and the trailing "else" binds to the plain assignment that follows the
+conditional block. Without UTF support only the plain assignment is compiled;
+the (void) cast presumably just keeps compilers quiet about utf. */
+
+#ifdef SUPPORT_UTF
+if (utf) { GETCHAR(c, ptr); } else
+#else
+(void)utf;
+#endif  /* SUPPORT_UTF */
+
+  c = *ptr;
+
+/* Note that this function is called only for ANY or ANYCRLF. */
+
+/* For ANYCRLF, only LF, CR, and CR followed by LF count. CR followed by LF
+is reported as length 2, provided the LF lies before endptr. */
+
+if (type == NLTYPE_ANYCRLF) switch(c)
+  {
+  case CHAR_LF: *lenptr = 1; return TRUE;
+  case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+               return TRUE;
+  default: return FALSE;
+  }
+
+/* NLTYPE_ANY */
+
+else switch(c)
+  {
+#ifdef EBCDIC
+  case CHAR_NEL:
+#endif
+  case CHAR_LF:
+  case CHAR_VT:
+  case CHAR_FF: *lenptr = 1; return TRUE;
+
+  case CHAR_CR:
+  *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+  return TRUE;
+
+  /* Outside EBCDIC, NEL, LS, and PS are also newlines. With 8-bit code units
+  NEL takes two code units in UTF mode and one otherwise, while LS and PS take
+  three; with wider code units each of them is a single code unit. */
+
+#ifndef EBCDIC
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 3; return TRUE;             /* PS */
+#else  /* 16-bit or 32-bit code units */
+  case CHAR_NEL:
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
+#endif
+#endif /* Not EBCDIC */
+
+  default: return FALSE;
+  }
+}
+
+
+
+/*************************************************
+*     Check for newline at previous position     *
+*************************************************/
+
+/* Tests whether the character that ends just before ptr is a newline for the
+given newline type and, if it is, reports how many code units the newline
+occupies. It is guaranteed that the initial value of ptr is greater than the
+start of the string that is being processed. The value at lenptr is set only
+when the result is TRUE.
+
+Arguments:
+  ptr          pointer to just after the possible newline
+  type         the newline type: NLTYPE_ANYCRLF is handled specially and any
+                 other value is treated as NLTYPE_ANY
+  startptr     pointer to the start of the string
+  lenptr       where to return the length
+  utf          TRUE if in utf mode
+
+Returns:       TRUE or FALSE
+*/
+
+BOOL
+PRIV(was_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR startptr, int *lenptr,
+  BOOL utf)
+{
+uint32_t c;
+ptr--;
+
+/* After stepping back one code unit, pick up the character to be tested. In
+UTF mode BACKCHAR first moves ptr to the start of the character and GETCHAR
+decodes it; otherwise the code unit itself is used. The "else" binds to the
+plain assignment that follows the conditional block. */
+
+#ifdef SUPPORT_UTF
+if (utf)
+  {
+  BACKCHAR(ptr);
+  GETCHAR(c, ptr);
+  }
+else
+#else
+(void)utf;
+#endif  /* SUPPORT_UTF */
+
+  c = *ptr;
+
+/* Note that this function is called only for ANY or ANYCRLF. */
+
+/* For ANYCRLF, only LF, CR, and CR followed by LF count. An LF that is
+preceded by a CR within the string is reported as length 2. */
+
+if (type == NLTYPE_ANYCRLF) switch(c)
+  {
+  case CHAR_LF:
+  *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+  return TRUE;
+
+  case CHAR_CR: *lenptr = 1; return TRUE;
+  default: return FALSE;
+  }
+
+/* NLTYPE_ANY */
+
+else switch(c)
+  {
+  case CHAR_LF:
+  *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+  return TRUE;
+
+#ifdef EBCDIC
+  case CHAR_NEL:
+#endif
+  case CHAR_VT:
+  case CHAR_FF:
+  case CHAR_CR: *lenptr = 1; return TRUE;
+
+  /* Outside EBCDIC, NEL, LS, and PS are also newlines. With 8-bit code units
+  NEL takes two code units in UTF mode and one otherwise, while LS and PS take
+  three; with wider code units each of them is a single code unit. */
+
+#ifndef EBCDIC
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 3; return TRUE;             /* PS */
+#else /* 16-bit or 32-bit code units */
+  case CHAR_NEL:
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
+#endif
+#endif /* Not EBCDIC */
+
+  default: return FALSE;
+  }
+}
+
+/* End of pcre2_newline.c */
--- a/src/pcre2_ord2utf.c
+++ b/src/pcre2_ord2utf.c
@ -0,0 +1,119 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+         New API code Copyright (c) 2014 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This file contains a function that converts a Unicode character code point
+into a UTF string. The behaviour is different for each code unit width. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/* Without SUPPORT_UTF nothing calls this function, but a stub is still
+compiled because some compilers object to a source module with no content. */
+
+#ifndef SUPPORT_UTF
+unsigned int
+PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
+{
+(void)(buffer);
+(void)(cvalue);
+return 0;
+}
+#else  /* SUPPORT_UTF */
+
+
+/*************************************************
+*          Convert code point to UTF             *
+*************************************************/
+
+/* Writes one code point to the buffer in the UTF form that corresponds to the
+code unit width for which the library is being compiled.
+
+Arguments:
+  cvalue     the character value
+  buffer     pointer to buffer for result
+
+Returns:     number of code units placed in the buffer
+*/
+
+unsigned int
+PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+
+/* UTF-8: count the trailing bytes that are needed, fill them in from the last
+one backwards, six bits at a time, and finish with the leading byte. */
+
+int extra = 0;
+int k;
+while (extra < PRIV(utf8_table1_size) &&
+       (int)cvalue > PRIV(utf8_table1)[extra])
+  extra++;
+for (k = extra; k > 0; k--)
+  {
+  buffer[k] = 0x80 | (cvalue & 0x3f);
+  cvalue >>= 6;
+  }
+buffer[0] = PRIV(utf8_table2)[extra] | cvalue;
+return extra + 1;
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+
+/* UTF-16: values above 0xffff become a surrogate pair; anything else is a
+single code unit. */
+
+if (cvalue > 0xffff)
+  {
+  cvalue -= 0x10000;
+  buffer[0] = 0xd800 | (cvalue >> 10);
+  buffer[1] = 0xdc00 | (cvalue & 0x3ff);
+  return 2;
+  }
+buffer[0] = (PCRE2_UCHAR)cvalue;
+return 1;
+
+#else
+
+/* UTF-32: every value is a single code unit. */
+
+buffer[0] = (PCRE2_UCHAR)cvalue;
+return 1;
+#endif
+}
+#endif  /* SUPPORT_UTF */
+
+/* End of pcre2_ord2utf.c */
--- a/src/pcre2_pattern_info.c
+++ b/src/pcre2_pattern_info.c
@ -184,7 +184,7 @@ switch(what)
  break;

  case PCRE2_INFO_SIZE:
-  *((size_t *)where) = re->size;
+  *((size_t *)where) = re->blocksize;
  break;

  default: return PCRE2_ERROR_BADOPTION;
--- a/src/pcre2_string_utils.c
+++ b/src/pcre2_string_utils.c
@ -53,7 +53,7 @@ functions work only on 8-bit data. */


 /*************************************************
-*             Compare two strings                *
+*    Compare two zero-terminated PCRE2 strings   *
 *************************************************/

 /* 
@ -77,4 +77,105 @@ while (*str1 != '\0' || *str2 != '\0')
 return 0;
 }

+
+/*************************************************
+*  Compare zero-terminated PCRE2 & 8-bit strings *
+*************************************************/
+
+/* The two strings are walked in step until both have reached their
+terminating zero or a pair of differing values is found.
+
+Arguments:
+  str1        first string
+  str2        second string
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
+{
+for (; *str1 != '\0' || *str2 != '\0'; str1++, str2++)
+  {
+  PCRE2_UCHAR left = *str1;
+  PCRE2_UCHAR right = *str2;
+  if (left != right) return (left > right)? 1 : -1;
+  }
+return 0;
+}
+
+
+/*************************************************
+*    Compare two PCRE2 strings, given a length   *
+*************************************************/
+
+/* Exactly len code units are compared unless a difference is found first;
+zero code units are not treated as terminators.
+
+Arguments:
+  str1        first string
+  str2        second string
+  len         the length
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
+{
+size_t i;
+for (i = 0; i < len; i++)
+  {
+  PCRE2_UCHAR left = str1[i];
+  PCRE2_UCHAR right = str2[i];
+  if (left != right) return (left > right)? 1 : -1;
+  }
+return 0;
+}
+
+
+/*************************************************
+* Compare PCRE2 string to 8-bit string by length *
+*************************************************/
+
+/* The 8-bit string is declared as 'const char *' because it is almost always
+a literal. Exactly len positions are compared unless a difference is found
+first; zero values are not treated as terminators.
+
+Arguments:
+  str1        first string
+  str2        second string
+  len         the length
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
+{
+size_t i;
+for (i = 0; i < len; i++)
+  {
+  PCRE2_UCHAR left = str1[i];
+  PCRE2_UCHAR right = str2[i];
+  if (left != right) return (left > right)? 1 : -1;
+  }
+return 0;
+}
+
+
+
+/*************************************************
+*          Find the length of a string           *
+*************************************************/
+
+/* Counts the code units that precede the terminating zero.
+
+Argument:    the string
+Returns:     the length
+*/
+
+int
+PRIV(strlen)(PCRE2_SPTR str)
+{
+PCRE2_SPTR p = str;
+while (*p != 0) p++;
+return (int)(p - str);
+}
+
 /* End of pcre2_string_utils.c */
--- a/src/pcre2_tables.c
+++ b/src/pcre2_tables.c
@ -41,22 +41,22 @@ POSSIBILITY OF SUCH DAMAGE.
 /* This module contains some fixed tables that are used by more than one of the
 PCRE code modules. The tables are also #included by the pcre2test program,
 which uses macros to change their names from _pcre2_xxx to xxxx, thereby
-avoiding name clashes with the library. In this case, PCRE2_INCLUDED is 
+avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
 defined. */

-#ifndef PCRE2_INCLUDED           /* We're compiling the library */
+#ifndef PCRE2_PCRE2TEST           /* We're compiling the library */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 #include "pcre2_internal.h"
-#endif /* PCRE2_INCLUDED */
+#endif /* PCRE2_PCRE2TEST */


 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
 the definition is next to the definition of the opcodes in pcre2_internal.h.
 This is mode-dependent, so is skipped when this file is included by pcre2test. */

-#ifndef PCRE2_INCLUDED
+#ifndef PCRE2_PCRE2TEST
 const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
 #endif

@ -71,14 +71,18 @@ const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
 *           Tables for UTF-8 support             *
 *************************************************/

+/* These tables are required by pcre2test in 16- or 32-bit mode, as well
+as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
+handling wide characters. */
+
+#if defined PCRE2_PCRE2TEST || \
+   (defined SUPPORT_UTF && \
+    defined PCRE2_CODE_UNIT_WIDTH && \
+    PCRE2_CODE_UNIT_WIDTH == 8)
+
 /* These are the breakpoints for different numbers of bytes in a UTF-8
 character. */

-#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
-  || (defined PCRE2_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32))
-
-/* These tables are also required by pcretest in 16- or 32-bit mode. */
-
 const int PRIV(utf8_table1)[] =
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};

@ -99,7 +103,7 @@ const uint8_t PRIV(utf8_table4)[] = {
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };

-#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE2_INCLUDED && SUPPORT_PCRE[16|32])*/
+#endif /* UTF-8 support needed */


 #ifdef SUPPORT_UTF
@ -653,7 +657,7 @@ const ucp_type_table PRIV(utt)[] = {
  { 1042, PT_PC, ucp_Zs }
 };

-const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

 #endif /* SUPPORT_UTF */

--- a/src/pcre2_ucd.c
+++ b/src/pcre2_ucd.c
@ -8,7 +8,7 @@ table names from _pcre2_xxx to xxxx, thereby avoiding name clashes
 with the library. At present, just one of these tables is actually
 needed. */

-#ifndef PCRE2_INCLUDED
+#ifndef PCRE2_PCRE2TEST

 #ifdef HAVE_CONFIG_H
 #include "config.h"
@ -16,7 +16,7 @@ needed. */

 #include "pcre2_internal.h"

-#endif /* PCRE2_INCLUDED */
+#endif /* PCRE2_PCRE2TEST */

 /* Unicode character database. */
 /* This file was autogenerated by the MultiStage2.py script. */
@ -78,7 +78,7 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {

 /* When #included in pcre2test, we don't need this large table. */

-#ifndef PCRE2_INCLUDED
+#ifndef PCRE2_PCRE2TEST

 const ucd_record PRIV(ucd_records)[] = { /* 5016 bytes, record size 8 */
  {     9,      0,      2,      0,      0, }, /*   0 */
@ -3295,4 +3295,4 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 51968 bytes, block = 128 */
 #endif
 #endif  /* SUPPORT_UTF */

-#endif  /* PCRE2_INCLUDED */
+#endif  /* PCRE2_PCRE2TEST */
--- a/src/pcre2_valid_utf.c
+++ b/src/pcre2_valid_utf.c
@ -0,0 +1,399 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+         New API code Copyright (c) 2014 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains an internal function for validating UTF character
+strings. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+#ifndef SUPPORT_UTF
+/*************************************************
+*        Dummy function when UTF not supported   *
+*************************************************/
+
+/* This function should never be called when UTF is not supported. */
+
+int
+PRIV(valid_utf)(PCRE2_SPTR string, int length, size_t *erroroffset)
+{
+(void)string;        /* The arguments are deliberately unused in this stub */
+(void)length;
+(void)erroroffset;
+return 0;            /* Always reports "valid" */
+}
+#else
+
+
+
+/*************************************************
+*           Validate a UTF string                *
+*************************************************/
+
+/* This function is called (optionally) at the start of compile or match, to
+check that a supposed UTF string is actually valid. The early check means
+that subsequent code can assume it is dealing with a valid string. The check
+can be turned off for maximum performance, but the consequences of supplying an
+invalid string are then undefined.
+
+Arguments:
+  string       points to the string
+  length       length of string, or -1 if the string is zero-terminated
+  erroroffset  pointer to an error position offset variable
+
+Returns:       == 0    if the string is a valid UTF string
+               != 0    otherwise, setting the offset of the bad character
+*/
+
+int
+PRIV(valid_utf)(PCRE2_SPTR string, int length, size_t *erroroffset)
+{
+register PCRE2_SPTR p;
+register uint32_t c;
+
+if (length < 0)                         /* Zero-terminated: find the length */
+  {
+  for (p = string; *p != 0; p++);
+  length = (int)(p - string);
+  }
+
+/* ----------------- Check a UTF-8 string ----------------- */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+
+/* Originally, this function checked according to RFC 2279, allowing for values
+in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
+in the canonical format. Once somebody had pointed out RFC 3629 to me (it
+obsoletes 2279), additional restrictions were applied. The values are now
+limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
+subrange 0xd800 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
+characters is still checked. Error returns are as follows:
+
+PCRE2_ERROR_UTF8_ERR1   Missing 1 byte at the end of the string
+PCRE2_ERROR_UTF8_ERR2   Missing 2 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR3   Missing 3 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR4   Missing 4 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR5   Missing 5 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR6   2nd-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR7   3rd-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR8   4th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR9   5th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR10  6th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR11  5-byte character is not permitted by RFC 3629
+PCRE2_ERROR_UTF8_ERR12  6-byte character is not permitted by RFC 3629
+PCRE2_ERROR_UTF8_ERR13  4-byte character with value > 0x10ffff is not permitted
+PCRE2_ERROR_UTF8_ERR14  3-byte character with value 0xd800-0xdfff is not permitted
+PCRE2_ERROR_UTF8_ERR15  Overlong 2-byte sequence
+PCRE2_ERROR_UTF8_ERR16  Overlong 3-byte sequence
+PCRE2_ERROR_UTF8_ERR17  Overlong 4-byte sequence
+PCRE2_ERROR_UTF8_ERR18  Overlong 5-byte sequence (won't ever occur)
+PCRE2_ERROR_UTF8_ERR19  Overlong 6-byte sequence (won't ever occur)
+PCRE2_ERROR_UTF8_ERR20  Isolated 0x80 byte (not within UTF-8 character)
+PCRE2_ERROR_UTF8_ERR21  Byte with the illegal value 0xfe or 0xff
+*/
+
+for (p = string; length-- > 0; p++)
+  {
+  register uint32_t ab, d;
+
+  c = *p;
+  if (c < 128) continue;                /* ASCII character */
+
+  if (c < 0xc0)                         /* Isolated 10xx xxxx byte */
+    {
+    *erroroffset = (int)(p - string);
+    return PCRE2_ERROR_UTF8_ERR20;
+    }
+
+  if (c >= 0xfe)                        /* Invalid 0xfe or 0xff bytes */
+    {
+    *erroroffset = (int)(p - string);
+    return PCRE2_ERROR_UTF8_ERR21;
+    }
+
+  ab = PRIV(utf8_table4)[c & 0x3f];     /* Number of additional bytes (1-5) */
+  if (length < (int)ab)                 /* Missing bytes */
+    {
+    *erroroffset = (int)(p - string);
+    switch(ab - length)                 /* Number of bytes that are missing */
+      {
+      case 1: return PCRE2_ERROR_UTF8_ERR1;
+      case 2: return PCRE2_ERROR_UTF8_ERR2;
+      case 3: return PCRE2_ERROR_UTF8_ERR3;
+      case 4: return PCRE2_ERROR_UTF8_ERR4;
+      case 5: return PCRE2_ERROR_UTF8_ERR5;
+      }
+    }
+  length -= ab;                         /* Length remaining */
+
+  /* Check top bits in the second byte */
+
+  if (((d = *(++p)) & 0xc0) != 0x80)
+    {
+    *erroroffset = (int)(p - string) - 1;
+    return PCRE2_ERROR_UTF8_ERR6;
+    }
+
+  /* For each length, check that the remaining bytes start with the 0x80 bit
+  set and not the 0x40 bit. Then check for an overlong sequence, and for the
+  excluded range 0xd800 to 0xdfff. */
+
+  switch (ab)
+    {
+    /* 2-byte character. No further bytes to check for 0x80. Check first byte
+    for xx00 000x (overlong sequence). */
+
+    case 1: if ((c & 0x3e) == 0)
+      {
+      *erroroffset = (int)(p - string) - 1;
+      return PCRE2_ERROR_UTF8_ERR15;
+      }
+    break;
+
+    /* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
+      for 1110 0000, xx0x xxxx (overlong sequence) or
+          1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
+
+    case 2:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if (c == 0xe0 && (d & 0x20) == 0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR16;
+      }
+    if (c == 0xed && d >= 0xa0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR14;
+      }
+    break;
+
+    /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
+       bytes for 1111 0000, xx00 xxxx (overlong sequence), then check for a
+       character greater than 0x0010ffff (f4 8f bf bf) */
+
+    case 3:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if (c == 0xf0 && (d & 0x30) == 0)
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR17;
+      }
+    if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR13;
+      }
+    break;
+
+    /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
+    rejected by the length test below. However, we do the appropriate tests
+    here so that overlong sequences get diagnosed, and also in case there is
+    ever an option for handling these larger code points. */
+
+    /* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
+    1111 1000, xx00 0xxx */
+
+    case 4:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR9;
+      }
+    if (c == 0xf8 && (d & 0x38) == 0)
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR18;
+      }
+    break;
+
+    /* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
+    1111 1100, xx00 00xx. */
+
+    case 5:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR9;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Sixth byte */
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE2_ERROR_UTF8_ERR10;
+      }
+    if (c == 0xfc && (d & 0x3c) == 0)
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE2_ERROR_UTF8_ERR19;
+      }
+    break;
+    }
+
+  /* Character is valid under RFC 2279, but 5-byte and 6-byte characters are
+  excluded by RFC 3629. The pointer p is currently at the last byte of the
+  character. */
+
+  if (ab > 3)
+    {
+    *erroroffset = (int)(p - string) - ab;
+    return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
+    }
+  }
+return 0;
+
+
+/* ----------------- Check a UTF-16 string ----------------- */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+
+/* There's not so much work, nor so many errors, for UTF-16.
+PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at the end of the string
+PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate
+PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+*/
+
+for (p = string; length-- > 0; p++)
+  {
+  c = *p;
+
+  if ((c & 0xf800) != 0xd800)
+    {
+    /* Normal UTF-16 code point. Neither high nor low surrogate. */
+    }
+  else if ((c & 0x0400) == 0)
+    {
+    /* High surrogate. Must be followed by a low surrogate. */
+    if (length == 0)
+      {
+      *erroroffset = p - string;
+      return PCRE2_ERROR_UTF16_ERR1;
+      }
+    p++;
+    length--;
+    if ((*p & 0xfc00) != 0xdc00)
+      {
+      *erroroffset = p - string;
+      return PCRE2_ERROR_UTF16_ERR2;
+      }
+    }
+  else
+    {
+    /* Isolated low surrogate. Always an error. */
+    *erroroffset = p - string;
+    return PCRE2_ERROR_UTF16_ERR3;
+    }
+  }
+return 0;
+
+
+
+/* ----------------- Check a UTF-32 string ----------------- */
+
+#else
+
+/* There is very little to do for a UTF-32 string.
+PCRE2_ERROR_UTF32_ERR1  Surrogate character
+PCRE2_ERROR_UTF32_ERR2  Character > 0x10ffff
+*/
+
+for (p = string; length-- > 0; p++)
+  {
+  c = *p;
+  if ((c & 0xfffff800u) != 0xd800u)
+    {
+    /* Normal UTF-32 code point. Neither high nor low surrogate. */
+    if (c > 0x10ffffu)
+      {
+      *erroroffset = p - string;
+      return PCRE2_ERROR_UTF32_ERR2;
+      }
+    }
+  else
+    {
+    /* A surrogate */
+    *erroroffset = p - string;
+    return PCRE2_ERROR_UTF32_ERR1;
+    }
+  }
+return 0;
+#endif      /* CODE_UNIT_WIDTH */
+}
+#endif      /* SUPPORT_UTF */
+
+/* End of pcre2_valid_utf.c */
--- a/src/pcre2posix.c
+++ b/src/pcre2posix.c
@ -108,7 +108,7 @@ static const int eint2[] = {
  30, REG_ECTYPE,  /* unknown POSIX class name */
  32, REG_INVARG,  /* this version of PCRE2 is not compiled with PCRE2_UTF8 support */
  37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N, \U, or \u */
-  56, REG_INVARG,  /* inconsistent NEWLINE options */
+  56, REG_INVARG,  /* internal error: unknown newline setting */
  67, REG_INVARG,  /* this version of PCRE2 is not compiled with PCRE2_UCP support */
 };

@ -148,6 +148,8 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
 const char *message, *addmessage;
 size_t length, addlength;

+errcode -= COMPILE_ERROR_BASE;
+
 message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
  "unknown error code" : pstring[errcode];
 length = strlen(message) + 1;
@ -224,6 +226,8 @@ preg->re_erroffset = erroffset;
 if (preg->re_pcre2_code == NULL)
  {
  unsigned int i; 
+  if (errorcode < 0) return REG_BADPAT;   /* UTF error */ 
+  errorcode -= COMPILE_ERROR_BASE;
  if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
    return eint1[errorcode];
  for (i = 0; i < sizeof(eint2)/(2*sizeof(const int)); i += 2)
@ -307,13 +311,15 @@ if (rc >= 0)

 /* Unsuccessful match */

+if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
+  return REG_INVARG;
+
 switch(rc)
  {
  default: return REG_ASSERT;
  case PCRE2_ERROR_BADMODE: return REG_INVARG;
  case PCRE2_ERROR_BADMAGIC: return REG_INVARG;
  case PCRE2_ERROR_BADOPTION: return REG_INVARG;
-  case PCRE2_ERROR_BADUTF: return REG_INVARG;
  case PCRE2_ERROR_BADUTF_OFFSET: return REG_INVARG;
  case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE;
  case PCRE2_ERROR_NOMATCH: return REG_NOMATCH;
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -211,10 +211,10 @@ for building the library. */
 #include "pcre2_internal.h"

 /* We need access to some of the data tables that PCRE uses. Defining
-PCRE2_INCLUDED makes some minor changes in the files. The previous definition
+PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
 of PRIV avoids name clashes. */

-#define PCRE2_INCLUDED
+#define PCRE2_PCRE2TEST
 #include "pcre2_tables.c"
 #include "pcre2_ucd.c"

@ -340,12 +340,14 @@ either on a pattern or a data line, so they must all be distinct. */
 #define CTL_FULLBINCODE      0x00000200
 #define CTL_GETALL           0x00000400
 #define CTL_GLOBAL           0x00000800
-#define CTL_INFO             0x00001000
-#define CTL_JITVERIFY        0x00002000
-#define CTL_LIMITS           0x00004000
-#define CTL_MARK             0x00008000
-#define CTL_MEMORY           0x00010000
-#define CTL_POSIX            0x00020000
+#define CTL_HEXPAT           0x00001000
+#define CTL_INFO             0x00002000
+#define CTL_JITVERIFY        0x00004000
+#define CTL_LIMITS           0x00008000
+#define CTL_MARK             0x00010000
+#define CTL_MEMORY           0x00020000
+#define CTL_PATLEN           0x00040000
+#define CTL_POSIX            0x00080000

 #define CTL_DEBUG            (CTL_FULLBINCODE|CTL_INFO)  /* For setting */
 #define CTL_ANYINFO          (CTL_DEBUG|CTL_BINCODE)     /* For testing */
@ -441,6 +443,7 @@ static modstruct modlist[] = {
  { "get",                 MOD_DAT,  MOD_NN,  DO(get_numbers),           DO(get_names) },
  { "getall",              MOD_DAT,  MOD_CTL, CTL_GETALL,                DO(control) },
  { "global",              MOD_PNDP, MOD_CTL, CTL_GLOBAL,                PO(control) },
+  { "hex",                 MOD_PAT,  MOD_CTL, CTL_HEXPAT,                PO(control) },
  { "info",                MOD_PAT,  MOD_CTL, CTL_INFO,                  PO(control) },
  { "jit",                 MOD_PAT,  MOD_IND, 7,                         PO(jit) },
  { "jitstack",            MOD_DAT,  MOD_INT, 0,                         DO(jitstack) },
@ -475,6 +478,7 @@ static modstruct modlist[] = {
  { "tables",              MOD_PAT,  MOD_INT, 0,                         PO(tables_id) },
  { "ucp",                 MOD_PATP, MOD_OPT, PCRE2_UCP,                 PO(options) },
  { "ungreedy",            MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,            PO(options) },
+  { "use_length",          MOD_PAT,  MOD_CTL, CTL_PATLEN,                PO(control) },
  { "utf",                 MOD_PATP, MOD_OPT, PCRE2_UTF,                 PO(options) }
 };

@ -626,13 +630,8 @@ Pattern lines are always copied to pbuffer8 for use in callouts, even if they
 are actually compiled from pbuffer16 or pbuffer32. */

 static int       pbuffer8_size  = 50000;        /* Initial size, bytes */
-static int pbuffer16_size = 0;            /* Only set once needed */
-static int pbuffer32_size = 0;            /* Only set once needed */
-
-static uint8_t  *buffer = NULL;
 static uint8_t  *pbuffer8 = NULL;
-static uint16_t *pbuffer16 = NULL;
-static uint32_t *pbuffer32 = NULL;
+static uint8_t  *buffer = NULL;

 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it
 is cast as needed. For long data lines it grows as necessary. */
@ -655,6 +654,8 @@ pcre2_code_16            *compiled_code16;
 pcre2_compile_context_16 *pat_context16, *default_pat_context16;
 pcre2_match_context_16   *dat_context16, *default_dat_context16;
 pcre2_match_data_16      *match_data16;
+static int pbuffer16_size = 0;            /* Only set once needed */
+static uint16_t *pbuffer16 = NULL;
 #endif

 #ifdef SUPPORT_PCRE32
@ -662,6 +663,8 @@ pcre2_code_32            *compiled_code32;
 pcre2_compile_context_32 *pat_context32, *default_pat_context32;
 pcre2_match_context_32   *dat_context32, *default_dat_context32;
 pcre2_match_data_32      *match_data32;
+static int pbuffer32_size = 0;            /* Only set once needed */
+static uint32_t *pbuffer32 = NULL;
 #endif


@ -997,10 +1000,10 @@ the three different cases. */

 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
-    a = G(pcre2_dfa-match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
+    a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),G(h,BITONE),i,j); \
  else \
-    a = G(pcre2_dfa-match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
+    a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),G(h,BITTWO),i,j)

 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
@ -1178,20 +1181,20 @@ the three different cases. */
 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8))
 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
-  G(a,8) = pcre2_dfa-match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8),i,j)
+  a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8),i,j)
 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
 #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b)
 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
-  G(a,8) = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8))
+  a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8))
 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
-#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(a)
-#define PCRE2_PATTERN_INFO(a,b,c,d) G(a,8) = pcre2_pattern_info_8(G(b,8),c,d)
-#define PCRE2_PRINTINT(a,b) pcre2_printint_8(compiled_code8,outfile,a)
-#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b) \
+#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
+#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
+#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
+#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
-  a = pcre2_substring_copy_bynumber_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
+  a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
@ -1207,12 +1210,12 @@ the three different cases. */
 #define SETFLD(x,y,z) G(x,8)->y = z
 #define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
 #define SETOP(x,y,z) G(x,8) z y
-#define SETCASTPTR(x,y) G(x,8) = (uint8_t) *)y
-#define STRLEN(p) (int)strlen(p)
+#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)y
+#define STRLEN(p) (int)strlen((char *)p)
 #define SUB1(a,b) G(a,8)(G(b,8))
 #define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
-#define TEST(x,r,y) (G(a,8) r (y))
-#define TESTFLD(x,f,r,y) (G(a,8)->f r (y))
+#define TEST(x,r,y) (G(x,8) r (y))
+#define TESTFLD(x,f,r,y) (G(x,8)->f r (y))


 /* ----- Only 16-bit mode is supported ----- */
@ -1231,20 +1234,20 @@ the three different cases. */
 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,G(g,16))
 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
-  G(a,16) = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16),i,j)
+  a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16),i,j)
 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size))
 #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b)
 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
-  G(a,16) = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16))
+  a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16))
 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
-#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(a)
+#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
 #define PCRE2_PATTERN_INFO(a,b,c,d) G(a,16) = pcre2_pattern_info_16(G(b,16),c,d)
-#define PCRE2_PRINTINT(a,b) pcre2_printint_16(compiled_code16,outfile,a)
+#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
-  a = pcre2_substring_copy_bynumber_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e);
+  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e);
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e);
 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
@ -1260,12 +1263,12 @@ the three different cases. */
 #define SETFLD(x,y,z) G(x,16)->y = z
 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
 #define SETOP(x,y,z) G(x,16) z y
-#define SETCASTPTR(x,y) G(x,16) = (uint16_t) *)y
+#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)y
 #define STRLEN(p) (int)strlen16(p)
 #define SUB1(a,b) G(a,16)(G(b,16))
 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
-#define TEST(x,r,y) (G(a,16) r (y))
-#define TESTFLD(x,f,r,y) (G(a,16)->f r (y))
+#define TEST(x,r,y) (G(x,16) r (y))
+#define TESTFLD(x,f,r,y) (G(x,16)->f r (y))


 /* ----- Only 32-bit mode is supported ----- */
@ -1284,20 +1287,20 @@ the three different cases. */
 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,G(g,32))
 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
-  G(a,32) = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32),i,j)
+  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32),i,j)
 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size))
 #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b)
 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
-  G(a,32) = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),g(h,32))
+  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32))
 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
-#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(a)
+#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
 #define PCRE2_PATTERN_INFO(a,b,c,d) G(a,32) = pcre2_pattern_info_32(G(b,32),c,d)
-#define PCRE2_PRINTINT(a,b) pcre2_printint_32(compiled_code32,outfile,a)
+#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
-  a = pcre2_substring_copy_bynumber_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e);
+  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e);
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
@ -1313,12 +1316,12 @@ define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d) \
 #define SETFLD(x,y,z) G(x,32)->y = z
 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
 #define SETOP(x,y,z) G(x,32) z y
-#define SETCASTPTR(x,y) G(x,32) = (uint32_t) *)y
+#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)y
 #define STRLEN(p) (int)strle32(p)
 #define SUB1(a,b) G(a,32)(G(b,32))
 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
-#define TEST(x,r,y) (G(a,32) r (y))
-#define TESTFLD(x,f,r,y) (G(a,32)->f r (y))
+#define TEST(x,r,y) (G(x,32) r (y))
+#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))

 #endif

@ -2669,7 +2672,7 @@ Returns:      nothing
 static void
 show_compile_controls(uint32_t controls, const char *before, const char *after)
 {
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
  before,
  ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
  ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@ -2679,9 +2682,11 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
  ((controls & CTL_FLIPBYTES) != 0)? " flipbytes" : "",
  ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
  ((controls & CTL_GLOBAL) != 0)? " global" : "",
+  ((controls & CTL_HEXPAT) != 0)? " hex" : "", 
  ((controls & CTL_INFO) != 0)? " info" : "",
  ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
  ((controls & CTL_MARK) != 0)? " mark" : "",
+  ((controls & CTL_PATLEN) != 0)? " use_length" : "", 
  ((controls & CTL_POSIX) != 0)? " posix" : "",
  after);
 }
@ -2705,7 +2710,8 @@ Returns:      nothing
 static void
 show_compile_options(uint32_t options, const char *before, const char *after)
 {
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
  before,
  ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
  ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
@ -2896,15 +2902,13 @@ if ((pat_patctl.control & CTL_INFO) != 0)
    fprintf(outfile, "No options\n");
  else
    {
-    if (compile_options != 0)
    show_compile_options(compile_options, "Compile options:", "\n");
-    if (pattern_options != 0)
    show_compile_options(pattern_options, "Pattern options:", "\n");
    }

  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");

-  if (bsr_convention != PCRE2_BSR_DEFAULT)
+  if (bsr_convention != BSR_DEFAULT)
    fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
      "any Unicode newline" : "CR, LF, or CRLF");

@ -3272,25 +3276,63 @@ for(;;)
  if (infile != stdin) fprintf(outfile, "%s", (char *)p);
  }

-/* If the first character after the delimiter is backslash, make
-the pattern end with backslash. This is purely to provide a way
-of testing for the error message when a pattern ends with backslash. */
+/* If the first character after the delimiter is backslash, make the pattern
+end with backslash. This is purely to provide a way of testing for the error
+message when a pattern ends with backslash. */

 if (p[1] == '\\') *p++ = '\\';

-/* Terminate the pattern at the delimiter, and save a copy of the pattern
-for callouts. */
+/* Terminate the pattern at the delimiter, and compute the length. */

 *p++ = 0;
-patlen = p - buffer - 1;
-strncpy((char *)pbuffer8, (char *)(buffer+1), patlen);
+patlen = p - buffer - 2;

-/* Look for modifiers and options after the final delimiter. If successful,
-compile the pattern. */
+/* Look for modifiers and options after the final delimiter. */

 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
 utf = (pat_patctl.options & PCRE2_UTF) != 0;

+/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
+in callouts. Convert to binary if required. */
+
+if ((pat_patctl.control & CTL_HEXPAT) != 0)
+  {
+  uint8_t *pp, *pt;
+  uint32_t c, d;
+  
+  if ((pat_patctl.control & CTL_POSIX) != 0)
+    {
+    fprintf(outfile, "** Hex patterns are not supported for the POSIX API\n");
+    return PR_SKIP;
+    }    
+    
+  pt = pbuffer8;
+  for (pp = buffer + 1; *pp != 0; pp++)
+    {
+    if (isspace(*pp)) continue; 
+    c = toupper(*pp++); 
+    if (*pp == 0)
+      {
+      fprintf(outfile, "** Odd number of digits in hex pattern.\n");
+      return PR_SKIP;
+      }    
+    d = toupper(*pp); 
+    if (!isxdigit(c) || !isxdigit(d))
+      {
+      fprintf(outfile, "** Non-hex-digit in hex pattern.\n");
+      return PR_SKIP;
+      }
+    *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
+             (isdigit(d)? (d - '0') : (d - 'A' + 10)); 
+    }   
+  *pt = 0;
+  patlen = pt - pbuffer8; 
+  }
+else
+  {
+  strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
+  } 
+  
 /* Sort out character tables */

 if (pat_patctl.locale[0] != 0)
@ -3394,12 +3436,12 @@ modes. */

 #ifdef SUPPORT_PCRE16
 if (test_mode == PCRE16_MODE)
-  patlen = to16(pbuffer8, utf, (int)strlen((char *)pbuffer8));
+  patlen = to16(pbuffer8, utf, patlen);
 #endif

 #ifdef SUPPORT_PCRE32
 if (test_mode == PCRE32_MODE)
-  patlen = to32(pbuffer8, utf, (int)strlen((char *)pbuffer8));
+  patlen = to32(pbuffer8, utf, patlen);
 #endif

 switch(patlen)
@ -3423,8 +3465,13 @@ switch(patlen)
  break;
  }

-/* The pattern in now in pbuffer[8|16|32], with the length in patlen. Compile
-many times when timing. */
+/* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
+default, however, we pass a zero-terminated pattern. The length is passed only 
+if we had a hex pattern or if use_length was set. */
+
+if ((pat_patctl.control & (CTL_PATLEN|CTL_HEXPAT)) == 0) patlen = -1;
+
+/* Compile many times when timing. */

 if (timeit > 0)
  {
@ -3960,9 +4007,9 @@ if ((pat_patctl.control & CTL_POSIX) != 0)

  if (dat_datctl.cfail[0] != 0 || dat_datctl.cfail[1] != 0)
    prmsg(&msg, "callout_fail");
-  if (dat_datctl.copy_numbers[0] != 0 || dat_datctl.copy_names[0] != 0)
+  if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
    prmsg(&msg, "copy");
-  if (dat_datctl.get_numbers[0] != 0 || dat_datctl.get_names[0] != 0)
+  if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
    prmsg(&msg, "get");
  if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");

@ -4059,6 +4106,9 @@ for (gmatched = 0;; gmatched++)

 #ifdef FIXME
  jit_was_used = FALSE;
+  
+Need to set newline and bsr in match context and allow them to be
+set in the datctl block. 
 #endif

  /* Adjust match_data according to size of offsets required. */
@ -4502,12 +4552,6 @@ if ((dat_datctl.control & CTL_DFA) != 0)
        }
      break;

-      case PCRE2_ERROR_BADUTF:
-      fprintf(outfile, "Error %d (bad UTF-%d string) offset=%d reason=%d\n",
-        capcount, test_mode, CASTFLD(int, match_data, startchar),
-        CASTFLD(int, match_data, utf_reason));
-      break;
-
      case PCRE2_ERROR_BADUTF_OFFSET:
      fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
      break;