API documentation and a lot of little related changes to the code.

2014-09-19 07:43:39 +00:00 · 2014-09-19 07:43:39 +00:00 · eee8530add
parent de4f203346
commit eee8530add
40 changed files with 3484 additions and 459 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -149,8 +149,8 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
 SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
    "Enable use of Just-in-time compiling in pcre2grep.")

-SET(PCRE2_SUPPORT_UTF OFF CACHE BOOL
-    "Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.")
+SET(PCRE2_SUPPORT_UNICODE OFF CACHE BOOL
+    "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")

 SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
    "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
@ -245,9 +245,9 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF)
        SET(BSR_ANYCRLF 1)
 ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)

-IF(PCRE2_SUPPORT_UTF)
-        SET(SUPPORT_UTF 1)
-ENDIF(PCRE2_SUPPORT_UTF)
+IF(PCRE2_SUPPORT_UNICODE)
+        SET(SUPPORT_UNICODE 1)
+ENDIF(PCRE2_SUPPORT_UNICODE)

 IF(PCRE2_SUPPORT_JIT)
        SET(SUPPORT_JIT 1)
@ -709,7 +709,7 @@ IF(PCRE2_SHOW_REPORT)
  MESSAGE(STATUS "  Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE16}")
  MESSAGE(STATUS "  Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE32}")
  MESSAGE(STATUS "  Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}")
-  MESSAGE(STATUS "  Enable UTF support .............. : ${PCRE2_SUPPORT_UTF}")
+  MESSAGE(STATUS "  Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
  MESSAGE(STATUS "  Newline char/sequence ........... : ${PCRE2_NEWLINE}")
  MESSAGE(STATUS "  \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
  MESSAGE(STATUS "  EBCDIC coding ................... : ${PCRE2_EBCDIC}")
--- a/Makefile.am
+++ b/Makefile.am
@ -76,7 +76,10 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
 #  doc/html/pcreunicode.html

 # FIXME
-#dist_man_MANS = \
+dist_man_MANS = \
+  doc/pcre2api.3
+
+
 #  doc/pcre2-config.1 \
 #  doc/pcre2.3 \
 #  doc/pcre2-16.3 \
@ -108,7 +111,6 @@ AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src
 #  doc/pcre2_utf16_to_host_byte_order.3 \
 #  doc/pcre2_utf32_to_host_byte_order.3 \
 #  doc/pcre2_version.3 \
-#  doc/pcre2api.3 \
 #  doc/pcre2build.3 \
 #  doc/pcre2callout.3 \
 #  doc/pcre2compat.3 \
--- a/7
+++ b/7
@ -314,10 +314,11 @@ else
  fi
 fi

-# UTF support always applies to all bit sizes if both are supported; we can't
-# have UTF-8 support without UTF-16 or UTF-32 support.
+# UTF support is implied by Unicode support, and it always applies to all bit
+# sizes if both are supported; we can't have UTF-8 support without UTF-16 or
+# UTF-32 support.

-$sim ./pcre2test -C utf >/dev/null
+$sim ./pcre2test -C unicode >/dev/null
 utf=$?

 jitopt=
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@ -25,7 +25,7 @@

 #cmakedefine SUPPORT_JIT 1
 #cmakedefine SUPPORT_PCRE2GREP_JIT 1
-#cmakedefine SUPPORT_UTF 1
+#cmakedefine SUPPORT_UNICODE 1
 #cmakedefine SUPPORT_VALGRIND 1

 #cmakedefine BSR_ANYCRLF 1
--- a/configure.ac
+++ b/configure.ac
@ -137,11 +137,11 @@ AC_ARG_ENABLE(rebuild-chartables,
                             [rebuild character tables in current locale]),
              , enable_rebuild_chartables=no)

-# Handle --enable-utf (disabled by default)
-AC_ARG_ENABLE(utf,
-              AS_HELP_STRING([--enable-utf],
-                             [enable UTF-8/16/32 support (incompatible with --enable-ebcdic)]),
-              , enable_utf=unset)
+# Handle --enable-unicode (disabled by default)
+AC_ARG_ENABLE(unicode,
+              AS_HELP_STRING([--enable-unicode],
+                             [enable Unicode support (incompatible with --enable-ebcdic)]),
+              , enable_unicode=unset)

 # Handle newline options
 ac_pcre2_newline=lf
@ -288,10 +288,10 @@ then
  AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled])
 fi

-# enable_utf is disabled by default.
-if test "x$enable_utf" = "xunset"
+# enable_unicode is disabled by default.
+if test "x$enable_unicode" = "xunset"
 then
-  enable_utf=no
+  enable_unicode=no
 fi

 # Convert the newline identifier into the appropriate integer value. These must
@ -320,8 +320,8 @@ fi
 #
 if test "x$enable_ebcdic" = "xyes"; then
  enable_rebuild_chartables=yes
-  if test "x$enable_utf" = "xyes"; then
-    AC_MSG_ERROR([support for EBCDIC and UTF-8/16/32 cannot be enabled at the same time])
+  if test "x$enable_unicode" = "xyes"; then
+    AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
  fi
 fi

@ -372,7 +372,7 @@ AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
 AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
 AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
 AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
-AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
+AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
 AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")

 # Checks for typedefs, structures, and compiler characteristics.
@ -513,12 +513,12 @@ if test "$enable_pcre2grep_jit" = "yes"; then
    Define to any value to enable JIT support in pcre2grep.])
 fi

-if test "$enable_utf" = "yes"; then
-  AC_DEFINE([SUPPORT_UTF], [], [
-    Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
+if test "$enable_unicode" = "yes"; then
+  AC_DEFINE([SUPPORT_UNICODE], [], [
+    Define to any value to enable support for Unicode and UTF encoding.
    This will work even in an EBCDIC environment, but it is incompatible
    with the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC
-    code *or* ASCII/UTF-8/16/32, but not both at once.])
+    code *or* ASCII/Unicode, but not both at once.])
 fi

 if test "$enable_stack_for_recursion" = "no"; then
@ -854,7 +854,7 @@ $PACKAGE-$VERSION configuration summary:
    Build 16-bit pcre2 library ...... : ${enable_pcre16}
    Build 32-bit pcre2 library ...... : ${enable_pcre32}
    Enable JIT compiling support .... : ${enable_jit}
-    Enable UTF-8/16/32 support ...... : ${enable_utf}
+    Enable Unicode support .......... : ${enable_unicode}
    Newline char/sequence ........... : ${enable_newline}
    \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
    EBCDIC coding ................... : ${enable_ebcdic}
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -120,7 +120,7 @@ to the same value:
  pcre16     the 16-bit library was built
  pcre32     the 32-bit library was built
  pcre8      the 8-bit library was built
-  utf        UTF and Unicode property support is available
+  unicode    Unicode support is available
 .sp
 If an unknown option is given, an error message is output; the exit code is 0.
 .TP 10
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@ -0,0 +1,254 @@
+.TH PCRE2UNICODE 3 "16 September 2014" "PCRE2 10.00"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH "UNICODE AND UTF SUPPORT"
+.rs
+.sp
+When PCRE2 is built with Unicode support, it acquires knowledge of Unicode
+character properties and can process text strings in UTF-8, UTF-16, or UTF-32
+format (depending on the code unit width). By default, PCRE2 assumes that one
+code unit is one character. To process a pattern as a UTF string, where a
+character may require more than one code unit, you must call
+.\" HREF
+\fBpcre2_compile()\fP
+.\"
+with the PCRE2_UTF option flag, or the pattern must start with the sequence
+(*UTF). When either of these is the case, both the pattern and any subject
+strings that are matched against it are treated as UTF strings instead of
+strings of individual one-code-unit characters.
+.P
+If you build PCRE2 with Unicode support, the library will be bigger, but the
+additional run time overhead is limited to testing the PCRE2_UTF flag
+occasionally, so should not be very much.
+.
+.
+.SH "UNICODE PROPERTY SUPPORT"
+.rs
+.sp
+When PCRE2 is built with Unicode support, the escape sequences \ep{..},
+\eP{..}, and \eX can be used. The Unicode properties that can be tested are
+limited to the general category properties such as Lu for an upper case letter
+or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
+the derived properties Any and L&. Full lists are given in the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+and
+.\" HREF
+\fBpcre2syntax\fP
+.\"
+documentation. Only the short names for properties are supported. For example,
+\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
+Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
+compatibility with Perl 5.6. PCRE2 does not support this.
+.
+.
+.SH "WIDE CHARACTERS AND UTF MODES"
+.rs
+.sp
+Codepoints less than 256 can be specified in patterns by either braced or
+unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger
+values have to use braced sequences. Unbraced octal code points up to \e777 are
+also recognized; larger ones can be coded using \eo{...}.
+.P
+In UTF modes, repeat quantifiers apply to complete UTF characters, not to
+individual code units.
+.P
+In UTF modes, the dot metacharacter matches one UTF character instead of a
+single code unit.
+.P
+The escape sequence \eC can be used to match a single code unit, in a UTF mode, 
+but its use can lead to some strange effects because it breaks up multi-unit
+characters (see the description of \eC in the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+documentation). The use of \eC is not supported in the alternative matching
+function \fBpcre2_dfa_match()\fP, nor is it supported in UTF mode by the JIT
+optimization. If JIT optimization is requested for a UTF pattern that contains
+\eC, it will not succeed, and so the matching will be carried out by the normal
+interpretive function.
+.P
+The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test
+characters of any code value, but, by default, the characters that PCRE2
+recognizes as digits, spaces, or word characters remain the same set as in
+non-UTF mode, all with code points less than 256. This remains true even when
+PCRE2 is built to include Unicode support, because to do otherwise would slow
+down matching in many common cases. Note that this also applies to \eb
+and \eB, because they are defined in terms of \ew and \eW. If you want
+to test for a wider sense of, say, "digit", you can use explicit Unicode
+property tests such as \ep{Nd}. Alternatively, if you set the PCRE2_UCP option,
+the way that the character escapes work is changed so that Unicode properties
+are used to determine which characters match. There are more details in the
+section on
+.\" HTML <a href="pcre2pattern.html#genericchartypes">
+.\" </a>
+generic character types
+.\"
+in the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+documentation.
+.P
+Similarly, characters that match the POSIX named character classes are all
+low-valued characters, unless the PCRE2_UCP option is set.
+.P
+However, the special horizontal and vertical white space matching escapes (\eh,
+\eH, \ev, and \eV) do match all the appropriate Unicode characters, whether or
+not PCRE2_UCP is set.
+.P
+Case-insensitive matching in UTF mode makes use of Unicode properties. A few
+Unicode characters such as Greek sigma have more than two codepoints that are
+case-equivalent, and these are treated as such.
+.
+.
+.SH "VALIDITY OF UTF STRINGS"
+.rs
+.sp
+When the PCRE2_UTF option is set, the strings passed as patterns and subjects
+are (by default) checked for validity on entry to the relevant functions. 
+If an invalid UTF string is passed, an error return is given. 
+.P
+UTF-16 and UTF-32 strings can indicate their endianness by a special code
+known as a byte-order mark (BOM). The PCRE2 functions do not handle this,
+expecting strings to be in host byte order.
+.P
+The entire string is checked before any other processing takes place. In
+addition to checking the format of the string, there is a check to ensure that
+all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
+The so-called "non-character" code points are not excluded because Unicode
+corrigendum #9 makes it clear that they should not be.
+.P
+Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
+where they are used in pairs to encode code points with values greater than
+0xFFFF. The code points that are encoded by UTF-16 pairs are available
+independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
+surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
+UTF-32.)
+.P
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance, for
+example in the case of a long subject string that is being scanned repeatedly.
+If you set the PCRE2_NO_UTF_CHECK flag at compile time or at run time, PCRE2
+assumes that the pattern or subject it is given (respectively) contains only
+valid UTF code unit sequences.
+.P
+Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the check for
+the pattern; it does not also apply to subject strings. If you want to disable
+the check for a subject string you must pass this option to \fBpcre2_match()\fP
+or \fBpcre2_dfa_match()\fP.
+.P
+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
+is undefined and your program may crash or loop indefinitely.
+.
+.
+.\" HTML <a name="utf8strings"></a>
+.SS "Errors in UTF-8 strings"
+.rs
+.sp
+The following negative error codes are given for invalid UTF-8 strings:
+.sp
+  PCRE2_ERROR_UTF8_ERR1
+  PCRE2_ERROR_UTF8_ERR2
+  PCRE2_ERROR_UTF8_ERR3
+  PCRE2_ERROR_UTF8_ERR4
+  PCRE2_ERROR_UTF8_ERR5
+.sp
+The string ends with a truncated UTF-8 character; the code specifies how many
+bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
+no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
+allows for up to 6 bytes, and this is checked first; hence the possibility of
+4 or 5 missing bytes.
+.sp
+  PCRE2_ERROR_UTF8_ERR6
+  PCRE2_ERROR_UTF8_ERR7
+  PCRE2_ERROR_UTF8_ERR8
+  PCRE2_ERROR_UTF8_ERR9
+  PCRE2_ERROR_UTF8_ERR10
+.sp
+The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
+character do not have the binary value 0b10 (that is, either the most
+significant bit is 0, or the next bit is 1).
+.sp
+  PCRE2_ERROR_UTF8_ERR11
+  PCRE2_ERROR_UTF8_ERR12
+.sp
+A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
+these code points are excluded by RFC 3629.
+.sp
+  PCRE2_ERROR_UTF8_ERR13
+.sp
+A 4-byte character has a value greater than 0x10ffff; these code points are
+excluded by RFC 3629.
+.sp
+  PCRE2_ERROR_UTF8_ERR14
+.sp
+A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
+code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
+from UTF-8.
+.sp
+  PCRE2_ERROR_UTF8_ERR15
+  PCRE2_ERROR_UTF8_ERR16
+  PCRE2_ERROR_UTF8_ERR17
+  PCRE2_ERROR_UTF8_ERR18
+  PCRE2_ERROR_UTF8_ERR19
+.sp
+A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
+value that can be represented by fewer bytes, which is invalid. For example,
+the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
+one byte.
+.sp
+  PCRE2_ERROR_UTF8_ERR20
+.sp
+The two most significant bits of the first byte of a character have the binary
+value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
+byte can only validly occur as the second or subsequent byte of a multi-byte
+character.
+.sp
+  PCRE2_ERROR_UTF8_ERR21
+.sp
+The first byte of a character has the value 0xfe or 0xff. These values can
+never occur in a valid UTF-8 string.
+.
+.
+.\" HTML <a name="utf16strings"></a>
+.SS "Errors in UTF-16 strings"
+.rs
+.sp
+The following negative error codes are given for invalid UTF-16 strings:
+.sp
+  PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
+  PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
+  PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+.sp
+.
+.
+.\" HTML <a name="utf32strings"></a>
+.SS "Errors in UTF-32 strings"
+.rs
+.sp
+The following negative error codes are given for invalid UTF-32 strings:
+.sp
+  PCRE2_ERROR_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
+  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
+.sp
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 16 September 2014
+Copyright (c) 1997-2014 University of Cambridge.
+.fi
--- a/maint/ManyConfigTests
+++ b/maint/ManyConfigTests
@ -202,7 +202,7 @@ if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then
  echo "---------- Maximally configured test with -O2 ----------"
  SAVECLFAGS="$CFLAGS"
  CFLAGS="$CFLAGS -O2"
-  opts="--disable-shared --enable-utf $enable_jit --enable-pcre16 --enable-pcre32"
+  opts="--disable-shared --enable-unicode $enable_jit --enable-pcre16 --enable-pcre32"
  runtest
  CFLAGS="$SAVECFLAGS"
 fi
@ -211,23 +211,23 @@ if [ $usemain -ne 0 ]; then
  echo "---------- Non-JIT tests in the current directory ----------"
  for opts in \
    "" \
-    "--enable-utf --disable-static" \
+    "--enable-unicode --disable-static" \
    "--disable-stack-for-recursion --disable-shared" \
-    "--enable-utf --disable-shared" \
-    "--enable-utf --disable-stack-for-recursion --disable-shared" \
-    "--enable-utf --with-link-size=3 --disable-shared" \
+    "--enable-unicode --disable-shared" \
+    "--enable-unicode --disable-stack-for-recursion --disable-shared" \
+    "--enable-unicode --with-link-size=3 --disable-shared" \
    "--enable-rebuild-chartables --disable-shared" \
    "--enable-newline-is-any --disable-shared" \
    "--enable-newline-is-cr --disable-shared" \
    "--enable-newline-is-crlf --disable-shared" \
    "--enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \
-    "--enable-utf --enable-newline-is-any --disable-stack-for-recursion --disable-static" \
+    "--enable-unicode --enable-newline-is-any --disable-stack-for-recursion --disable-static" \
    "--enable-pcre16" \
    "--enable-pcre16 --disable-stack-for-recursion --disable-shared" \
-    "--enable-pcre16 --enable-utf --disable-stack-for-recursion --disable-shared" \
+    "--enable-pcre16 --enable-unicode --disable-stack-for-recursion --disable-shared" \
    "--enable-pcre32" \
    "--enable-pcre32 --disable-stack-for-recursion --disable-shared" \
-    "--enable-pcre32 --enable-utf --disable-stack-for-recursion --disable-shared" \
+    "--enable-pcre32 --enable-unicode --disable-stack-for-recursion --disable-shared" \
    "--enable-pcre32 --enable-pcre16 --disable-shared" \
    "--enable-pcre32 --enable-pcre16 --disable-pcre8 --disable-shared"
  do
@ -241,18 +241,18 @@ if [ $usejit -ne 0 ]; then
  echo "---------- JIT tests in the current directory ----------"
  for opts in \
    "--enable-jit --disable-shared" \
-    "--enable-jit --enable-utf --disable-shared" \
-    "--enable-jit --enable-utf --with-link-size=3 --disable-shared" \
-    "--enable-jit --enable-pcre16 --enable-utf --disable-shared" \
+    "--enable-jit --enable-unicode --disable-shared" \
+    "--enable-jit --enable-unicode --with-link-size=3 --disable-shared" \
+    "--enable-jit --enable-pcre16 --enable-unicode --disable-shared" \
    "--enable-jit --enable-pcre16 --disable-pcre8 --disable-shared" \
-    "--enable-jit --enable-pcre16 --disable-pcre8 --enable-utf --disable-shared" \
-    "--enable-jit --enable-pcre16 --enable-utf --with-link-size=3 --disable-shared" \
-    "--enable-jit --enable-pcre16 --enable-utf --with-link-size=4 --disable-shared" \
-    "--enable-jit --enable-pcre32 --enable-utf --disable-shared" \
+    "--enable-jit --enable-pcre16 --disable-pcre8 --enable-unicode --disable-shared" \
+    "--enable-jit --enable-pcre16 --enable-unicode --with-link-size=3 --disable-shared" \
+    "--enable-jit --enable-pcre16 --enable-unicode --with-link-size=4 --disable-shared" \
+    "--enable-jit --enable-pcre32 --enable-unicode --disable-shared" \
    "--enable-jit --enable-pcre32 --disable-pcre8 --disable-shared" \
-    "--enable-jit --enable-pcre32 --disable-pcre8 --enable-utf --disable-shared" \
-    "--enable-jit --enable-pcre32 --enable-utf --with-link-size=4 --disable-shared" \
-    "--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-utf --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
+    "--enable-jit --enable-pcre32 --disable-pcre8 --enable-unicode --disable-shared" \
+    "--enable-jit --enable-pcre32 --enable-unicode --with-link-size=4 --disable-shared" \
+    "--enable-jit --enable-pcre32 --enable-pcre16 --disable-pcre8 --enable-unicode --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared"
  do
    runtest
  done
@ -267,8 +267,8 @@ if [ $usevalgrind -ne 0 ]; then
  withvalgrind="with valgrind"

  for opts in \
-    "--enable-utf --disable-stack-for-recursion --disable-shared" \
-    "--enable-utf --with-link-size=3 --disable-shared" \
+    "--enable-unicode --disable-stack-for-recursion --disable-shared" \
+    "--enable-unicode --with-link-size=3 --disable-shared" \
    "--disable-shared"
  do
    opts="--enable-valgrind $opts"
@ -277,8 +277,8 @@ if [ $usevalgrind -ne 0 ]; then

  if [ $usejit -ne 0 ]; then
    for opts in \
-      "--enable-jit --enable-utf --disable-shared" \
-      "--enable-jit --enable-pcre16 --enable-pcre32 --enable-utf"
+      "--enable-jit --enable-unicode --disable-shared" \
+      "--enable-jit --enable-pcre16 --enable-pcre32 --enable-unicode"
    do
      opts="--enable-valgrind $opts"
      runtest
@ -324,7 +324,7 @@ fi

 if [ $usetmp -ne 0 ]; then
  for opts in \
-    "--enable-utf --disable-shared"
+    "--enable-unicode --disable-shared"
  do
    runtest
  done
--- a/maint/MultiStage2.py
+++ b/maint/MultiStage2.py
@ -472,7 +472,7 @@ print("condition to cut out the tables when not needed. But don't leave")
 print("a totally empty module because some compilers barf at that.")
 print("Instead, just supply small dummy tables. */")
 print()
-print("#ifndef SUPPORT_UTF")
+print("#ifndef SUPPORT_UNICODE")
 print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};")
 print("const uint8_t PRIV(ucd_stage1)[] = {0};")
 print("const uint16_t PRIV(ucd_stage2)[] = {0};")
@ -507,7 +507,7 @@ print_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size)
 print("#if UCD_BLOCK_SIZE != %d" % min_block_size)
 print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
 print("#endif")
-print("#endif  /* SUPPORT_UTF */")
+print("#endif  /* SUPPORT_UNICODE */")
 print()
 print("#endif  /* PCRE2_PCRE2TEST */")

--- a/maint/ucptest.c
+++ b/maint/ucptest.c
@ -19,8 +19,8 @@ one. */
 #include "../src/config.h"
 #endif

-#ifndef SUPPORT_UTF
-#define SUPPORT_UTF
+#ifndef SUPPORT_UNICODE
+#define SUPPORT_UNICODE
 #endif

 #include <ctype.h>
--- a/src/config.h.generic
+++ b/src/config.h.generic
@ -278,11 +278,11 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value to enable the 8 bit PCRE2 library. */
 /* #undef SUPPORT_PCRE8 */

-/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
-   This will work even in an EBCDIC environment, but it is incompatible with
-   the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
-   ASCII/UTF-8/16/32, but not both at once. */
-/* #undef SUPPORT_UTF */
+/* Define to any value to enable support for Unicode and UTF encoding. This
+   will work even in an EBCDIC environment, but it is incompatible with the
+   EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
+   ASCII/Unicode, but not both at once. */
+/* #undef SUPPORT_UNICODE */

 /* Define to any value for valgrind support to find invalid memory reads. */
 /* #undef SUPPORT_VALGRIND */
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@ -193,32 +193,32 @@ must all be greater than zero. */
 #define PCRE2_ERROR_UTF32_ERR1      (-27)
 #define PCRE2_ERROR_UTF32_ERR2      (-28)

-/* Error codes for pcre2[_dfa]_match() */
+/* Error codes for pcre2[_dfa]_match(), substring extraction functions, and
+context functions. */

-#define PCRE2_ERROR_BADCOUNT        (-29)
-#define PCRE2_ERROR_BADENDIANNESS   (-30)
-#define PCRE2_ERROR_BADLENGTH       (-31)
-#define PCRE2_ERROR_BADMAGIC        (-32)
-#define PCRE2_ERROR_BADMODE         (-33)
-#define PCRE2_ERROR_BADOFFSET       (-34)
-#define PCRE2_ERROR_BADOPTION       (-35)
-#define PCRE2_ERROR_BADUTFOFFSET    (-36)
-#define PCRE2_ERROR_CALLOUT         (-37)  /* Never used by PCRE2 itself */
-#define PCRE2_ERROR_DFA_BADRESTART  (-38)
-#define PCRE2_ERROR_DFA_RECURSE     (-39)
-#define PCRE2_ERROR_DFA_UCOND       (-40)
-#define PCRE2_ERROR_DFA_UITEM       (-41)
-#define PCRE2_ERROR_DFA_UMLIMIT     (-42)
-#define PCRE2_ERROR_DFA_WSSIZE      (-43)
-#define PCRE2_ERROR_INTERNAL        (-44)
-#define PCRE2_ERROR_JIT_BADOPTION   (-45)
-#define PCRE2_ERROR_JIT_STACKLIMIT  (-46)
-#define PCRE2_ERROR_MATCHLIMIT      (-47)
-#define PCRE2_ERROR_NOMEMORY        (-48)
-#define PCRE2_ERROR_NOSUBSTRING     (-49)
-#define PCRE2_ERROR_NULL            (-50)
-#define PCRE2_ERROR_RECURSELOOP     (-51)
-#define PCRE2_ERROR_RECURSIONLIMIT  (-52)
+#define PCRE2_ERROR_BADDATA         (-29)
+#define PCRE2_ERROR_BADLENGTH       (-30)
+#define PCRE2_ERROR_BADMAGIC        (-31)
+#define PCRE2_ERROR_BADMODE         (-32)
+#define PCRE2_ERROR_BADOFFSET       (-33)
+#define PCRE2_ERROR_BADOPTION       (-34)
+#define PCRE2_ERROR_BADUTFOFFSET    (-35)
+#define PCRE2_ERROR_CALLOUT         (-36)  /* Never used by PCRE2 itself */
+#define PCRE2_ERROR_DFA_BADRESTART  (-37)
+#define PCRE2_ERROR_DFA_RECURSE     (-38)
+#define PCRE2_ERROR_DFA_UCOND       (-39)
+#define PCRE2_ERROR_DFA_UITEM       (-40)
+#define PCRE2_ERROR_DFA_WSSIZE      (-41)
+#define PCRE2_ERROR_INTERNAL        (-42)
+#define PCRE2_ERROR_JIT_BADOPTION   (-43)
+#define PCRE2_ERROR_JIT_STACKLIMIT  (-44)
+#define PCRE2_ERROR_MATCHLIMIT      (-45)
+#define PCRE2_ERROR_NOMEMORY        (-46)
+#define PCRE2_ERROR_NOSUBSTRING     (-47)
+#define PCRE2_ERROR_NULL            (-48)
+#define PCRE2_ERROR_RECURSELOOP     (-49)
+#define PCRE2_ERROR_RECURSIONLIMIT  (-50)
+#define PCRE2_ERROR_UNSET           (-51)

 /* Request types for pcre2_pattern_info() */

@ -257,8 +257,8 @@ must all be greater than zero. */
 #define PCRE2_CONFIG_PARENSLIMIT             7
 #define PCRE2_CONFIG_RECURSIONLIMIT          5
 #define PCRE2_CONFIG_STACKRECURSE            8
-#define PCRE2_CONFIG_UNICODE_VERSION         9
-#define PCRE2_CONFIG_UTF                    10
+#define PCRE2_CONFIG_UNICODE                 9
+#define PCRE2_CONFIG_UNICODE_VERSION        10
 #define PCRE2_CONFIG_VERSION                11

 /* Types for code units in patterns and subject strings. */
@ -338,7 +338,7 @@ expanded for each width below. Start with functions that give general
 information. */

 #define PCRE2_GENERAL_INFO_FUNCTIONS \
-PCRE2_EXP_DECL int       pcre2_config(int, void *, PCRE2_SIZE);
+PCRE2_EXP_DECL int       pcre2_config(uint32_t, void *, PCRE2_SIZE);


 /* Functions for manipulating contexts. */
@ -437,16 +437,16 @@ PCRE2_EXP_DECL PCRE2_SIZE  pcre2_get_startchar(pcre2_match_data *);
 PCRE2_EXP_DECL int       pcre2_substring_copy_byname(pcre2_match_data *, \
                           PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
 PCRE2_EXP_DECL int       pcre2_substring_copy_bynumber(pcre2_match_data *, \
-                           int, PCRE2_UCHAR *, PCRE2_SIZE *); \
+                           unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
 PCRE2_EXP_DECL void      pcre2_substring_free(PCRE2_UCHAR *); \
 PCRE2_EXP_DECL int       pcre2_substring_get_byname(pcre2_match_data *, \
                           PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
 PCRE2_EXP_DECL int       pcre2_substring_get_bynumber(pcre2_match_data *, \
-                           int, PCRE2_UCHAR **, PCRE2_SIZE *); \
+                           unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
 PCRE2_EXP_DECL int       pcre2_substring_length_byname(pcre2_match_data *, \
                           PCRE2_SPTR, PCRE2_SIZE *); \
 PCRE2_EXP_DECL int       pcre2_substring_length_bynumber(pcre2_match_data *, \
-                           int, PCRE2_SIZE *); \
+                           unsigned int, PCRE2_SIZE *); \
 PCRE2_EXP_DECL int       pcre2_substring_nametable_scan(const pcre2_code *, \
                           PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
 PCRE2_EXP_DECL int       pcre2_substring_number_from_name(\
@ -622,24 +622,27 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
 #undef PCRE2_OTHER_FUNCTIONS
 #undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS

-/* Re-define PCRE2_SUFFIX to use the external width value, if defined.
-Otherwise, undefine the other macros and make PCRE2_SUFFIX a no-op, to reduce
-confusion. */
+/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
+PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
+PCRE2_SUFFIX a no-op. Otherwise, generate an error. */

 #undef PCRE2_SUFFIX
-#ifdef PCRE2_CODE_UNIT_WIDTH
-#if PCRE2_CODE_UNIT_WIDTH != 8 && \
-    PCRE2_CODE_UNIT_WIDTH != 16 && \
-    PCRE2_CODE_UNIT_WIDTH != 32
-#error PCRE2_CODE_UNIT_WIDTH must be 8, 16, or 32
-#endif
+#ifndef PCRE2_CODE_UNIT_WIDTH
+#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
+#error Use 8, 16, or 32; or 0 for a multi-width application.
+#else  /* PCRE2_CODE_UNIT_WIDTH is defined */
+#if PCRE2_CODE_UNIT_WIDTH == 8 || \
+    PCRE2_CODE_UNIT_WIDTH == 16 || \
+    PCRE2_CODE_UNIT_WIDTH == 32
 #define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
-#else
+#elif PCRE2_CODE_UNIT_WIDTH == 0
 #undef PCRE2_JOIN
 #undef PCRE2_GLUE
 #define PCRE2_SUFFIX(a) a
-#endif
-
+#else
+#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
+#endif 
+#endif  /* PCRE2_CODE_UNIT_WIDTH is defined */

 #ifdef __cplusplus
 }  /* extern "C" */
--- a/src/pcre2_auto_possess.c
+++ b/src/pcre2_auto_possess.c
@ -231,7 +231,7 @@ static const uint8_t opcode_possessify[] = {



-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 /*************************************************
 *        Check a character and a property        *
 *************************************************/
@ -311,7 +311,7 @@ switch(ptype)

 return FALSE;
 }
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */



@ -368,7 +368,7 @@ PCRE2_UCHAR base;
 PCRE2_SPTR end;
 uint32_t chr;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 uint32_t *clist_dest;
 const uint32_t *clist_src;
 #else
@ -451,7 +451,7 @@ switch(c)
  GETCHARINCTEST(chr, code);
  list[2] = chr;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  if (chr < 128 || (chr < 256 && !utf))
    list[3] = fcc[chr];
  else
@ -470,7 +470,7 @@ switch(c)
    list[4] = NOTACHAR;
  return code;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  case OP_PROP:
  case OP_NOTPROP:
  if (code[0] != PT_CLIST)
@ -812,7 +812,7 @@ for(;;)
    leftop = base_list[0];
    rightop = list[0];

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    accepted = FALSE; /* Always set in non-unicode case. */
    if (leftop == OP_PROP || leftop == OP_NOTPROP)
      {
@ -915,7 +915,7 @@ for(;;)
      }

    else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

    accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
           rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
@ -1039,7 +1039,7 @@ for(;;)
      case OP_EOD:    /* Can always possessify before \z */
      break;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      case OP_PROP:
      case OP_NOTPROP:
      if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -433,7 +433,7 @@ static const int posix_class_maps[] = {
 /* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by
 Unicode property escapes. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 static const PCRE2_UCHAR string_PNd[]  = {
  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
  CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
@ -541,7 +541,7 @@ static PCRE2_SPTR posix_substitutes[] = {
  NULL                  /* ^xdigit */
 };
 #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *))
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 /* Masks for checking option settings. */

@ -887,7 +887,7 @@ for (;;)
    case OP_NOTI:
    branchlength++;
    cc += 2;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
    break;
@ -901,7 +901,7 @@ for (;;)
    case OP_NOTEXACTI:
    branchlength += (int)GET2(cc,1);
    cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
    break;
@ -1315,7 +1315,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
    actual length is stored in the compiled code, so we must update "code"
    here. */

-#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    ccode = code += GET(code, 1);
    goto CHECK_CLASS_REPEAT;
@ -1325,7 +1325,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
    case OP_NCLASS:
    ccode = code + PRIV(OP_lengths)[OP_CLASS];

-#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    CHECK_CLASS_REPEAT:
 #endif

@ -2062,7 +2062,7 @@ return escape;



-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 /*************************************************
 *               Handle \P and \p                 *
 *************************************************/
@ -2678,7 +2678,7 @@ return -1;



-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 /*************************************************
 *           Get othercase range                  *
 *************************************************/
@ -2740,7 +2740,7 @@ for (++c; c <= d; c++)
 *cptr = c;             /* Rest of input range */
 return 0;
 }
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */



@ -2780,7 +2780,7 @@ range. */

 if ((options & PCRE2_CASELESS) != 0)
  {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  if ((options & PCRE2_UTF) != 0)
    {
    int rc;
@ -2810,7 +2810,7 @@ if ((options & PCRE2_CASELESS) != 0)
      }
    }
  else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

  /* Not UTF mode */

@ -2844,7 +2844,7 @@ if (end >= start)
  {
  PCRE2_UCHAR *uchardata = *uchardptr;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  if ((options & PCRE2_UTF) != 0)
    {
    if (start < end)
@ -2860,7 +2860,7 @@ if (end >= start)
      }
    }
  else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

  /* Without UTF support, character values are constrained by the bit length,
  and can only be > 256 for 16-bit and 32-bit libraries. */
@ -3042,7 +3042,7 @@ uint8_t classbits[32];
 not do this for other options (e.g. PCRE2_EXTENDED) because they may change
 dynamically as we process the pattern. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 BOOL utf = (options & PCRE2_UTF) != 0;
 #if PCRE2_CODE_UNIT_WIDTH != 32
 PCRE2_UCHAR utf_units[6];      /* For setting up multi-cu chars */
@ -3235,7 +3235,7 @@ for (;; ptr++)
          break;
          }
        ptr++;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf) FORWARDCHAR(ptr);
 #endif
        }
@ -3474,7 +3474,7 @@ for (;; ptr++)
        goto FAILED;
        }

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf && HAS_EXTRALEN(c))
        {                           /* Braces are required because the */
        GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
@ -3556,7 +3556,7 @@ for (;; ptr++)
        that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
        directly. UCP support is not available unless UTF support is.*/

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if ((options & PCRE2_UCP) != 0)
          {
          unsigned int ptype = 0;
@ -3599,7 +3599,7 @@ for (;; ptr++)
            break;
            }
          }
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

        /* In the non-UCP case, or when UCP makes no difference, we build the
        bit map for the POSIX class in a chunk of local store because we may be
@ -3689,7 +3689,7 @@ for (;; ptr++)

          switch (escape)
            {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
            case ESC_du:     /* These are the values given for \d etc */
            case ESC_DU:     /* when PCRE2_UCP is set. We replace the */
            case ESC_wu:     /* escape sequence with an appropriate \p */
@ -3757,7 +3757,7 @@ for (;; ptr++)
              cb, PRIV(vspace_list));
            break;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
            case ESC_p:
            case ESC_P:
              {
@ -3840,7 +3840,7 @@ for (;; ptr++)

        /* Otherwise, we have a potential range; pick up the next character */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {                           /* Braces are required because the */
          GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
@ -3940,7 +3940,7 @@ for (;; ptr++)

        if (negate_class)
          {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          int d;
 #endif
          if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
@ -3951,7 +3951,7 @@ for (;; ptr++)
          one other case. If so, generate a special OP_NOTPROP item instead of
          OP_NOTI. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf && (options & PCRE2_CASELESS) != 0 &&
              (d = UCD_CASESET(c)) != 0)
            {
@ -4032,7 +4032,7 @@ for (;; ptr++)
    be listed) there are no characters < 256, we can omit the bitmap in the
    actual compiled code. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
 #elif PCRE2_CODE_UNIT_WIDTH != 8
    if (xclass && !should_flip_negation)
@ -4157,7 +4157,7 @@ for (;; ptr++)
            break;
            }
          p++;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf) FORWARDCHAR(p);
 #endif
          }           /* Loop for comment characters */
@ -4265,7 +4265,7 @@ for (;; ptr++)
    /* If previous was a character type match (\d or similar), abolish it and
    create a suitable repeat item. The code is shared with single-character
    repeats by setting op_type to add a suitable offset into repeat_type. Note
-    the the Unicode property types will be present only when SUPPORT_UTF is
+    that the Unicode property types will be present only when SUPPORT_UNICODE is
    defined, but we don't wrap the little bits of code here because it just
    makes it horribly messy. */

@ -4880,7 +4880,7 @@ for (;; ptr++)
        case OP_NOTEXACT:
        case OP_NOTEXACTI:
        tempcode += PRIV(OP_lengths)[*tempcode];
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf && HAS_EXTRALEN(tempcode[-1]))
          tempcode += GET_EXTRALEN(tempcode[-1]);
 #endif
@ -6407,7 +6407,7 @@ for (;; ptr++)

      /* So are Unicode property matches, if supported. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      else if (escape == ESC_P || escape == ESC_p)
        {
        BOOL negated;
@ -6442,7 +6442,7 @@ for (;; ptr++)
        if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
             cb->max_lookbehind == 0)
          cb->max_lookbehind = 1;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (escape >= ESC_DU && escape <= ESC_wu)
          {
          nestptr = ptr + 1;                   /* Where to resume */
@ -6479,7 +6479,7 @@ for (;; ptr++)
    mclength = 1;
    mcbuffer[0] = c;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf && HAS_EXTRALEN(c))
      ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
 #endif
@ -6493,7 +6493,7 @@ for (;; ptr++)
    /* For caseless UTF mode, check whether this character has more than one
    other case. If so, generate a special OP_PROP item instead of OP_CHARI. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf && (options & PCRE2_CASELESS) != 0)
      {
      GETCHAR(c, mcbuffer);
@ -7527,7 +7527,7 @@ ptr += skipatstart;

 /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */

-#ifndef SUPPORT_UTF
+#ifndef SUPPORT_UNICODE
 if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
  {
  errorcode = ERR32;
@ -7911,7 +7911,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
      points and cannot have another case. In 16-bit and 32-bit modes, we can
      check wide characters when UTF (and therefore UCP) is supported. */

-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
      else if (firstcu <= MAX_UTF_CODE_POINT &&
               UCD_OTHERCASE(firstcu) != firstcu)
        re->flags |= PCRE2_FIRSTCASELESS;
@ -7945,7 +7945,7 @@ if (reqcuflags >= 0 &&
      {
      if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
      }
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
    else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
      re->flags |= PCRE2_LASTCASELESS;
 #endif
--- a/src/pcre2_config.c
+++ b/src/pcre2_config.c
@ -75,7 +75,7 @@ Returns:           0 if data returned
 */

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
-pcre2_config(int what, void *where, size_t length)
+pcre2_config(uint32_t what, void *where, size_t length)
 {
 if (length < sizeof(int)) return PCRE2_ERROR_BADLENGTH;

@ -145,7 +145,7 @@ switch (what)
  
  case PCRE2_CONFIG_UNICODE_VERSION:
    { 
-#if defined SUPPORT_UTF
+#if defined SUPPORT_UNICODE
    const char *v = PRIV(unicode_version);
 #else
    const char *v = "Unicode not supported";
@ -158,8 +158,8 @@ switch (what)
    }
  break;

-  case PCRE2_CONFIG_UTF:
-#if defined SUPPORT_UTF
+  case PCRE2_CONFIG_UNICODE:
+#if defined SUPPORT_UNICODE
  *((int *)where) = 1;
 #else
  *((int *)where) = 0;
--- a/src/pcre2_context.c
+++ b/src/pcre2_context.c
@ -263,8 +263,9 @@ if (mcontext != NULL)
 *             Set values in contexts             *
 *************************************************/

-/* All these functions return 1 for success or 0 if invalid data is given. Only 
-some of the functions are able to test the validity of the data. */
+/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
+data is given. Only some of the functions are able to test the validity of the
+data. */


 /* ------------ Compile contexts ------------ */
@ -274,7 +275,7 @@ pcre2_set_character_tables(pcre2_compile_context *ccontext,
  const unsigned char *tables)
 {
 ccontext->tables = tables;
-return 1;
+return 0;
 }

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 
@ -285,10 +286,10 @@ switch(value)
  case PCRE2_BSR_ANYCRLF:
  case PCRE2_BSR_UNICODE:
  ccontext->bsr_convention = value;
-  return 1;
+  return 0;
  
  default:
-  return 0;  
+  return PCRE2_ERROR_BADDATA;  
  }
 }

@ -303,10 +304,10 @@ switch(newline)
  case PCRE2_NEWLINE_ANY:
  case PCRE2_NEWLINE_ANYCRLF:
  ccontext->newline_convention = newline;
-  return 1;
+  return 0;
     
  default: 
-  return 0;  
+  return PCRE2_ERROR_BADDATA;  
  }   
 }

@ -314,7 +315,7 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
 {
 ccontext->parens_nest_limit = limit;
-return 1;
+return 0;
 }

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -322,7 +323,7 @@ pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
  int (*guard)(uint32_t))
 {
 ccontext->stack_guard = guard;
-return 1;
+return 0;
 }


@ -336,10 +337,10 @@ switch(value)
  case PCRE2_BSR_ANYCRLF:
  case PCRE2_BSR_UNICODE:
  mcontext->bsr_convention = value;
-  return 1;
+  return 0;
  
  default:
-  return 0;  
+  return PCRE2_ERROR_BADDATA;  
  }
 }

@ -354,10 +355,10 @@ switch(newline)
  case PCRE2_NEWLINE_ANY:
  case PCRE2_NEWLINE_ANYCRLF:
  mcontext->newline_convention = newline;
-  return 1;
+  return 0;
     
  default: 
-  return 0;  
+  return PCRE2_ERROR_BADDATA;  
  }   
 }

@ -367,21 +368,21 @@ pcre2_set_callout(pcre2_match_context *mcontext,
 {
 mcontext->callout = callout;
 mcontext->callout_data = callout_data;
-return 1;
+return 0;
 }

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
 {
 mcontext->match_limit = limit;
-return 1;
+return 0;
 }

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
 {
 mcontext->recursion_limit = limit;
-return 1;
+return 0;
 }

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -399,7 +400,7 @@ mcontext->stack_memctl.memory_data = mydata;
 (void)myfree;
 (void)mydata;
 #endif
-return 1;
+return 0;
 }   

 /* End of pcre2_context.c */
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@ -391,7 +391,7 @@ PCRE2_SPTR start_subject = mb->start_subject;
 PCRE2_SPTR end_subject = mb->end_subject;
 PCRE2_SPTR start_code = mb->start_code;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
 #else
 BOOL utf = FALSE;
@ -447,7 +447,7 @@ if (*first_op == OP_REVERSE)
  /* If we can't go back the amount required for the longest lookbehind
  pattern, go back as far as we can; some alternatives may still be viable. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  /* In character mode we have to step back character by character */

  if (utf)
@ -570,11 +570,11 @@ for (;;)
  if (ptr < end_subject)
    {
    clen = 1;        /* Number of data items in the character */
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    GETCHARLENTEST(c, ptr, clen);
 #else
    c = *ptr;
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
    }
  else
    {
@ -652,9 +652,9 @@ for (;;)
    if (coptable[codevalue] > 0)
      {
      dlen = 1;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
      d = code[coptable[codevalue]];
      if (codevalue >= OP_TYPESTAR)
        {
@ -948,11 +948,11 @@ for (;;)
          {
          PCRE2_SPTR temp = ptr - 1;
          if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
          if (utf) { BACKCHAR(temp); }
 #endif
          GETCHARTEST(d, temp);
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if ((mb->poptions & PCRE2_UCP) != 0)
            {
            if (d == '_') left_word = TRUE; else
@ -972,12 +972,12 @@ for (;;)
          if (ptr >= mb->last_used_ptr)
            {
            PCRE2_SPTR temp = ptr + 1;
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
            if (utf) { FORWARDCHAR(temp); }
 #endif
            mb->last_used_ptr = temp;
            } 
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if ((mb->poptions & PCRE2_UCP) != 0)
            {
            if (c == '_') right_word = TRUE; else
@ -1003,7 +1003,7 @@ for (;;)
      if the support is in the binary; otherwise a compile-time error occurs.
      */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      case OP_PROP:
      case OP_NOTPROP:
      if (clen > 0)
@ -1258,7 +1258,7 @@ for (;;)
      argument. It keeps the code above fast for the other cases. The argument
      is in the d variable. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      case OP_PROP_EXTRA + OP_TYPEPLUS:
      case OP_PROP_EXTRA + OP_TYPEMINPLUS:
      case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
@ -1501,7 +1501,7 @@ for (;;)
      break;

      /*-----------------------------------------------------------------*/
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      case OP_PROP_EXTRA + OP_TYPEQUERY:
      case OP_PROP_EXTRA + OP_TYPEMINQUERY:
      case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
@ -1785,7 +1785,7 @@ for (;;)
      break;

      /*-----------------------------------------------------------------*/
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      case OP_PROP_EXTRA + OP_TYPEEXACT:
      case OP_PROP_EXTRA + OP_TYPEUPTO:
      case OP_PROP_EXTRA + OP_TYPEMINUPTO:
@ -2063,7 +2063,7 @@ for (;;)
      case OP_CHARI:
      if (clen == 0) break;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
@ -2077,7 +2077,7 @@ for (;;)
          }
        }
      else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
      /* Not UTF mode */
        {
        if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
@ -2086,7 +2086,7 @@ for (;;)
      break;


-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      /*-----------------------------------------------------------------*/
      /* This is a tricky one because it can match more than one character.
      Find out how many characters to skip, and then set up a negative state
@ -2222,11 +2222,11 @@ for (;;)
      if (clen > 0)
        {
        unsigned int otherd;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf && d >= 128)
          otherd = UCD_OTHERCASE(d);
        else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
        otherd = TABLE_GET(d, fcc, d);
        if (c != d && c != otherd)
          { ADD_NEW(state_offset + dlen + 1, 0); }
@ -2257,11 +2257,11 @@ for (;;)
        uint32_t otherd = NOTACHAR;
        if (caseless)
          {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
          otherd = TABLE_GET(d, fcc, d);
          }
        if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2300,11 +2300,11 @@ for (;;)
        uint32_t otherd = NOTACHAR;
        if (caseless)
          {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
          otherd = TABLE_GET(d, fcc, d);
          }
        if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2341,11 +2341,11 @@ for (;;)
        uint32_t otherd = NOTACHAR;
        if (caseless)
          {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
          otherd = TABLE_GET(d, fcc, d);
          }
        if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2374,11 +2374,11 @@ for (;;)
        uint32_t otherd = NOTACHAR;
        if (caseless)
          {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
          otherd = TABLE_GET(d, fcc, d);
          }
        if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2414,11 +2414,11 @@ for (;;)
        uint32_t otherd = NOTACHAR;
        if (caseless)
          {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf && d >= 128)
            otherd = UCD_OTHERCASE(d);
          else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
          otherd = TABLE_GET(d, fcc, d);
          }
        if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@ -2747,7 +2747,7 @@ for (;;)
          for (rc = rc*2 - 2; rc >= 0; rc -= 2)
            {
            int charcount = local_offsets[rc+1] - local_offsets[rc];
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
            if (utf)
              {
              PCRE2_SPTR p = start_subject + local_offsets[rc];
@ -2851,7 +2851,7 @@ for (;;)
            PCRE2_SPTR p = ptr;
            PCRE2_SPTR pp = local_ptr;
            charcount = (int)(pp - p);
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
            if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
 #endif
            ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
@ -2933,7 +2933,7 @@ for (;;)
            }
          else
            {
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
            if (utf)
              {
              PCRE2_SPTR p = start_subject + local_offsets[0];
@ -3106,14 +3106,24 @@ if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
 if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;

-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
+/* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
+
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE2_ERROR_BADMAGIC. */
+
+if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
+
+#ifdef FIXME
+If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
+this comment and code:
+
+/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
+with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
+with different endianness. */

-if (re->magic_number != MAGIC_NUMBER)
  return re->magic_number == REVERSED_MAGIC_NUMBER?
    PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
+#endif

 /* Check the code unit width. */

@ -3238,7 +3248,7 @@ switch(newline)
 we must also check that a starting offset does not point into the middle of a
 multiunit character. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
  {
  match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
@ -3253,7 +3263,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
    return PCRE2_ERROR_BADUTFOFFSET;
 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
  }
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 /* Set up the first code unit to match, if available. The first_codeunit value
 is never set for an anchored regular expression, but the anchoring may be
@ -3270,7 +3280,7 @@ if (!anchored)
    if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
      {
      first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
      if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
 #endif
      }
@ -3290,7 +3300,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
    {
    req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
    if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
 #endif
    }
@ -3327,7 +3337,7 @@ for (;;)
    if (firstline)
      {
      PCRE2_SPTR t = start_match;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        while (t < mb->end_subject && !IS_NEWLINE(t))
@ -3362,7 +3372,7 @@ for (;;)
      {
      if (start_match > mb->start_subject + start_offset)
        {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
@ -3516,7 +3526,7 @@ for (;;)

  if (firstline && IS_NEWLINE(start_match)) break;
  start_match++;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  if (utf)
    {
    ACROSSCHAR(start_match < end_subject, *start_match,
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@ -198,35 +198,34 @@ static const char match_error_texts[] =
  "UTF-16 error: isolated low surrogate\0" 
  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-32 error: code points greater than 0x10ffff are not defined\0" 
-  "bad count value\0"
+  "bad data value\0"
  /* 30 */ 
-  "pattern compiled with other endianness\0"
  "bad length\0"
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
-  /* 35 */ 
  "bad option value\0"
+  /* 35 */ 
  "bad offset into UTF string\0"
  "callout error code\0"              /* Never returned by PCRE2 itself */   
  "invalid data in workspace for DFA restart\0"
  "too much recursion for DFA matching\0"
-  /* 40 */ 
  "backreference condition or recursion test not supported for DFA matching\0"
+  /* 40 */ 
  "item unsupported for DFA matching\0"
-  "match limit not supported for DFA matching\0"
  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
-  /* 45 */ 
  "bad JIT option\0"
  "JIT stack limit reached\0"
+  /* 45 */ 
  "match limit exceeded\0"
  "no more memory\0"
  "unknown or unset substring\0" 
-  /* 50 */ 
  "NULL argument passed\0"
  "nested recursion at the same subject position\0"
+  /* 50 */ 
  "recursion limit exceeded\0"
+  "requested value is not set\0" 
  ; 


--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@ -38,11 +38,11 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */

-/* We do not support both EBCDIC and UTF at the same time. The "configure"
+/* We do not support both EBCDIC and Unicode at the same time. The "configure"
 script prevents both being selected, but not everybody uses "configure". */

-#if defined EBCDIC && defined SUPPORT_UTF
-#error The use of both EBCDIC and SUPPORT_UTF is not supported.
+#if defined EBCDIC && defined SUPPORT_UNICODE
+#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
 #endif

 /* Standard C headers */
@ -597,14 +597,14 @@ there are some longer strings as well.

 This means that, on EBCDIC platforms, the PCRE library can handle either
 EBCDIC, or UTF-8, but not both. To support both in the same compiled library
-would need different lookups depending on whether PCRE_UTF8 was set or not.
+would need different lookups depending on whether PCRE2_UTF was set or not.
 This would make it impossible to use characters in switch/case statements,
 which would reduce performance. For a theoretical use (which nobody has asked
 for) in a minority area (EBCDIC platforms), this is not sensible. Any
 application that did need both could compile two versions of the library, using
 macros to give the functions distinct names. */

-#ifndef SUPPORT_UTF
+#ifndef SUPPORT_UNICODE

 /* UTF-8 support is not enabled; use the platform-dependent character literals
 so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
@ -920,7 +920,7 @@ a positive value. */
 #define STRING_LIMIT_MATCH_EQ             "LIMIT_MATCH="
 #define STRING_LIMIT_RECURSION_EQ         "LIMIT_RECURSION="

-#else  /* SUPPORT_UTF */
+#else  /* SUPPORT_UNICODE */

 /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
 works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
@ -1189,7 +1189,7 @@ only. */
 #define STRING_LIMIT_MATCH_EQ             STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
 #define STRING_LIMIT_RECURSION_EQ         STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN

-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 /* -------------------- End of character and string names -------------------*/

@ -1775,10 +1775,10 @@ typedef struct {

 /* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */

-/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is not
-defined, so the following items are omitted. */
+/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as
+0, so the following items are omitted. */

-#ifdef PCRE2_CODE_UNIT_WIDTH
+#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0

 /* This is the largest non-UTF code point. */

--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@ -208,9 +208,9 @@ tables. */
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #define MAX_255(c) TRUE
 #define MAX_MARK ((1u << 8) - 1)
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 #define SUPPORT_WIDE_CHARS
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
 #define TABLE_GET(c, table, default) ((table)[c])

 #else  /* Code units are 16 or 32 bits */
@ -246,7 +246,7 @@ complicated ones for UTF characters. GETCHARLENTEST and other macros are not
 used when UTF is not supported. To make sure they can never even appear when
 UTF support is omitted, we don't even define them. */

-#ifndef SUPPORT_UTF
+#ifndef SUPPORT_UNICODE

 /* #define MAX_UTF_SINGLE_CU */
 /* #define HAS_EXTRALEN(c) */
@ -263,7 +263,7 @@ UTF support is omitted, we don't even define them. */
 /* #define FORWARDCHAR(eptr) */
 /* #define ACROSSCHAR(condition, eptr, action) */

-#else   /* SUPPORT_UTF */
+#else   /* SUPPORT_UNICODE */

 /* ------------------- 8-bit support  ------------------ */

@ -527,7 +527,7 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
 #define PUTCHAR(c, p) (*p = c, 1)

 #endif  /* UTF-32 character handling */
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */


 /* Mode-dependent macros that have the same definition in all modes. */
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@ -145,7 +145,7 @@ static int
 match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr, 
  match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
 {
-#if defined SUPPORT_UTF
+#if defined SUPPORT_UNICODE
 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
 #endif

@ -173,7 +173,7 @@ length = mb->ovector[offset+1] - mb->ovector[offset];

 if (caseless)
  {
-#if defined SUPPORT_UTF
+#if defined SUPPORT_UNICODE
  if (utf)
    {
    /* Match characters up to the end of the reference. NOTE: the number of
@ -352,7 +352,7 @@ typedef struct heapframe {
  struct heapframe *Xprevframe;
  struct heapframe *Xnextframe;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  PCRE2_SPTR Xcharptr;
 #endif
  PCRE2_SPTR Xeptr;
@ -378,7 +378,7 @@ typedef struct heapframe {
  uint32_t Xop;
  uint32_t Xsave_capture_last;
 
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  uint32_t Xprop_value;
  int Xprop_type;
  int Xprop_fail_result;
@ -399,7 +399,7 @@ typedef struct heapframe {
  eptrblock Xnewptrb;
  recursion_info Xnew_recursive;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  PCRE2_UCHAR Xocchars[6];
 #endif   
 } heapframe;
@ -610,7 +610,7 @@ HEAP_RECURSE:

 /* Ditto for the local variables */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 #define charptr            frame->Xcharptr
 #define prop_value         frame->Xprop_value
 #define prop_type          frame->Xprop_type
@ -666,7 +666,7 @@ declarations can be cut out in a block. The only declarations within blocks
 below are for variables that do not have to be preserved over a recursive call
 to RMATCH(). */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 PCRE2_SPTR charptr;
 #endif
 PCRE2_SPTR callpat;
@ -684,7 +684,7 @@ uint32_t number;
 uint32_t op;
 uint32_t save_capture_last;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 uint32_t prop_value;
 int prop_type;
 int prop_fail_result;
@ -721,7 +721,7 @@ the alternative names that are used. */
 /* These statements are here to stop the compiler complaining about unitialized
 variables. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 prop_value = 0;
 prop_fail_result = 0;
 #endif
@ -742,7 +742,7 @@ call because it's quite a complicated macro. It has to be used in one
 particular way. This shouldn't, however, impact performance when true recursion
 is being used. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 utf = (mb->poptions & PCRE2_UTF) != 0;
 #else
 utf = FALSE;
@ -1662,7 +1662,7 @@ for (;;)
    back a number of characters, not bytes. */

    case OP_REVERSE:
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      {
      i = GET(ecode, 1);
@ -2197,7 +2197,7 @@ for (;;)
      be "non-word" characters. Remember the earliest consulted character for
      partial matching. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        /* Get status of previous character */
@ -2257,7 +2257,7 @@ for (;;)
        if (eptr == mb->start_subject) prev_is_word = FALSE; else
          {
          if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if ((mb->poptions & PCRE2_UCP) != 0)
            {
            c = eptr[-1];
@ -2283,7 +2283,7 @@ for (;;)
        else
          {
          if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1; 
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if ((mb->poptions & PCRE2_UCP) != 0)
            {
            c = *eptr;
@ -2334,7 +2334,7 @@ for (;;)
      RRETURN(MATCH_NOMATCH);
      }
    eptr++;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
 #endif
    ecode++;
@ -2550,7 +2550,7 @@ for (;;)
    ecode++;
    break;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    /* Check the next character by Unicode property. We will get here only
    if the support is in the binary; otherwise a compile-time error occurs. */

@ -2684,7 +2684,7 @@ for (;;)
    CHECK_PARTIAL();
    ecode++;
    break;
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */


    /* Match a back reference, possibly repeatedly. Look past the end of the
@ -2955,7 +2955,7 @@ for (;;)

      /* First, ensure the minimum number of matches are present. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        for (i = 1; i <= min; i++)
@ -3007,7 +3007,7 @@ for (;;)

      if (minimize)
        {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          for (fi = min;; fi++)
@ -3063,7 +3063,7 @@ for (;;)
        {
        pp = eptr;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          for (i = min; i < max; i++)
@ -3232,7 +3232,7 @@ for (;;)
            SCHECK_PARTIAL();
            break;
            }
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          GETCHARLENTEST(c, eptr, len);
 #else
          c = *eptr;
@ -3248,7 +3248,7 @@ for (;;)
          RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
          if (eptr-- == pp) break;        /* Stop if tried at original pos */
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (utf) BACKCHAR(eptr);
 #endif
          }
@ -3262,7 +3262,7 @@ for (;;)
    /* Match a single character, casefully */

    case OP_CHAR:
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      {
      length = 1;
@ -3299,7 +3299,7 @@ for (;;)
      RRETURN(MATCH_NOMATCH);
      }

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      {
      length = 1;
@ -3334,7 +3334,7 @@ for (;;)

        if (fc != dc)
          {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
          if (dc != UCD_OTHERCASE(fc))
 #endif
            RRETURN(MATCH_NOMATCH);
@ -3342,7 +3342,7 @@ for (;;)
        }
      }
    else
-#endif   /* SUPPORT_UTF */
+#endif   /* SUPPORT_UNICODE */

    /* Not UTF mode */
      {
@ -3436,7 +3436,7 @@ for (;;)
    for speed. */

    REPEATCHAR:
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      {
      length = 1;
@ -3527,7 +3527,7 @@ for (;;)
      value of fc will always be < 128. */
      }
    else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

      /* When not in UTF-8 mode, load a single-byte character. */
      fc = *ecode++;
@ -3547,11 +3547,11 @@ for (;;)
      /* fc must be < 128 if UTF is enabled. */
      foc = mb->fcc[fc];
 #else
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf && fc > 127)
        foc = UCD_OTHERCASE(fc);
      else
-#endif /* SUPPORT_UTF */
+#endif /* SUPPORT_UNICODE */
        foc = TABLE_GET(fc, mb->fcc, fc);
 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */

@ -3682,7 +3682,7 @@ for (;;)
      SCHECK_PARTIAL();
      RRETURN(MATCH_NOMATCH);
      }
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      {
      register uint32_t ch, och;
@ -3705,7 +3705,7 @@ for (;;)
        }
      }
    else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
      {
      register uint32_t ch = ecode[1];
      c = *eptr++;
@ -3803,14 +3803,14 @@ for (;;)

    if (op >= OP_NOTSTARI)     /* Caseless */
      {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf && fc > 127)
        foc = UCD_OTHERCASE(fc);
      else
-#endif /* SUPPORT_UTF */
+#endif /* SUPPORT_UNICODE */
        foc = TABLE_GET(fc, mb->fcc, fc);

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        register uint32_t d;
@ -3826,7 +3826,7 @@ for (;;)
          }
        }
      else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
      /* Not UTF mode */
        {
        for (i = 1; i <= min; i++)
@ -3845,7 +3845,7 @@ for (;;)

      if (minimize)
        {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          register uint32_t d;
@ -3864,7 +3864,7 @@ for (;;)
            }
          }
        else
-#endif  /*SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
        /* Not UTF mode */
          {
          for (fi = min;; fi++)
@ -3890,7 +3890,7 @@ for (;;)
        {
        pp = eptr;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          register uint32_t d;
@ -3917,7 +3917,7 @@ for (;;)
            }
          }
        else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
        /* Not UTF mode */
          {
          for (i = min; i < max; i++)
@ -3947,7 +3947,7 @@ for (;;)

    else
      {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        register uint32_t d;
@ -3981,7 +3981,7 @@ for (;;)

      if (minimize)
        {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          register uint32_t d;
@ -4025,7 +4025,7 @@ for (;;)
        {
        pp = eptr;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          register uint32_t d;
@ -4144,7 +4144,7 @@ for (;;)
    REPEATTYPE:
    ctype = *ecode++;      /* Code for the character type */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (ctype == OP_PROP || ctype == OP_NOTPROP)
      {
      prop_fail_result = ctype == OP_NOTPROP;
@ -4162,7 +4162,7 @@ for (;;)

    if (min > 0)
      {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (prop_type >= 0)
        {
        switch(prop_type)
@ -4378,11 +4378,11 @@ for (;;)
        }

      else
-#endif     /* SUPPORT_UTF */
+#endif     /* SUPPORT_UNICODE */

 /* Handle all other cases when the coding is UTF-8 */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf) switch(ctype)
        {
        case OP_ANY:
@ -4631,7 +4631,7 @@ for (;;)
        }  /* End switch(ctype) */

      else
-#endif     /* SUPPORT_UTF */
+#endif     /* SUPPORT_UNICODE */

      /* Code for the non-UTF-8 case for minimum matching of operators other
      than OP_PROP and OP_NOTPROP. */
@ -4889,7 +4889,7 @@ for (;;)

    if (minimize)
      {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (prop_type >= 0)
        {
        switch(prop_type)
@ -5138,9 +5138,9 @@ for (;;)
          }
        }
      else
-#endif     /* SUPPORT_UTF */
+#endif     /* SUPPORT_UNICODE */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        for (fi = min;; fi++)
@ -5410,7 +5410,7 @@ for (;;)
      {
      pp = eptr;  /* Remember where we started */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (prop_type >= 0)
        {
        switch(prop_type)
@ -5696,9 +5696,9 @@ for (;;)
        }

      else
-#endif   /* SUPPORT_UTF */
+#endif   /* SUPPORT_UNICODE */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        switch(ctype)
@ -5940,7 +5940,7 @@ for (;;)
          }
        }
      else
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
      /* Not UTF mode */
        {
        switch(ctype)
@ -6219,13 +6219,13 @@ switch (frame->Xwhere)
 #ifdef SUPPORT_WIDE_CHARS
  LBL(20) LBL(21)
 #endif
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  LBL(16) LBL(18)
  LBL(22) LBL(23) LBL(28) LBL(30)
  LBL(32) LBL(34) LBL(42) LBL(46)
  LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
  LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
  default:
  return PCRE2_ERROR_INTERNAL;
  }
@ -6398,14 +6398,21 @@ if (code == NULL || subject == NULL || match_data == NULL)
  return PCRE2_ERROR_NULL;
 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;

-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
+/* Check that the first field in the block is the magic number. */
+
+if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
+
+#ifdef FIXME
+If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
+this comment and code:
+
+/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
+with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
+with different endianness. */

-if (re->magic_number != MAGIC_NUMBER)
  return re->magic_number == REVERSED_MAGIC_NUMBER?
    PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
+#endif

 /* Check the code unit width. */

@ -6451,7 +6458,7 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
 we must also check that a starting offset does not point into the middle of a
 multiunit character. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
  {
  match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->rightchar));
@ -6466,7 +6473,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
    return PCRE2_ERROR_BADUTFOFFSET;
 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
  }
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 /* If the pattern was successfully studied with JIT support, run the JIT
 executable instead of the rest of this function. Most options must be set at
@ -6539,7 +6546,7 @@ mb->match_limit = (mcontext->match_limit < re->limit_match)?
                  mcontext->match_limit : re->limit_match;
 mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
                            mcontext->recursion_limit : re->limit_recursion;
-                            
+    
 /* Pointers to the individual character tables */

 mb->lcc = re->tables + lcc_offset;
@ -6640,7 +6647,7 @@ if (!anchored)
    if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
      {
      first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
      if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
 #endif
      }
@ -6660,7 +6667,7 @@ if ((re->flags & PCRE2_LASTSET) != 0)
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
    {
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
    if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
 #endif
    }
@ -6696,7 +6703,7 @@ for(;;)
    if (firstline)
      {
      PCRE2_SPTR t = start_match;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        while (t < mb->end_subject && !IS_NEWLINE(t))
@ -6731,7 +6738,7 @@ for(;;)
      {
      if (start_match > mb->start_subject + start_offset)
        {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
@ -6905,7 +6912,7 @@ for(;;)
    case MATCH_THEN:
    mb->ignore_skip_arg = 0;
    new_start_match = start_match + 1;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      ACROSSCHAR(new_start_match < end_subject, *new_start_match,
        new_start_match++);
--- a/src/pcre2_newline.c
+++ b/src/pcre2_newline.c
@ -81,12 +81,12 @@ PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
 {
 uint32_t c;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 if (utf) { GETCHAR(c, ptr); } else c = *ptr;
 #else
 (void)utf;
 c = *ptr;
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 if (type == NLTYPE_ANYCRLF) switch(c)
  {
@ -172,7 +172,7 @@ PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
 uint32_t c;
 ptr--;

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 if (utf)
  {
  BACKCHAR(ptr);
@ -182,7 +182,7 @@ else c = *ptr;
 #else
 (void)utf;
 c = *ptr;
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 if (type == NLTYPE_ANYCRLF) switch(c)
  {
--- a/src/pcre2_ord2utf.c
+++ b/src/pcre2_ord2utf.c
@ -50,10 +50,11 @@ into a UTF string. The behaviour is different for each code unit width. */
 #include "pcre2_internal.h"


-/* If SUPPORT_UTF is not defined, this function will never be called. Supply a 
-dummy function because some compilers do not like empty source modules. */
+/* If SUPPORT_UNICODE is not defined, this function will never be called.
+Supply a dummy function because some compilers do not like empty source
+modules. */

-#ifndef SUPPORT_UTF
+#ifndef SUPPORT_UNICODE
 unsigned int
 PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
 {
@ -61,7 +62,7 @@ PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
 (void)(buffer);
 return 0;
 }
-#else  /* SUPPORT_UTF */
+#else  /* SUPPORT_UNICODE */


 /*************************************************
@ -114,6 +115,6 @@ return 2;
 return 1;
 #endif
 }
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 /* End of pcre_ord2utf.c */
--- a/src/pcre2_pattern_info.c
+++ b/src/pcre2_pattern_info.c
@ -56,11 +56,9 @@ Arguments:
  what          what information is required
  where         where to put the information

-Returns:        0 if data returned, negative on error
+Returns:        0 if data returned, negative on error or unset value
 */

-/* FIXME: Remove BADENDIANNESS if saving/restoring is not to be implemented. */
-
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
 {
@ -69,13 +67,21 @@ const pcre2_real_code *re = (pcre2_real_code *)code;
 if (re == NULL || where == NULL) return PCRE2_ERROR_NULL;

 /* Check that the first field in the block is the magic number. If it is not,
-return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE2_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
+return with PCRE2_ERROR_BADMAGIC. */
+
+if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
+
+#ifdef FIXME
+If saving/restoring gets implemented, define PCRE2_ERROR_BADENDIANNESS, and add
+this comment and code:
+
+/* However, if the magic number is equal to REVERSED_MAGIC_NUMBER we return
+with PCRE2_ERROR_BADENDIANNESS, which means that the pattern is likely compiled
+with different endianness. */

-if (re->magic_number != MAGIC_NUMBER)
  return re->magic_number == REVERSED_MAGIC_NUMBER?
    PCRE2_ERROR_BADENDIANNESS:PCRE2_ERROR_BADMAGIC;
+#endif

 /* Check that this pattern was compiled in the correct bit mode */

@ -151,6 +157,7 @@ switch(what)

  case PCRE2_INFO_MATCHLIMIT:
  *((uint32_t *)where) = re->limit_match;
+  if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET; 
  break;

  case PCRE2_INFO_MAXLOOKBEHIND:
@ -179,6 +186,7 @@ switch(what)

  case PCRE2_INFO_RECURSIONLIMIT:
  *((uint32_t *)where) = re->limit_recursion;
+  if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET; 
  break;

  case PCRE2_INFO_SIZE:
--- a/src/pcre2_printint.c
+++ b/src/pcre2_printint.c
@ -94,7 +94,7 @@ BOOL one_code_unit = !utf;

 /* If UTF is supported and requested, check for a valid single code unit. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 if (utf)
  {
 #if PCRE2_CODE_UNIT_WIDTH == 8
@ -105,7 +105,7 @@ if (utf)
  one_code_unit = (c & 0xfffff800u) != 0xd800u;
 #endif  /* CODE_UNIT_WIDTH */
  }
-#endif  /* SUPPORT_UTF */  
+#endif  /* SUPPORT_UNICODE */  

 /* Handle a valid one-code-unit character at any width. */

@ -121,7 +121,7 @@ if (one_code_unit)
 for each width. If UTF is not supported, control should never get here, but we 
 need a return statement to keep the compiler happy. */

-#ifndef SUPPORT_UTF
+#ifndef SUPPORT_UNICODE
 return 0;
 #else

@ -178,7 +178,7 @@ as an indication. */
 fprintf(f, "\\X{%x}", c);
 return 0;
 #endif  /* PCRE2_CODE_UNIT_WIDTH == 32 */
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
 }


@ -221,7 +221,7 @@ into the main code, however, we just put one into this function. */
 static const char *
 get_ucpname(unsigned int ptype, unsigned int pvalue)
 {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 int i;
 for (i = utt_size - 1; i >= 0; i--)
  {
@ -233,7 +233,7 @@ return (i >= 0)? utt_names + utt[i].name_offset : "??";
 (void)ptype;
 (void)pvalue;
 return "??";
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
 }


--- a/src/pcre2_study.c
+++ b/src/pcre2_study.c
@ -228,7 +228,7 @@ for (;;)
    case OP_NOTPOSPLUSI:
    branchlength++;
    cc += 2;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
    break;
@ -249,7 +249,7 @@ for (;;)
    case OP_NOTEXACTI:
    branchlength += GET2(cc,1);
    cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
    break;
@ -297,7 +297,7 @@ for (;;)
    appear, but leave the code, just in case.) */

    case OP_ANYBYTE:
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf) return -1;
 #endif
    branchlength++;
@ -536,7 +536,7 @@ for (;;)
    case OP_NOTPOSQUERYI:

    cc += PRIV(OP_lengths)[op];
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
    break;
@ -608,7 +608,7 @@ SET_BIT(c);
 /* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
 the end of the character, even when caseless. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
 if (utf)
  {
 #if PCRE2_CODE_UNIT_WIDTH == 8
@ -617,7 +617,7 @@ if (utf)
  if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
 #endif
  }
-#endif  /* SUPPORT_UTF */   
+#endif  /* SUPPORT_UNICODE */   

 /* If caseless, handle the other case of the character. */

@ -671,7 +671,7 @@ set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
 register uint32_t c;
 for (c = 0; c < table_limit; c++) 
  re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
 if (table_limit == 32) return;
 for (c = 128; c < 256; c++)
  {
@ -712,7 +712,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
 register uint32_t c;
 for (c = 0; c < table_limit; c++) 
  re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
 if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
 #endif
 }
@ -752,7 +752,7 @@ set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
 register uint32_t c;
 int yield = SSB_DONE;

-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
 int table_limit = utf? 16:32;
 #else
 int table_limit = 32;
@ -866,7 +866,7 @@ do
        const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
        while ((c = *p++) < NOTACHAR)
          {
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8         
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8         
          if (utf)
            {
            PCRE2_UCHAR buff[6];
@ -1042,7 +1042,7 @@ do
      /* For the 8-bit library in UTF-8 mode, set the bits for the first code 
      units of horizontal space characters. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        SET_BIT(0xC2);  /* For U+00A0 */
@ -1081,7 +1081,7 @@ do
      /* For the 8-bit library in UTF-8 mode, set the bits for the first code 
      units of vertical space characters. */
 
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
      if (utf)
        {
        SET_BIT(0xC2);  /* For U+0085 (NEL) */
@ -1181,7 +1181,7 @@ do
        /* For the 8-bit library in UTF-8 mode, set the bits for the first code 
        units of horizontal space characters. */

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          SET_BIT(0xC2);  /* For U+00A0 */
@ -1218,7 +1218,7 @@ do
        /* For the 8-bit library in UTF-8 mode, set the bits for the first code 
        units of vertical space characters. */
 
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
        if (utf)
          {
          SET_BIT(0xC2);  /* For U+0085 (NEL) */
@ -1287,7 +1287,7 @@ do
      character modes, set the 0xFF bit to indicate code units >= 255. */

      case OP_NCLASS:
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
      if (utf)
        {
        re->start_bitmap[24] |= 0xf0;            /* Bits for 0xc4 - 0xc8 */
@ -1318,7 +1318,7 @@ do
      
      if (classmap != NULL)
        { 
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
        if (utf)
          {
          for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
--- a/src/pcre2_substring.c
+++ b/src/pcre2_substring.c
@ -108,8 +108,8 @@ Returns:         if successful: 0
 */

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
-pcre2_substring_copy_bynumber(pcre2_match_data *match_data, int stringnumber,
-  PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
+pcre2_substring_copy_bynumber(pcre2_match_data *match_data, 
+  unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
 {
 PCRE2_SIZE left, right;
 PCRE2_SIZE p = 0;
@ -189,8 +189,8 @@ Returns:         if successful: zero
 */

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
-pcre2_substring_get_bynumber(pcre2_match_data *match_data, int stringnumber, 
-  PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
+pcre2_substring_get_bynumber(pcre2_match_data *match_data, 
+  unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
 {
 PCRE2_SIZE left, right;
 PCRE2_SIZE p = 0;
@ -288,7 +288,7 @@ Returns:          0 if successful, else a negative error number

 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_substring_length_bynumber(pcre2_match_data *match_data,
-  int stringnumber, PCRE2_SIZE *sizeptr)
+  unsigned int stringnumber, PCRE2_SIZE *sizeptr)
 {
 if (stringnumber >= match_data->oveccount ||
    stringnumber > match_data->code->top_bracket ||
--- a/src/pcre2_tables.c
+++ b/src/pcre2_tables.c
@ -76,7 +76,7 @@ as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
 handling wide characters. */

 #if defined PCRE2_PCRE2TEST || \
-   (defined SUPPORT_UTF && \
+   (defined SUPPORT_UNICODE && \
    defined PCRE2_CODE_UNIT_WIDTH && \
    PCRE2_CODE_UNIT_WIDTH == 8)

@ -106,7 +106,7 @@ const uint8_t PRIV(utf8_table4)[] = {
 #endif /* UTF-8 support needed */


-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE

 /* Table to translate from particular type value to the general value. */

@ -728,6 +728,6 @@ const ucp_type_table PRIV(utt)[] = {

 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

-#endif /* SUPPORT_UTF */
+#endif /* SUPPORT_UNICODE */

 /* End of pcre2_tables.c */
--- a/src/pcre2_ucd.c
+++ b/src/pcre2_ucd.c
@ -32,7 +32,7 @@ condition to cut out the tables when not needed. But don't leave
 a totally empty module because some compilers barf at that.
 Instead, just supply small dummy tables. */

-#ifndef SUPPORT_UTF
+#ifndef SUPPORT_UNICODE
 const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};
 const uint8_t PRIV(ucd_stage1)[] = {0};
 const uint16_t PRIV(ucd_stage2)[] = {0};
@ -3628,6 +3628,6 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 58112 bytes, block = 128 */
 #if UCD_BLOCK_SIZE != 128
 #error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
 #endif
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 #endif  /* PCRE2_PCRE2TEST */
--- a/src/pcre2_valid_utf.c
+++ b/src/pcre2_valid_utf.c
@ -50,12 +50,12 @@ strings. */
 #include "pcre2_internal.h"


-#ifndef SUPPORT_UTF
+#ifndef SUPPORT_UNICODE
 /*************************************************
-*        Dummy function when UTF not supported   *
+*  Dummy function when Unicode is not supported  *
 *************************************************/

-/* This function should never be called when UTF is not supported. */
+/* This function should never be called when Unicode is not supported. */

 int
 PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
@ -388,6 +388,6 @@ for (p = string; length-- > 0; p++)
 return 0;
 #endif  /* CODE_UNIT_WIDTH */
 }
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */

 /* End of pcre2_valid_utf.c */
--- a/src/pcre2_xclass.c
+++ b/src/pcre2_xclass.c
@ -103,7 +103,7 @@ while ((t = *data++) != XCL_END)
  uint32_t x, y;
  if (t == XCL_SINGLE)
    {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      {
      GETCHARINC(x, data); /* macro generates multiple statements */
@ -115,7 +115,7 @@ while ((t = *data++) != XCL_END)
    }
  else if (t == XCL_RANGE)
    {
-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
    if (utf)
      {
      GETCHARINC(x, data); /* macro generates multiple statements */
@ -130,7 +130,7 @@ while ((t = *data++) != XCL_END)
    if (c >= x && c <= y) return !negated;
    }

-#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UNICODE
  else  /* XCL_PROP & XCL_NOTPROP */
    {
    const ucd_record *prop = GET_UCD(c);
@ -262,7 +262,7 @@ while ((t = *data++) != XCL_END)
    }
 #else
  (void)utf;  /* Avoid compiler warning */     
-#endif  /* SUPPORT_UTF */
+#endif  /* SUPPORT_UNICODE */
  }

 return negated;   /* char did not match */
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -196,6 +196,7 @@ so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
 for building the library. */

 #define PRIV(name) name
+#define PCRE2_CODE_UNIT_WIDTH 0
 #include "pcre2.h"
 #include "pcre2posix.h"
 #include "pcre2_internal.h"
@ -208,16 +209,17 @@ of PRIV avoids name clashes. */
 #include "pcre2_tables.c"
 #include "pcre2_ucd.c"

-/* When PCRE2_CODE_UNIT_WIDTH is unset, pcre2_internal.h does not include
+/* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
 defined. We can now include it for each supported code unit width. Because
-PCRE2_CODE_UNIT_WIDTH was not defined before including pcre2.h, it will have
-left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately while
-including these files, and then restore it to a no-op. Because LINK_SIZE may be
-changed in 16-bit mode and forced to 1 in 32-bit mode, the order of these
-inclusions should not be changed. */
+PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
+have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
+while including these files, and then restore it to a no-op. Because LINK_SIZE
+may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
+these inclusions should not be changed. */

 #undef PCRE2_SUFFIX
+#undef PCRE2_CODE_UNIT_WIDTH

 #ifdef   SUPPORT_PCRE8
 #define  PCRE2_CODE_UNIT_WIDTH 8
@ -576,7 +578,7 @@ static coptstruct coptlist[] = {
  { "pcre16",    CONF_FIX, SUPPORT_16 },
  { "pcre32",    CONF_FIX, SUPPORT_32 },
  { "pcre8",     CONF_FIX, SUPPORT_8 },
-  { "utf",       CONF_INT, PCRE2_CONFIG_UTF }
+  { "unicode",   CONF_INT, PCRE2_CONFIG_UNICODE }
 };

 #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
@ -2815,22 +2817,26 @@ pattern.
 Arguments:
  what        code for the required information
  where       where to put the answer
+  unsetok     if TRUE, PCRE2_ERROR_UNSET is "expected" and is not reported

 Returns:      the return from pcre2_pattern_info()
 */

 static int
-pattern_info(int what, void *where)
+pattern_info(int what, void *where, BOOL unsetok)
 {
 int rc;
 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
 if (rc >= 0) return 0;
-fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
-  what);
-if (rc == PCRE2_ERROR_BADMODE)
-  fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
-    "%d-bit mode\n", test_mode,
-    8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
+if (rc != PCRE2_ERROR_UNSET || !unsetok)
+  {
+  fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
+    what);
+  if (rc == PCRE2_ERROR_BADMODE)
+    fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
+      "%d-bit mode\n", test_mode,
+      8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
+  }     
 return rc;
 }

@ -3026,32 +3032,61 @@ if ((pat_patctl.control & CTL_INFO) != 0)
  {
  const void *nametable;
  const uint8_t *start_bits;
+  BOOL match_limit_set, recursion_limit_set; 
  uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
    hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
    maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
    recursion_limit;
+    
+  /* These info requests may return PCRE2_ERROR_UNSET. */
+
+  switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
+    {
+    case 0:
+    match_limit_set = TRUE;
+    break;
+    
+    case PCRE2_ERROR_UNSET:
+    match_limit_set = FALSE;
+    break;
+    
+    default:
+    return PR_ABEND;
+    }          
+      
+  switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE))
+    { 
+    case 0:
+    recursion_limit_set = TRUE;
+    break;
+    
+    case PCRE2_ERROR_UNSET:
+    recursion_limit_set = FALSE;
+    break;
+      
+    default:
+    return PR_ABEND;        
+    }

  /* These info requests should always succeed. */

-  if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax) +
-      pattern_info(PCRE2_INFO_BSR, &bsr_convention) +
-      pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count) +
-      pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits) +
-      pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit) +
-      pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype) +
-      pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf) +
-      pattern_info(PCRE2_INFO_JCHANGED, &jchanged) +
-      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit) +
-      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype) +
-      pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty) +
-      pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit) +
-      pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind) +
-      pattern_info(PCRE2_INFO_MINLENGTH, &minlength) +
-      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount) +
-      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize) +
-      pattern_info(PCRE2_INFO_NAMETABLE, &nametable) +
-      pattern_info(PCRE2_INFO_NEWLINE, &newline_convention) +
-      pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit)
+  if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
+      pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
+      pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
+      pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
+      pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
+      pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
+      pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
+      pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
+      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
+      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
+      pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
+      pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) +
+      pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
+      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
+      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
+      pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
+      pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
      != 0)
    return PR_ABEND;

@ -3062,11 +3097,11 @@ if ((pat_patctl.control & CTL_INFO) != 0)

  if (maxlookbehind > 0)
    fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
-
-  if (match_limit != UINT32_MAX)
+    
+  if (match_limit_set)
    fprintf(outfile, "Match limit = %u\n", match_limit);

-  if (recursion_limit != UINT32_MAX)
+  if (recursion_limit_set)
    fprintf(outfile, "Recursion limit = %u\n", recursion_limit);

  if (namecount > 0)
@ -3099,8 +3134,8 @@ if ((pat_patctl.control & CTL_INFO) != 0)
  if (hascrorlf)   fprintf(outfile, "Contains explicit CR or LF match\n");
  if (match_empty) fprintf(outfile, "May match empty string\n");

-  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
-  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
+  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
+  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);

  /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
  cluttering up the verification output of non-UTF test files. */
@ -3234,7 +3269,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
  if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
    {
    size_t jitsize;
-    if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize) == 0)
+    if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize, FALSE) == 0)
      {
      if (jitsize > 0)
        fprintf(outfile, "JIT compilation was successful\n");
@ -3625,14 +3660,14 @@ if ((pat_patctl.control & CTL_MEMORY) != 0)
  if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
 #endif

-  (void)pattern_info(PCRE2_INFO_SIZE, &size);
-  (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count);
-  (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
+  (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
+  (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
+  (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
  fprintf(outfile, "Memory allocation (code space): %d\n",
    (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
  if (pat_patctl.jit != 0)
    {
-    (void)pattern_info(PCRE2_INFO_JITSIZE, &size);
+    (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
    fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
    }
  }
@ -4452,7 +4487,7 @@ for (gmatched = 0;; gmatched++)
    if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
      {
      uint32_t maxcapcount;
-      if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount) < 0)
+      if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
        return PR_SKIP;
      capcount = maxcapcount + 1;   /* Allow for full match */
      if (capcount > (int)dat_datctl.oveccount) capcount = dat_datctl.oveccount;
@ -4943,7 +4978,7 @@ printf("     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
 printf("     pcre8          8 bit library support enabled [0, 1]\n");
 printf("     pcre16         16 bit library support enabled [0, 1]\n");
 printf("     pcre32         32 bit library support enabled [0, 1]\n");
-printf("     utf            Unicode Transformation Format supported [0, 1]\n");
+printf("     unicode        Unicode and UTF support enabled [0, 1]\n");
 printf("  -d            set default pattern control 'debug'\n");
 printf("  -dfa          set default subject control 'dfa'\n");
 printf("  -help         show usage information\n");
@ -5057,7 +5092,7 @@ printf("  16-bit support\n");
 printf("  32-bit support\n");
 #endif

-(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc));
+(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &rc, sizeof(rc));
 if (rc != 0)
  printf("  UTF support (Unicode version %s)\n", uversion);
 else
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@ -384,15 +384,15 @@ aaaaa2
 010203040506
 RC=0
 ======== STDERR ========
-pcre2grep: pcre2_match() gave error -47 while matching this text:
+pcre2grep: pcre2_match() gave error -45 while matching this text:

 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

-pcre2grep: pcre2_match() gave error -47 while matching this text:
+pcre2grep: pcre2_match() gave error -45 while matching this text:

 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

-pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded.
+pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
 pcre2grep: Check your regex for nested unlimited loops.
 ---------------------------- Test 38 ------------------------------
 This line contains a binary zero here >< for testing.
@ -510,23 +510,23 @@ In the middle of a line, PATTERN appears.
 Check up on PATTERN near the end.
 RC=0
 ---------------------------- Test 62 -----------------------------
-pcre2grep: pcre2_match() gave error -47 while matching text that starts:
+pcre2grep: pcre2_match() gave error -45 while matching text that starts:

 This is a file of miscellaneous text that is used as test data for checking
 that the pcregrep command is working correctly. The file must be more than 24K
 long so that it needs more than a single read

-pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded.
+pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
 pcre2grep: Check your regex for nested unlimited loops.
 RC=1
 ---------------------------- Test 63 -----------------------------
-pcre2grep: pcre2_match() gave error -52 while matching text that starts:
+pcre2grep: pcre2_match() gave error -50 while matching text that starts:

 This is a file of miscellaneous text that is used as test data for checking
 that the pcregrep command is working correctly. The file must be more than 24K
 long so that it needs more than a single read

-pcre2grep: Error -46, -47 or -52 means that a resource limit was exceeded.
+pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded.
 pcre2grep: Check your regex for nested unlimited loops.
 RC=1
 ---------------------------- Test 64 ------------------------------
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@ -888,7 +888,7 @@ Subject length lower bound = 3
    a\x{123}aa\=offset=1
 0: aa
    a\x{123}aa\=offset=2
-Error -36 (bad UTF-8 offset)
+Error -35 (bad UTF-8 offset)
    a\x{123}aa\=offset=3
 0: aa
    a\x{123}aa\=offset=4
@ -896,7 +896,7 @@ Error -36 (bad UTF-8 offset)
    a\x{123}aa\=offset=5
 No match
    a\x{123}aa\=offset=6
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value

 /\x{1234}+/Ii,utf
 Capturing subpattern count = 0
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@ -787,9 +787,9 @@ Subject length lower bound = 3
    a\x{123}aa\=offset=4
 No match
    a\x{123}aa\=offset=5
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value
    a\x{123}aa\=offset=6
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value

 /\x{1234}+/Ii,utf
 Capturing subpattern count = 0
@ -851,9 +851,9 @@ Subject length lower bound = 1

 /a/utf
    \x{10000}\=offset=1
-Error -36 (bad UTF-16 offset)
+Error -35 (bad UTF-16 offset)
    \x{10000}ab\=offset=1
-Error -36 (bad UTF-16 offset)
+Error -35 (bad UTF-16 offset)
    \x{10000}ab\=offset=2
 0: a
    \x{10000}ab\=offset=3
@ -861,7 +861,7 @@ No match
    \x{10000}ab\=offset=4
 No match
    \x{10000}ab\=offset=5
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value

 /<2F><><EFBFBD>/utf
 Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@ -779,9 +779,9 @@ Subject length lower bound = 3
    a\x{123}aa\=offset=4
 No match
    a\x{123}aa\=offset=5
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value
    a\x{123}aa\=offset=6
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value

 /\x{1234}+/Ii,utf
 Capturing subpattern count = 0
@ -851,9 +851,9 @@ No match
    \x{10000}ab\=offset=3
 No match
    \x{10000}ab\=offset=4
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value
    \x{10000}ab\=offset=5
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value

 /<2F><><EFBFBD>/utf
 Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@ -986,7 +986,7 @@ Subject length lower bound = 4
 0: abcd
 1: a
 2: d
-copy substring 5 failed (-49): unknown or unset substring
+copy substring 5 failed (-47): unknown or unset substring

 /(.{20})/I
 Capturing subpattern count = 1
@ -1040,9 +1040,9 @@ Subject length lower bound = 4
 2: <unset>
 3: f
 1G a (1)
-get substring 2 failed (-49): unknown or unset substring
+get substring 2 failed (-47): unknown or unset substring
 3G f (1)
-get substring 4 failed (-49): unknown or unset substring
+get substring 4 failed (-47): unknown or unset substring
 0L adef
 1L a
 2L 
@ -1055,7 +1055,7 @@ get substring 4 failed (-49): unknown or unset substring
 1G bc (2)
 2G bc (2)
 3G f (1)
-get substring 4 failed (-49): unknown or unset substring
+get substring 4 failed (-47): unknown or unset substring
 0L bcdef
 1L bc
 2L bc
@ -4370,7 +4370,7 @@ Subject length lower bound = 8
 0: abcdefgh
 1: cd
 2: gh
-copy substring 'three' failed (-49): unknown or unset substring
+copy substring 'three' failed (-47): unknown or unset substring

 /(?P<Tes>)(?P<Test>)/IB
 ------------------------------------------------------------------
@ -5737,7 +5737,7 @@ No match
 0: a1
 1: a1
 2: a1
-copy substring 'Z' failed (-49): unknown or unset substring
+copy substring 'Z' failed (-47): unknown or unset substring
  C a1 (2) A
    
 /(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
@ -5778,7 +5778,7 @@ Subject length lower bound = 2
  C a (1) A
    cd\=copy=A
 0: cd
-copy substring 'A' failed (-49): unknown or unset substring
+copy substring 'A' failed (-47): unknown or unset substring

 /^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
 Capturing subpattern count = 4
@ -5822,7 +5822,7 @@ No match
 0: a1
 1: a1
 2: a1
-get substring 'Z' failed (-49): unknown or unset substring
+get substring 'Z' failed (-47): unknown or unset substring
  G a1 (2) A

 /^(?P<A>a)(?P<A>b)/I,dupnames
@ -5853,7 +5853,7 @@ Subject length lower bound = 2
  G a (1) A
    cd\=get=A
 0: cd
-get substring 'A' failed (-49): unknown or unset substring
+get substring 'A' failed (-47): unknown or unset substring

 /^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
 Capturing subpattern count = 4
@ -10446,7 +10446,7 @@ Partial match: abc
    abc\=offset=3
 No match
    abc\=offset=4
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value
    abc\=offset=-4 
 ** Invalid value in 'offset=-4'

@ -11129,15 +11129,15 @@ Matched, but too many substrings

 /((?2))((?1))/
    abc
-Failed: error -51: nested recursion at the same subject position
+Failed: error -49: nested recursion at the same subject position

 /((?(R2)a+|(?1)b))/
    aaaabcde
-Failed: error -51: nested recursion at the same subject position
+Failed: error -49: nested recursion at the same subject position

 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
-Failed: error -51: nested recursion at the same subject position
+Failed: error -49: nested recursion at the same subject position

 /(a+|(?R)b)/
 Failed: error 140 at offset 7: recursion could loop indefinitely
@ -12129,11 +12129,11 @@ Subject length lower bound = 3
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -47: match limit exceeded
+Failed: error -45: match limit exceeded

 /(a+)*zz/
    aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -52: recursion limit exceeded
+Failed: error -50: recursion limit exceeded

 /(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
@ -12142,9 +12142,9 @@ Starting code units: a z
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
-Failed: error -47: match limit exceeded
+Failed: error -45: match limit exceeded
    aaaaaaaaaaaaaz\=match_limit=60000
-Failed: error -47: match limit exceeded
+Failed: error -45: match limit exceeded

 /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
@ -12153,7 +12153,7 @@ Starting code units: a z
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
-Failed: error -47: match limit exceeded
+Failed: error -45: match limit exceeded

 /(*LIMIT_MATCH=60000)(a+)*zz/I
 Capturing subpattern count = 1
@ -12164,7 +12164,7 @@ Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -47: match limit exceeded
+Failed: error -45: match limit exceeded

 /(*LIMIT_RECURSION=10)(a+)*zz/I
 Capturing subpattern count = 1
@ -12173,9 +12173,9 @@ Starting code units: a z
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
-Failed: error -52: recursion limit exceeded
+Failed: error -50: recursion limit exceeded
    aaaaaaaaaaaaaz\=recursion_limit=1000
-Failed: error -52: recursion limit exceeded
+Failed: error -50: recursion limit exceeded

 /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
 Capturing subpattern count = 1
@ -12195,7 +12195,7 @@ Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -52: recursion limit exceeded
+Failed: error -50: recursion limit exceeded

 # This test causes a segfault with Perl 5.18.0 

--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@ -6132,7 +6132,7 @@ No match

 /^(?(2)a|(1)(2))+$/
    123a
-Failed: error -40: backreference condition or recursion test not supported for DFA matching
+Failed: error -39: backreference condition or recursion test not supported for DFA matching

 /(?<=a|bbbb)c/
    ac
@ -7059,7 +7059,7 @@ Partial match: dogs

 /abc\K123/
    xyzabc123pqr
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching
    
 /(?<=abc)123/
    xyzabc123pqr 
@ -7185,29 +7185,29 @@ No match

 /^(?!a(*SKIP)b)/
    ac
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching
    
 /^(?=a(*SKIP)b|ac)/
    ** Failers
 No match
    ac
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching
    
 /^(?=a(*THEN)b|ac)/
    ac
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching
    
 /^(?=a(*PRUNE)b)/
    ab  
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching
    ** Failers 
 No match
    ac
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching

 /^(?(?!a(*SKIP)b))/
    ac
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching

 /(?<=abc)def/
    abc\=ph
@ -7277,7 +7277,7 @@ Partial match: abc
    abc\=offset=3
 No match
    abc\=offset=4
-Failed: error -34: bad offset value
+Failed: error -33: bad offset value
    abc\=offset=-4 
 ** Invalid value in 'offset=-4'

@ -7403,7 +7403,7 @@ No match

 /((?2))((?1))/
    abc
-Failed: error -51: nested recursion at the same subject position
+Failed: error -49: nested recursion at the same subject position

 /(?(R)a+|(?R)b)/
    aaaabcde
@ -7419,11 +7419,11 @@ Failed: error -51: nested recursion at the same subject position

 /((?(R2)a+|(?1)b))/
    aaaabcde
-Failed: error -40: backreference condition or recursion test not supported for DFA matching
+Failed: error -39: backreference condition or recursion test not supported for DFA matching

 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
-Failed: error -51: nested recursion at the same subject position
+Failed: error -49: nested recursion at the same subject position

 /(a+)/no_auto_possess
    aaaa\=ovector=3
@ -7572,7 +7572,7 @@ Partial match: \x0d\x0d\x0d

 /abcdef/
   abc\=dfa_restart
-Failed: error -38: invalid data in workspace for DFA restart
+Failed: error -37: invalid data in workspace for DFA restart

 /<H((?(?!<H|F>)(.)|(?R))++)*F>/
    text <H more text <H texting more  hexA0-"\xA0"    hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text.
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@ -1230,7 +1230,7 @@ Partial match: the cat

 /ab\Cde/utf
    abXde
-Failed: error -41: item unsupported for DFA matching
+Failed: error -40: item unsupported for DFA matching

 /(?<=ab\Cde)X/utf
 Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion