Cleanup of Makefile.os4, added release rule and a README file for this release

Implement -Z in pcre2grep and update documentation
Added some special heap tests
2022-07-31 20:34:33 +01:00 · 2022-07-30 17:41:49 +01:00 · 2022-07-28 17:58:19 +01:00 · 2022-07-27 18:00:40 +01:00 · 2022-07-27 17:44:55 +01:00 · 2022-07-15 17:18:11 +01:00
175 changed files with 54773 additions and 18235 deletions
--- a/.bazelrc
+++ b/.bazelrc
@ -0,0 +1,3 @@
+# Applied to every Bazel command: turn on Bzlmod via its experimental flag.
+common --experimental_enable_bzlmod
+# Applied to build commands only: opt in to the two "incompatible" behaviours
+# named by the flags (C/C++ toolchain resolution, strict action environment).
+build --incompatible_enable_cc_toolchain_resolution
+build --incompatible_strict_action_env
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -0,0 +1,77 @@
+
+name: Build
+on: [push, pull_request]
+
+jobs:
+  # Autotools build on the Ubuntu runner with JIT and all three code unit
+  # widths enabled, followed by the main, JIT and pcre2grep test drivers.
+  linux:
+    name: Linux
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Autogen
+        run: ./autogen.sh
+
+      - name: Configure
+        run: >-
+          ./configure --enable-jit
+          --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32
+
+      - name: Build
+        run: make
+
+      - name: Test (main test script)
+        run: ./RunTest
+
+      - name: Test (JIT test program)
+        run: ./pcre2_jit_test
+
+      - name: Test (pcre2grep test script)
+        run: ./RunGrepTest
+
+  # Same autotools build and test sequence as the linux job, but run inside
+  # an Alpine container; the build tools are installed with apk first.
+  alpine:
+    name: alpine
+    runs-on: ubuntu-latest
+    container: alpine
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Autotools
+        run: >-
+          apk add --no-cache
+          automake autoconf gcc libtool make musl-dev
+
+      - name: Autogen
+        run: ./autogen.sh
+
+      - name: Configure
+        run: >-
+          ./configure --enable-jit
+          --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32
+
+      - name: Build
+        run: make
+
+      - name: Test (main test script)
+        run: ./RunTest
+
+      - name: Test (JIT test program)
+        run: ./pcre2_jit_test
+
+      - name: Test (pcre2grep test script)
+        run: ./RunGrepTest
+
+  # 32-bit (Win32) CMake build on the Windows runner with JIT and the 16/32-bit
+  # libraries switched on; RunTest.bat is run from the build\Debug directory.
+  windows:
+    name: 32bit Windows
+    runs-on: windows-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Configure
+        run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -B build -A Win32
+
+      - name: Build
+        run: cmake --build build
+
+      - name: Test
+        run: |
+          cd build\Debug
+          ..\..\RunTest.bat
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@ -0,0 +1,73 @@
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+#
+# You may wish to alter this file to override the set of languages analyzed,
+# or to provide custom queries or build logic.
+#
+# ******** NOTE ********
+# We have attempted to detect the languages in your repository. Please check
+# the `language` matrix defined below to confirm you have the correct set of
+# supported CodeQL languages.
+#
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: [ master ]
+  schedule:
+    - cron: '27 6 * * 4'
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+    # Job-level permissions: read-only except for uploading code-scanning results.
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'cpp', 'python' ]
+        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        # Learn more about CodeQL language support at https://git.io/codeql-language-support
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v2
+
+    # Initializes the CodeQL tools for scanning.
+    # The init, autobuild and analyze steps must all use the same major version
+    # of github/codeql-action; v1 has been retired by GitHub, so v3 is used.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v3
+      with:
+        languages: ${{ matrix.language }}
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+        # queries: ./path/to/local/query, your-org/your-repo/queries@main
+
+    # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
+    # If this step fails, then you should remove it and run the build manually (see below)
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v3
+
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 https://git.io/JvXDl
+
+    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
+    #    and modify them (or add more) to build your code if your project
+    #    uses a compiled language
+
+    #- run: |
+    #   make bootstrap
+    #   make release
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v3
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@ -0,0 +1,55 @@
+name: Scorecards supply-chain security
+on:
+  # Only the default branch is supported.
+  branch_protection_rule:
+  schedule:
+    - cron: '23 17 * * 1'
+  push:
+    branches: [ master ]
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+  # Runs the OSSF scorecard action, keeps the SARIF output as a build artifact
+  # and uploads it to code scanning. Every action below is pinned to a full
+  # commit SHA, with the corresponding release tag noted in a trailing comment.
+  analysis:
+    name: Scorecards analysis
+    runs-on: ubuntu-latest
+    permissions:
+      # Needed to upload the results to code-scanning dashboard.
+      security-events: write
+      actions: read
+      contents: read
+
+    steps:
+      - name: "Checkout code"
+        uses: actions/checkout@ec3a7ce113134d7a93b817d10a8272cb61118579 # v2.4.0
+        with:
+          # NOTE(review): presumably keeps the checkout token out of the local
+          # git config for later steps - confirm against the checkout docs.
+          persist-credentials: false
+
+      - name: "Run analysis"
+        uses: ossf/scorecard-action@c1aec4ac820532bab364f02a81873c555a0ba3a1 # v1.0.4
+        with:
+          # The same results.sarif file is consumed by both upload steps below.
+          results_file: results.sarif
+          results_format: sarif
+          # Read-only PAT token. To create it,
+          # follow the steps in https://github.com/ossf/scorecard-action#pat-token-creation.
+          repo_token: ${{ secrets.SCORECARD_READ_TOKEN }}
+          # Publish the results to enable scorecard badges. For more details, see
+          # https://github.com/ossf/scorecard-action#publishing-results.
+          # For private repositories, `publish_results` will automatically be set to `false`,
+          # regardless of the value entered here.
+          publish_results: true
+
+      # Upload the results as artifacts (optional).
+      - name: "Upload artifact"
+        uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2.3.1
+        with:
+          name: SARIF file
+          path: results.sarif
+          retention-days: 5
+
+      # Upload the results to GitHub's code scanning dashboard.
+      - name: "Upload to code-scanning"
+        uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # v1.0.26
+        with:
+          sarif_file: results.sarif
--- a/.gitignore
+++ b/.gitignore
@ -6,7 +6,9 @@
 *.pc
 *.o
 *~
+*.lha

+__pycache__
 .deps
 .libs

@ -74,4 +76,7 @@ src/pcre2.h
 src/pcre2_chartables.c
 src/stamp-h1

+/bazel-*
+
 # End
+
--- a/6
+++ b/6
@ -8,7 +8,7 @@ Email domain:     gmail.com
 Retired from University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2021 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved


@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2010-2021 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.


@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu

-Copyright(c) 2009-2021 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.

 ####
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -0,0 +1,72 @@
+# Load every rule this file uses explicitly: cc_library for :pcre2 and
+# cc_binary for :pcre2demo (rules_cc), plus copy_file (bazel_skylib).
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
+load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
+
+# The three copy_file targets below place the pre-made ".generic"/".dist" files
+# shipped in src/ under the names the library sources expect.
+
+# src/config.h.generic -> src/config.h
+copy_file(
+    name = "config_h_generic",
+    src = "src/config.h.generic",
+    out = "src/config.h",
+)
+
+# src/pcre2.h.generic -> src/pcre2.h
+copy_file(
+    name = "pcre2_h_generic",
+    src = "src/pcre2.h.generic",
+    out = "src/pcre2.h",
+)
+
+# src/pcre2_chartables.c.dist -> src/pcre2_chartables.c
+copy_file(
+    name = "pcre2_chartables_c",
+    src = "src/pcre2_chartables.c.dist",
+    out = "src/pcre2_chartables.c",
+)
+
+# The PCRE2 library, built for 8-bit code units (PCRE2_CODE_UNIT_WIDTH=8) with
+# PCRE2_STATIC and HAVE_CONFIG_H defined. Visible to all packages.
+cc_library(
+    name = "pcre2",
+    srcs = [
+        "src/pcre2_auto_possess.c",
+        "src/pcre2_compile.c",
+        "src/pcre2_config.c",
+        "src/pcre2_context.c",
+        "src/pcre2_convert.c",
+        "src/pcre2_dfa_match.c",
+        "src/pcre2_error.c",
+        "src/pcre2_extuni.c",
+        "src/pcre2_find_bracket.c",
+        "src/pcre2_maketables.c",
+        "src/pcre2_match.c",
+        "src/pcre2_match_data.c",
+        "src/pcre2_newline.c",
+        "src/pcre2_ord2utf.c",
+        "src/pcre2_pattern_info.c",
+        "src/pcre2_script_run.c",
+        "src/pcre2_serialize.c",
+        "src/pcre2_string_utils.c",
+        "src/pcre2_study.c",
+        "src/pcre2_substitute.c",
+        "src/pcre2_substring.c",
+        "src/pcre2_tables.c",
+        "src/pcre2_ucd.c",
+        "src/pcre2_ucptables.c",
+        "src/pcre2_valid_utf.c",
+        "src/pcre2_xclass.c",
+        # Output of the pcre2_chartables_c copy_file target (src/pcre2_chartables.c).
+        ":pcre2_chartables_c",
+    ],
+    # All checked-in headers plus the two headers produced by copy_file.
+    hdrs = glob(["src/*.h"]) + [
+        ":config_h_generic",
+        ":pcre2_h_generic",
+    ],
+    defines = [
+        "HAVE_CONFIG_H",
+        "PCRE2_CODE_UNIT_WIDTH=8",
+        "PCRE2_STATIC",
+    ],
+    includes = ["src"],
+    strip_include_prefix = "src",
+    visibility = ["//visibility:public"],
+)
+
+# Demo program built from src/pcre2demo.c and linked against the :pcre2 library.
+cc_binary(
+    name = "pcre2demo",
+    srcs = ["src/pcre2demo.c"],
+    visibility = ["//visibility:public"],
+    deps = [":pcre2"],
+)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -103,13 +103,18 @@
 PROJECT(PCRE2 C)

 # Increased minimum to 2.8.5 to support GNUInstallDirs.
-# Increased minimum to 3.0.0 because older than 2.8.12 is deprecated.
-CMAKE_MINIMUM_REQUIRED(VERSION 3.0.0)
+# Increased minimum to 3.1 to support imported targets.
+CMAKE_MINIMUM_REQUIRED(VERSION 3.1)

 # Set policy CMP0026 to avoid warnings for the use of LOCATION in
 # GET_TARGET_PROPERTY. This should no longer be required.
 # CMAKE_POLICY(SET CMP0026 OLD)

+# With a recent cmake, you can provide a rootdir to look for non
+# standard installed library dependencies, but to do so, the policy
+# needs to be set to new (by uncommenting the following)
+# CMAKE_POLICY(SET CMP0074 NEW)
+
 # For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
 # on the command line.
 # SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@ -134,8 +139,6 @@ INCLUDE(CheckTypeSize)
 INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR

 CHECK_INCLUDE_FILE(dirent.h     HAVE_DIRENT_H)
-CHECK_INCLUDE_FILE(stdint.h     HAVE_STDINT_H)
-CHECK_INCLUDE_FILE(inttypes.h   HAVE_INTTYPES_H)
 CHECK_INCLUDE_FILE(sys/stat.h   HAVE_SYS_STAT_H)
 CHECK_INCLUDE_FILE(sys/types.h  HAVE_SYS_TYPES_H)
 CHECK_INCLUDE_FILE(unistd.h     HAVE_UNISTD_H)
@ -144,10 +147,16 @@ CHECK_INCLUDE_FILE(windows.h    HAVE_WINDOWS_H)
 CHECK_SYMBOL_EXISTS(bcopy         "strings.h"  HAVE_BCOPY)
 CHECK_SYMBOL_EXISTS(memfd_create  "sys/mman.h" HAVE_MEMFD_CREATE)
 CHECK_SYMBOL_EXISTS(memmove       "string.h"   HAVE_MEMMOVE)
-CHECK_SYMBOL_EXISTS(realpath      "stdlib.h"   HAVE_REALPATH)
 CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h"   HAVE_SECURE_GETENV)
 CHECK_SYMBOL_EXISTS(strerror      "string.h"   HAVE_STRERROR)

+# Detect realpath() by compiling a small program that calls it with a
+# PATH_MAX-sized buffer; the outcome of that compile check is stored in
+# HAVE_REALPATH.
+CHECK_C_SOURCE_COMPILES(
+  "#include <stdlib.h>
+   #include <limits.h>
+   int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[1], buf); return 0; }"
+  HAVE_REALPATH
+)
+
 set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
 set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
 CHECK_C_SOURCE_COMPILES(
@ -302,9 +311,19 @@ ENDIF(PCRE2_SUPPORT_LIBZ)
 IF(EDITLINE_FOUND)
  OPTION (PCRE2_SUPPORT_LIBEDIT  "Enable support for linking pcre2test with libedit." OFF)
 ENDIF(EDITLINE_FOUND)
-IF(PCRE2_SUPPORT_LIBEDIT)
-  INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
-ENDIF(PCRE2_SUPPORT_LIBEDIT)
+IF(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ELSE(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    MESSAGE(FATAL_ERROR
+      " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n"
+      " or set Editline_ROOT to a full libedit installed tree, as needed\n"
+      " Might need to enable policy CMP0074 in CMakeLists.txt"
+    )
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ENDIF(EDITLINE_FOUND)

 # readline lib
 IF(READLINE_FOUND)
@ -342,7 +361,12 @@ IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
 ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)

 IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
-        MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
+        IF(READLINE_FOUND)
+                MESSAGE(FATAL_ERROR
+                  " Only one of the readline compatible libraries can be enabled.\n"
+                  " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF"
+                )
+        ENDIF(READLINE_FOUND)
 ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)

 IF(PCRE2_SUPPORT_BSR_ANYCRLF)
@ -358,7 +382,13 @@ IF(PCRE2_SUPPORT_UNICODE)
 ENDIF(PCRE2_SUPPORT_UNICODE)

 IF(PCRE2_SUPPORT_JIT)
-        SET(SUPPORT_JIT 1)
+	SET(SUPPORT_JIT 1)
+	IF(UNIX)
+		FIND_PACKAGE(Threads REQUIRED)
+		IF(CMAKE_USE_PTHREADS_INIT)
+			SET(REQUIRE_PTHREAD 1)
+		ENDIF(CMAKE_USE_PTHREADS_INIT)
+	ENDIF(UNIX)
 ENDIF(PCRE2_SUPPORT_JIT)

 IF(PCRE2_SUPPORT_JIT_SEALLOC)
@ -628,6 +658,8 @@ IF(MINGW AND BUILD_SHARED_LIBS)
 ENDIF(MINGW AND BUILD_SHARED_LIBS)

 IF(MSVC AND BUILD_SHARED_LIBS)
+  SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-posix.pdb ${dll_pdb_files})
+  SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb ${dll_pdb_debug_files})
  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
    SET(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc)
  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
@ -673,6 +705,10 @@ IF(PCRE2_BUILD_PCRE2_8)
      VERSION ${LIBPCRE2_8_VERSION}
      SOVERSION ${LIBPCRE2_8_SOVERSION})
    TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR})
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
    SET(targets ${targets} pcre2-8-static)
    ADD_LIBRARY(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
    SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES
@ -683,6 +719,7 @@ IF(PCRE2_BUILD_PCRE2_8)
      SOVERSION ${LIBPCRE2_POSIX_SOVERSION})
    TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static)
    TARGET_COMPILE_DEFINITIONS(pcre2-posix-static PUBLIC PCRE2_STATIC)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_BINARY_DIR})
    SET(targets ${targets} pcre2-posix-static)

    IF(MSVC)
@ -699,6 +736,7 @@ IF(PCRE2_BUILD_PCRE2_8)

  IF(BUILD_SHARED_LIBS)
    ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR})
    SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES
      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
@ -706,8 +744,12 @@ IF(PCRE2_BUILD_PCRE2_8)
      VERSION ${LIBPCRE2_8_VERSION}
      SOVERSION ${LIBPCRE2_8_SOVERSION}
      OUTPUT_NAME pcre2-8)
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
    SET(targets ${targets} pcre2-8-shared)
    ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_BINARY_DIR})
    SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES
      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
@ -717,6 +759,8 @@ IF(PCRE2_BUILD_PCRE2_8)
      OUTPUT_NAME pcre2-posix)
    TARGET_LINK_LIBRARIES(pcre2-posix-shared pcre2-8-shared)
    SET(targets ${targets} pcre2-posix-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-8.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-8d.pdb ${dll_pdb_debug_files})

    IF(MINGW)
      IF(NON_STANDARD_LIB_PREFIX)
@ -742,6 +786,7 @@ ENDIF(PCRE2_BUILD_PCRE2_8)
 IF(PCRE2_BUILD_PCRE2_16)
  IF(BUILD_STATIC_LIBS)
    ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR})
    SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES
      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
@ -749,6 +794,9 @@ IF(PCRE2_BUILD_PCRE2_16)
      VERSION ${LIBPCRE2_16_VERSION}
      SOVERSION ${LIBPCRE2_16_SOVERSION})
    TARGET_COMPILE_DEFINITIONS(pcre2-16-static PUBLIC PCRE2_STATIC)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
    SET(targets ${targets} pcre2-16-static)

    IF(MSVC)
@ -763,6 +811,7 @@ IF(PCRE2_BUILD_PCRE2_16)

  IF(BUILD_SHARED_LIBS)
    ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR})
    SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES
      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
@ -770,7 +819,12 @@ IF(PCRE2_BUILD_PCRE2_16)
      VERSION ${LIBPCRE2_16_VERSION}
      SOVERSION ${LIBPCRE2_16_SOVERSION}
      OUTPUT_NAME pcre2-16)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
    SET(targets ${targets} pcre2-16-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-16.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-16d.pdb ${dll_pdb_debug_files})

    IF(MINGW)
      IF(NON_STANDARD_LIB_PREFIX)
@ -794,6 +848,7 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
 IF(PCRE2_BUILD_PCRE2_32)
  IF(BUILD_STATIC_LIBS)
    ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR})
    SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES
      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
@ -801,6 +856,9 @@ IF(PCRE2_BUILD_PCRE2_32)
      VERSION ${LIBPCRE2_32_VERSION}
      SOVERSION ${LIBPCRE2_32_SOVERSION})
    TARGET_COMPILE_DEFINITIONS(pcre2-32-static PUBLIC PCRE2_STATIC)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
    SET(targets ${targets} pcre2-32-static)

    IF(MSVC)
@ -815,6 +873,7 @@ IF(PCRE2_BUILD_PCRE2_32)

  IF(BUILD_SHARED_LIBS)
    ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR})
    SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES
      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
@ -822,7 +881,12 @@ IF(PCRE2_BUILD_PCRE2_32)
      VERSION ${LIBPCRE2_32_VERSION}
      SOVERSION ${LIBPCRE2_32_SOVERSION}
      OUTPUT_NAME pcre2-32)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
    SET(targets ${targets} pcre2-32-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-32.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-32d.pdb ${dll_pdb_debug_files})

    IF(MINGW)
      IF(NON_STANDARD_LIB_PREFIX)
@ -1024,25 +1088,13 @@ FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
 FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
 FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)

-FOREACH(man ${man3})
-        GET_FILENAME_COMPONENT(man_tmp ${man} NAME)
-        SET(man3_new ${man3} ${man})
-ENDFOREACH(man ${man3})
-SET(man3 ${man3_new})
-
 INSTALL(FILES ${man1} DESTINATION man/man1)
 INSTALL(FILES ${man3} DESTINATION man/man3)
 INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html)

 IF(MSVC AND INSTALL_MSVC_PDB)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posix.pdb
-            DESTINATION bin
-            CONFIGURATIONS RelWithDebInfo)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posixd.pdb
-            DESTINATION bin
-            CONFIGURATIONS Debug)
+ INSTALL(FILES ${dll_pdb_files} DESTINATION bin CONFIGURATIONS RelWithDebInfo)
+ INSTALL(FILES ${dll_pdb_debug_files} DESTINATION bin CONFIGURATIONS Debug)
 ENDIF(MSVC AND INSTALL_MSVC_PDB)

 # Help, only for nice output
--- a/240
+++ b/240
@ -1,11 +1,229 @@
-Change Log for PCRE2
--------------------
+Change Log for PCRE2 - see also the Git log
+-------------------------------------------

-Version 10.38-RC1 31-August-2021
--------------------------------
+
+Version 10.41 xx-xxx-2022
+-------------------------
+
+1. Add fflush() before and after a fork callout in pcre2grep to get its output
+to be the same on all systems. (There were previously ordering differences in
+Alpine Linux).
+
+2. Merged patch from @carenas (GitHub #110) for pthreads support in CMake.
+
+3. SSF scorecards grumbled about possible overflow in an expression in
+pcre2test. It never would have overflowed in practice, but some casts have been
+added and at the same time there's been some tidying of fprints that output
+size_t values.
+
+4. PR #94 showed up an unused enum in pcre2_convert.c, which is now removed.
+
+5. Minor code re-arrangement to remove gcc warning about realloc() in
+pcre2test.
+
+6. Change a number of int variables that hold buffer and line lengths in
+pcre2grep to PCRE2_SIZE (aka size_t).
+
+7. Added an #ifdef to cut out a call to PRIV(jit_free) when JIT is not
+supported (even though that function would do nothing in that case) at the
+request of a user who doesn't even want to link with pcre2_jit_compile.o. Also
+tidied up an untidy #ifdef arrangement in pcre2test.
+
+8. Fixed an issue in the backtracking optimization of character repeats in
+JIT. Furthermore optimize star repetitions, not just plus repetitions.
+
+9. Removed the use of an initial backtracking frames vector on the system stack 
+in pcre2_match() so that it now always uses the heap. (In a multi-thread 
+environment with very small stacks there had been an issue.) This also is 
+tidier for JIT matching, which didn't need that vector. The heap vector is now 
+remembered in the match data block and re-used if that block itself is re-used. 
+It is freed with the match data block.
+
+10. Adjusted the find_limits code in pcre2test to work with change 9 above.
+
+11. Added find_limits_noheap to pcre2test, because the heap limits are now 
+different in different environments and so cannot be included in the standard 
+tests.
+
+12. Created a test for pcre2_match() heap processing that is not part of the 
+tests run by 'make check', but can be run manually. The current output is from 
+a 64-bit system.
+
+13. Implemented -Z aka --null in pcre2grep.
+
+
+Version 10.40 15-April-2022
+---------------------------
+
+1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect
+handling of multiple passes.
+
+2. Merged patch from @carenas (GitHub #36, dae47509) to fix portability issue
+in pcre2grep with buffered fseek(stdin).
+
+3. Merged patch from @carenas (GitHub #37, acc520924) to fix tests when -S is
+not supported.
+
+4. Revert an unintended change in JIT repeat detection.
+
+5. Merged patch from @carenas (GitHub #52, b037bfa1) to fix build on GNU Hurd.
+
+6. Merged documentation and comments patches from @carenas (GitHub #47).
+
+7. Merged patch from @carenas (GitHub #49) to remove obsolete JFriedl test code
+from pcre2grep.
+
+8. Merged patch from @carenas (GitHub #48) to fix CMake install issue #46.
+
+9. Merged patch from @carenas (GitHub #53) fixing NULL checks in matching and
+substituting.
+
+10. Add null_subject and null_replacement modifiers to pcre2test.
+
+11. Add check for NULL subject to POSIX regexec() function.
+
+12. Add check for NULL replacement to pcre2_substitute().
+
+13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and
+pcre2_substitute(), and the replacement argument of the latter, if the pointer
+is NULL and the length is zero, treat as an empty string. Apparently a number
+of applications treat NULL/0 in this way.
+
+14. Added support for Bidi_Class and a number of binary Unicode properties,
+including Bidi_Control.
+
+15. Fix some minor issues raised by clang sanitize.
+
+16. Very minor code speed up for maximizing character property matches.
+
+17. A number of changes to script matching for \p and \P:
+
+    (a) Script extensions for a character are now coded as a bitmap instead of
+        a list of script numbers, which should be faster and does not need a
+        loop.
+
+    (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
+        sc and scx).
+
+    (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
+        the same as \p{scx:scriptname} because this change happened in Perl at
+        release 5.26.
+
+    (d) The standard Unicode 4-letter abbreviations for script names are now
+        recognized.
+
+    (e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
+        hyphens, and underscores are ignored in property names, which are then
+        matched independent of case.
+
+18. The Python scripts in the maint directory have been refactored. There are
+now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c
+(which is #included by pcre2_tables.c). The data lists that used to be
+duplicated are now held in a single common Python module.
+
+19. On CHERI, and thus Arm's Morello prototype, pointers are represented as
+hardware capabilities, which consist of both an integer address and additional
+metadata, meaning they are twice the size of the platform's size_t type, i.e.
+16 bytes on a 64-bit system. The ovector member of heapframe happens to only be
+8 byte aligned, and so computing frame_size ended up with a multiple of 8 but
+not 16. Whilst the first frame was always suitably aligned, this then
+misaligned the frame that follows, resulting in an alignment fault when storing
+a pointer to Fecode at the start of match. Patch to fix this issue by Jessica
+Clarke PR#72.
+
+20. Added -LP and -LS listing options to pcre2test.
+
+21. A user discovered that the library names in CMakeLists.txt for MSVC
+debugger (PDB) files were incorrect - perhaps never tried for PCRE2?
+
+22. An item such as [Aa] is optimized into a caseless single character match.
+When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a
+pattern, the optimizing "must be present for a match" character check was not
+being flagged as caseless, causing some matches that should have succeeded to
+fail.
+
+23. Fixed a unicode property matching issue in JIT. The character was not
+fully read in caseless matching.
+
+24. Fixed an issue affecting recursions in JIT caused by duplicated data
+transfers.
+
+25. Merged patch from @carenas (GitHub #96) which fixes some problems with
+pcre2test and readline/readedit:
+
+  * Use the right header for libedit in FreeBSD with autoconf
+  * Really allow libedit with cmake
+  * Avoid using readline headers with libedit
+
+
+Version 10.39 29-October-2021
+-----------------------------
+
+1. Fix incorrect detection of alternatives in first character search in JIT.
+
+2. Merged patch from @carenas (GitHub #28):
+
+  Visual Studio 2013 includes support for %zu and %td, so let newer
+  versions of it avoid the fallback, and while at it, make sure that
+  the first check is for DISABLE_PERCENT_ZT so it will be always
+  honoured if chosen.
+
+  ptrdiff_t is signed, so use a signed type instead, and make sure
+  that an appropriate width is chosen if pointers are 64bit wide and
+  long is not (ex: Windows 64bit).
+
+  IMHO removing the cast (and therefore the possibility of truncation)
+  make the code cleaner and the fallback is likely portable enough
+  with all 64-bit POSIX systems doing LP64 except for Windows.
+
+3. Merged patch from @carenas (GitHub #29) to update to Unicode 14.0.0.
+
+4. Merged patch from @carenas (GitHub #30):
+
+  * Cleanup: remove references to no longer used stdint.h
+
+  Since 19c50b9d (Unconditionally use inttypes.h instead of trying for stdint.h
+  (simplification) and remove the now unnecessary inclusion in
+  pcre2_internal.h., 2018-11-14), stdint.h is no longer used.
+
+  Remove checks for it in autotools and CMake and document better the expected
+  build failures for systems that might have stdint.h (C99) and not inttypes.h
+  (from POSIX), like old Windows.
+
+  * Cleanup: remove detection for inttypes.h which is a hard dependency
+
+  CMake checks for standard headers are not meant to be used for hard
+  dependencies, so will prevent a possible fallback to work.
+
+  Alternatively, the header could be checked to make the configuration fail
+  instead of breaking the build, but that was punted, as it was missing anyway
+  from autotools.
+
+5. Merged patch from @carenas (GitHub #32):
+
+  * jit: allow building with ancient MSVC versions
+
+  Visual Studio older than 2013 fails to build with JIT enabled, because it is
+  unable to parse non C89 compatible syntax, with mixed declarations and code.
+  While most recent compilers wouldn't even report this as a warning since it
+  is valid C99, it could be also made visible by adding to gcc/clang the
+  -Wdeclaration-after-statement flag at build time.
+
+  Move the code below the affected definitions.
+
+  * pcre2grep: avoid mixing declarations with code
+
+  Since d5a61ee8 (Patch to detect (and ignore) symlink loops in pcre2grep,
+  2021-08-28), code will fail to build in a strict C89 compiler.
+
+  Reformat slightly to make it C89 compatible again.
+
+
+Version 10.38 01-October-2021
+-----------------------------

 1. Fix invalid single character repetition issues in JIT when the repetition
-is inside a capturing bracket and the bracket is preceeded by character
+is inside a capturing bracket and the bracket is preceded by character
 literals.

 2. Installed revised CMake configuration files provided by Jan-Willem Blokland.
@ -52,6 +270,14 @@ However, just in case anybody was relying on the old behaviour, there is an
 option called PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK that enables the old behaviour.
 An option has also been added to pcre2grep to enable this.

+7. Re-enable a JIT optimization which was unintentionally disabled in 10.35.
+
+8. There is a loop counter to catch excessively crazy patterns when checking
+the lengths of lookbehinds at compile time. This was incorrectly getting reset
+whenever a lookahead was processed, leading to some fuzzer-generated patterns
+taking a very long time to compile when (?|) was present in the pattern,
+because (?|) disables caching of group lengths.
+

 Version 10.37 26-May-2021
 -------------------------
@ -237,7 +463,7 @@ now correctly backtracked, so this unnecessary restriction has been removed.

 7. Added PCRE2_SUBSTITUTE_MATCHED.

-8. Added (?* and (?<* as synonms for (*napla: and (*naplb: to match another
+8. Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another
 regex engine. The Perl regex folks are aware of this usage and have made a note
 about it.

@ -668,7 +894,7 @@ Patch by Guillem Jover.
 warnings were reported.

 38. Using the clang compiler with sanitizing options causes runtime complaints
-about truncation for statments such as x = ~x when x is an 8-bit value; it
+about truncation for statements such as x = ~x when x is an 8-bit value; it
 seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x
 gets rid of the warnings. There were also two missing casts in pcre2test.

--- a/64
+++ b/64
@ -8,8 +8,8 @@ library is referred to as PCRE1 below. For information about testing PCRE2, see
 the pcre2test documentation and the comment at the head of the RunTest file.

 PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix
-releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid
-confusion with PCRE1.
+releases carried on the 8.xx series, up to the final 8.45 release. PCRE2
+releases started at 10.00 to avoid confusion with PCRE1.


 Historical note 1
@ -38,8 +38,8 @@ Historical note 2
 By contrast, the code originally written by Henry Spencer (which was
 subsequently heavily modified for Perl) compiles the expression twice: once in
 a dummy mode in order to find out how much store will be needed, and then for
-real. (The Perl version probably doesn't do this any more; I'm talking about
-the original library.) The execution function operates by backtracking and
+real. (The Perl version may or may not still do this; I'm talking about the
+original library.) The execution function operates by backtracking and
 maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
 matches individual wild portions of the pattern. This is an "NFA algorithm" in
 Friedl's terminology.
@ -151,8 +151,8 @@ of code units in the item itself. The exception is the aforementioned large
 advance to check for such values. When auto-callouts are enabled, the generous
 assumption is made that there will be a callout for each pattern code unit
 (which of course is only actually true if all code units are literals) plus one
-at the end. There is a default parsed pattern vector on the system stack, but
-if this is not big enough, heap memory is used.
+at the end. A default parsed pattern vector is defined on the system stack, to
+minimize memory handling, but if this is not big enough, heap memory is used.

 As before, the actual compiling function is run twice, the first time to
 determine the amount of memory needed for the final compiled pattern. It
@ -187,7 +187,7 @@ META_CLASS_EMPTY      [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS
 META_CLASS_EMPTY_NOT  [^] negative empty class - ditto
 META_CLASS_END        ] end of non-empty class
 META_CLASS_NOT        [^ start non-empty negative class
-META_COMMIT           (*COMMIT)
+META_COMMIT           (*COMMIT) - no argument (see below for with argument)
 META_COND_ASSERT      (?(?assertion)
 META_DOLLAR           $ metacharacter
 META_DOT              . metacharacter
@ -201,18 +201,18 @@ META_NOCAPTURE        (?: no capture parens
 META_PLUS             +
 META_PLUS_PLUS        ++
 META_PLUS_QUERY       +?
-META_PRUNE            (*PRUNE) - no argument
+META_PRUNE            (*PRUNE) - no argument (see below for with argument)
 META_QUERY            ?
 META_QUERY_PLUS       ?+
 META_QUERY_QUERY      ??
 META_RANGE_ESCAPED    hyphen in class range with at least one escape
 META_RANGE_LITERAL    hyphen in class range defined literally
-META_SKIP             (*SKIP) - no argument
-META_THEN             (*THEN) - no argument
+META_SKIP             (*SKIP) - no argument (see below for with argument)
+META_THEN             (*THEN) - no argument (see below for with argument)

 The two RANGE values occur only in character classes. They are positioned
 between two literals that define the start and end of the range. In an EBCDIC
-evironment it is necessary to know whether either of the range values was
+environment it is necessary to know whether either of the range values was
 specified as an escape. In an ASCII/Unicode environment the distinction is not
 relevant.

@ -229,17 +229,16 @@ If the data for META_ALT is non-zero, it is inside a lookbehind, and the data
 is the length of its branch, for which OP_REVERSE must be generated.

 META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as
-their data in the lower 16 bits of the element.
+their data in the lower 16 bits of the element. META_RECURSE is followed by an
+offset, for use in error messages.

 META_BACKREF is followed by an offset if the back reference group number is 10
-or more. The offsets of the first ocurrences of references to groups whose
+or more. The offsets of the first occurrences of references to groups whose
 numbers are less than 10 are put in cb->small_ref_offset[] (only the first
 occurrence is useful). On 64-bit systems this avoids using more than two parsed
 pattern elements for items such as \3. The offset is used when an error occurs
 because the reference is to a non-existent group.

-META_RECURSE is always followed by an offset, for use in error messages.
-
 META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
 element contains the 16-bit type and data property values, packed together.
 ESC_g and ESC_k are used only for named references - numerical ones are turned
@ -291,9 +290,9 @@ META_LOOKBEHIND       (?<=      start of lookbehind
 META_LOOKBEHIND_NA    (*naplb:  start of non-atomic lookbehind
 META_LOOKBEHINDNOT    (?<!      start of negative lookbehind

-The following are followed by two elements, the minimum and maximum. Repeat
-values are limited to 65535 (MAX_REPEAT). A maximum value of "unlimited" is
-represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:
+The following are followed by two elements, the minimum and maximum. The
+maximum value is limited to 65535 (MAX_REPEAT). A maximum value of "unlimited"
+is represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:

 META_MINMAX           {n,m}  repeat
 META_MINMAX_PLUS      {n,m}+ repeat
@ -347,11 +346,11 @@ support is not available for this kind of matching.
 Changeable options
 ------------------

-The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
-others) may be changed in the middle of patterns by items such as (?i). Their
-processing is handled entirely at compile time by generating different opcodes
-for the different settings. The runtime functions do not need to keep track of
-an option's state.
+The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and
+some others may be changed in the middle of patterns by items such as (?i).
+Their processing is handled entirely at compile time by generating different
+opcodes for the different settings. The runtime functions do not need to keep
+track of an option's state.

 PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
 are tracked and processed during the parsing pre-pass. The others are handled
@ -437,7 +436,7 @@ Backtracking control verbs
 --------------------------

 Verbs with no arguments generate opcodes with no following data (as listed
-in the section above). 
+in the section above).

 (*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
 length in one code unit, and followed by a binary zero. The name length is
@ -468,8 +467,8 @@ Caseless matching (positive or negative) of characters that have more than two
 case-equivalent code points (which is possible only in UTF mode) is handled by
 compiling a Unicode property item (see below), with the pseudo-property
 PT_CLIST. The value of this property is an offset in a vector called
-"ucd_caseless_sets" which identifies the start of a short list of equivalent
-characters, terminated by the value NOTACHAR (0xffffffff).
+"ucd_caseless_sets" which identifies the start of a short list of case
+equivalent characters, terminated by the value NOTACHAR (0xffffffff).


 Repeating single characters
@ -546,8 +545,9 @@ Each is followed by two code units that encode the desired property as a type
 and a value. The types are a set of #defines of the form PT_xxx, and the values
 are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
 The value is relevant only for PT_GC (General Category), PT_PC (Particular
-Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
-identify a list of case-equivalent characters when there are three or more.
+Category), PT_SC (Script), PT_BIDICL (Bidi Class), PT_BOOL (Boolean property),
+and the pseudo-property PT_CLIST, which is used to identify a list of
+case-equivalent characters when there are three or more (see above).

 Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
 three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
@ -665,9 +665,9 @@ a count that immediately follows the offset.
 There are several opcodes that mark the end of a subpattern group. OP_KET is
 used for subpatterns that do not repeat indefinitely, OP_KETRMIN and
 OP_KETRMAX are used for indefinite repetitions, minimally or maximally
-respectively, and OP_KETRPOS for possessive repetitions (see below for more 
+respectively, and OP_KETRPOS for possessive repetitions (see below for more
 details). All four are followed by a LINK_SIZE value giving (as a positive
-number) the offset back to the matching bracket opcode.
+number) the offset back to the matching opening bracket opcode.

 If a subpattern is quantified such that it is permitted to match zero times, it
 is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
@ -718,7 +718,7 @@ Assertions

 Forward assertions are also just like other subpatterns, but starting with one
 of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or
-OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK, 
+OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK,
 OP_ASSERTBACK_NA, and OP_ASSERTBACK_NOT, and the first opcode inside the
 assertion is OP_REVERSE, followed by a count of the number of characters to
 move back the pointer in the subject string. In ASCII or UTF-32 mode, the count
@ -827,4 +827,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
 opcode are the correct length, in order to catch updating errors.

 Philip Hazel
-12 July 2019
+April 2022
--- a/6
+++ b/6
@ -26,7 +26,7 @@ Email domain:     gmail.com
 Retired from University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2021 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved.


@ -37,7 +37,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2010-2021 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.


@ -48,7 +48,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2009-2021 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.


--- a/MODULE.bazel
+++ b/MODULE.bazel
@ -0,0 +1,8 @@
+# Bzlmod module declaration for this project.
+module(
+    name = "pcre2",
+    version = "10.40",
+    compatibility_level = 1,
+)
+
+# External modules this module depends on, pinned to explicit versions.
+bazel_dep(
+    name = "rules_cc",
+    version = "0.0.1",
+)
+
+bazel_dep(
+    name = "bazel_skylib",
+    version = "1.2.1",
+)
--- a/Makefile.am
+++ b/Makefile.am
@ -382,6 +382,10 @@ COMMON_SOURCES = \
  src/pcre2_valid_utf.c \
  src/pcre2_xclass.c

+# The pcre2_ucptables.c file is #included by pcre2_tables.c
+
+EXTRA_DIST += src/pcre2_ucptables.c
+
 if WITH_PCRE2_8
 lib_LTLIBRARIES += libpcre2-8.la
 libpcre2_8_la_SOURCES = \
@ -448,9 +452,10 @@ EXTRA_DIST += \
  src/sljit/sljitNativePPC_32.c \
  src/sljit/sljitNativePPC_64.c \
  src/sljit/sljitNativePPC_common.c \
+  src/sljit/sljitNativeRISCV_32.c \
+  src/sljit/sljitNativeRISCV_64.c \
+  src/sljit/sljitNativeRISCV_common.c \
  src/sljit/sljitNativeS390X.c \
-  src/sljit/sljitNativeSPARC_32.c \
-  src/sljit/sljitNativeSPARC_common.c \
  src/sljit/sljitNativeX86_32.c \
  src/sljit/sljitNativeX86_64.c \
  src/sljit/sljitNativeX86_common.c \
@ -663,6 +668,7 @@ EXTRA_DIST += \
  testdata/testinput23 \
  testdata/testinput24 \
  testdata/testinput25 \
+  testdata/testinput26 \
  testdata/testinputEBC \
  testdata/testoutput1 \
  testdata/testoutput2 \
@ -705,6 +711,7 @@ EXTRA_DIST += \
  testdata/testoutput23 \
  testdata/testoutput24 \
  testdata/testoutput25 \
+  testdata/testoutput26 \
  testdata/testoutputEBC \
  testdata/valgrind-jit.supp \
  testdata/wintestinput3 \
--- a/Makefile.os4
+++ b/Makefile.os4
@ -0,0 +1,271 @@
+#
+# Project: pcre2
+#
+# Created on: 10-01-2022 22:01:46
+#
+# commands to use:
+# make -f Makefile.os4 libpcre2.a
+# make -f Makefile.os4 libpcre2-posix.a
+# make -f Makefile.os4 pcre2test
+# sh RunTest
+# make -f Makefile.os4 clean
+#
+
+###################################################################
+##
+##////  Objects
+##
+###################################################################
+
+# Objects archived into libpcre2.a, one per line so additions diff cleanly.
+libpcre2_OBJ := \
+	src/pcre2_chartables.o \
+	src/pcre2_auto_possess.o \
+	src/pcre2_compile.o \
+	src/pcre2_config.o \
+	src/pcre2_context.o \
+	src/pcre2_convert.o \
+	src/pcre2_dfa_match.o \
+	src/pcre2_error.o \
+	src/pcre2_extuni.o \
+	src/pcre2_find_bracket.o \
+	src/pcre2_jit_compile.o \
+	src/pcre2_maketables.o \
+	src/pcre2_match.o \
+	src/pcre2_match_data.o \
+	src/pcre2_newline.o \
+	src/pcre2_ord2utf.o \
+	src/pcre2_pattern_info.o \
+	src/pcre2_script_run.o \
+	src/pcre2_serialize.o \
+	src/pcre2_string_utils.o \
+	src/pcre2_study.o \
+	src/pcre2_substitute.o \
+	src/pcre2_substring.o \
+	src/pcre2_tables.o \
+	src/pcre2_ucd.o \
+	src/pcre2_valid_utf.o \
+	src/pcre2_xclass.o
+
+# Object archived into libpcre2-posix.a.
+pcre2posix_OBJ := src/pcre2posix.o
+
+# Object linked into the pcre2test program.
+pcre2test_OBJ := src/pcre2test.o
+
+# Object linked into the pcre2grep program.
+pcre2grep_OBJ := src/pcre2grep.o
+
+###################################################################
+##
+##////  Variables and Environment
+##
+###################################################################
+
+# C runtime selection: newlib unless the caller passes USE_CLIB2=yes.
+ifneq ($(USE_CLIB2), yes)
+MCRT := -mcrt=newlib
+else
+MCRT := -mcrt=clib2
+endif
+
+# Compiler driver used by the compile rule.
+CC := gcc:bin/gcc
+
+# Header search path: the top-level directory and src/.
+INCPATH := -I. -Isrc
+
+# Flags passed to every compilation: selected C runtime, include path,
+# optimisation level, config.h support and an 8-bit code unit width.
+CFLAGS := $(MCRT) $(INCPATH) -O2 -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8
+
+###################################################################
+##
+##////  General rules
+##
+###################################################################
+
+# Every target that does not name a real file. clean-custom and realclean
+# have no rule of their own; declaring them phony lets them act as empty hooks.
+.PHONY: all all-before all-after tests clean clean-custom cleanall realclean runtests release
+
+# Default goal: build both static libraries, bracketed by the two hook targets.
+all: all-before libpcre2.a libpcre2-posix.a all-after
+
+all-before:
+#	You can add rules here to execute before the project is built
+
+all-after:
+#	You can add rules here to execute after the project is built
+
+# Build the two programs that the runtests target needs.
+tests: pcre2test pcre2grep
+
+# Remove every object file this makefile compiles, including pcre2grep's.
+clean: clean-custom
+	@echo "Cleaning compiler objects..."
+	@rm -f  $(libpcre2_OBJ) $(pcre2posix_OBJ) $(pcre2test_OBJ) $(pcre2grep_OBJ)
+
+# Remove the objects plus everything linked or archived from them. The
+# archive names must match the libpcre2.a / libpcre2-posix.a targets.
+cleanall: clean
+	@echo "Cleaning compiler targets..."
+	@rm -f  libpcre2.a libpcre2-posix.a pcre2test pcre2grep
+
+###################################################################
+##
+##////  Targets
+##
+###################################################################
+
+# Static library holding the core PCRE2 objects.
+libpcre2.a: $(libpcre2_OBJ)
+	ar -rcs libpcre2.a $(libpcre2_OBJ)
+	ranlib libpcre2.a
+
+# Static library holding the POSIX wrapper object.
+libpcre2-posix.a: $(pcre2posix_OBJ)
+	ar -rcs libpcre2-posix.a $(pcre2posix_OBJ)
+	ranlib libpcre2-posix.a
+
+# Test program. The POSIX wrapper library is named before libpcre2 because a
+# static link resolves symbols left to right and the wrapper calls into the
+# core library.
+pcre2test: libpcre2.a libpcre2-posix.a $(pcre2test_OBJ)
+	@echo "Linking pcre2test"
+	@$(CC) $(MCRT) -o pcre2test $(pcre2test_OBJ) -L. -lauto -lpcre2-posix -lpcre2
+	@echo "Removing stale debug target: pcre2test"
+	@rm -f pcre2test.debug
+
+# grep-like program; needs only the core library.
+pcre2grep: libpcre2.a $(pcre2grep_OBJ)
+	@echo "Linking pcre2grep"
+	@$(CC) $(MCRT) -o pcre2grep $(pcre2grep_OBJ) -L. -lauto -lpcre2
+	@echo "Removing stale debug target: pcre2grep"
+	@rm -f pcre2grep.debug
+
+
+###################################################################
+##
+##////  Standard rules
+##
+###################################################################
+
+# Compile rule for every object in this makefile. The compiler command is
+# silenced, so a short progress line is printed in its place.
+
+%.o: %.c
+	@echo "Compiling $<"
+	@$(CC) -c $< -o $@ $(CFLAGS)
+
+# Headers that every library object is rebuilt against. The matching .c file
+# is supplied as a prerequisite by the compile rule, so it is not repeated
+# for each object.
+libpcre2_HDR := \
+	src/config.h \
+	src/pcre2_internal.h \
+	src/pcre2.h \
+	src/pcre2_ucp.h
+
+$(libpcre2_OBJ): $(libpcre2_HDR)
+
+# Library objects with prerequisites beyond the common headers.
+
+src/pcre2_compile.o: src/pcre2_intmodedep.h
+
+src/pcre2_jit_compile.o: src/pcre2_intmodedep.h \
+	 src/sljit/sljitLir.c src/sljit/sljitLir.h src/sljit/sljitConfig.h \
+	 src/sljit/sljitConfigInternal.h src/sljit/sljitUtils.c src/sljit/sljitProtExecAllocator.c \
+	 src/sljit/sljitWXExecAllocator.c src/sljit/sljitExecAllocator.c src/pcre2_jit_simd_inc.h \
+	 src/pcre2_jit_neon_inc.h src/pcre2_jit_match.c
+
+# pcre2_tables.c #includes pcre2_ucptables.c, so a change to the latter must
+# trigger a rebuild of this object.
+src/pcre2_tables.o: src/pcre2_intmodedep.h src/pcre2_ucptables.c
+
+# Objects that are not part of libpcre2.a.
+
+# NOTE(review): pcre2posix.h added as the wrapper's own header - confirm
+# pcre2posix.c includes it.
+src/pcre2posix.o: src/config.h src/pcre2.h src/pcre2posix.h
+
+src/pcre2test.o: src/config.h src/pcre2.h \
+	 src/pcre2posix.h src/pcre2_internal.h src/pcre2_ucp.h \
+	 src/pcre2_intmodedep.h src/pcre2_tables.c src/pcre2_ucptables.c \
+	 src/pcre2_ucd.c src/pcre2_printint.c
+
+src/pcre2grep.o: src/config.h
+
+###################################################################
+##
+##////  Custom rules
+##
+###################################################################
+
+# Build both libraries and both programs, then run the two test scripts.
+runtests: libpcre2.a libpcre2-posix.a tests
+	sh RunTest
+	sh RunGrepTest
+
+# Build the libraries once per C runtime, stage them with the public headers
+# and documentation under release/, pack the tree into pcre2.lha, then remove
+# the staging tree. $(MAKE) is used for the nested builds so that they run
+# the same make program and inherit its flags.
+release:
+	@echo "Create release folders..."
+	@mkdir -p release/local/newlib/lib release/local/clib2/lib release/local/Documentation/pcre2 release/local/common/include
+
+	@echo "Building newlib based libraries..."
+# USE_CLIB2=no is explicit so that a USE_CLIB2=yes given to the outer make
+# cannot leak into the newlib build.
+	@$(MAKE) -f Makefile.os4 all USE_CLIB2=no
+	@cp libpcre2.a release/local/newlib/lib/
+	@cp libpcre2-posix.a release/local/newlib/lib/
+
+	@echo "Clean build and libraries files..."
+	@$(MAKE) -f Makefile.os4 cleanall
+
+	@echo "Building clib2 based libraries..."
+	@$(MAKE) -f Makefile.os4 all USE_CLIB2=yes
+	@cp libpcre2.a release/local/clib2/lib/
+	@cp libpcre2-posix.a release/local/clib2/lib/
+
+	@echo "Copy the necessary files..."
+	@cp src/pcre2.h release/local/common/include/
+	@cp src/pcre2posix.h release/local/common/include/
+	@cp COPYING release/local/Documentation/pcre2/
+	@cp HACKING release/local/Documentation/pcre2/
+	@cp LICENCE release/local/Documentation/pcre2/
+	@cp README release/local/Documentation/pcre2/
+	@cp README-OS4.md release/local/Documentation/pcre2/
+
+	@echo "Clean build and libraries files..."
+	@$(MAKE) -f Makefile.os4 cleanall
+
+	@echo "Creating the lha release file..."
+	@rm -f pcre2.lha
+	@lha -aeqr3 a pcre2.lha release/
+
+	@rm -rf release
+
+###################################################################
+
--- a/48
+++ b/48
@ -2,8 +2,52 @@ News about PCRE2 releases
 -------------------------


-Version 10.38-RC1 31-August-2021
--------------------------------
+Version 10.40 15-April-2022
+---------------------------
+
+This is mostly a bug-fixing and code-tidying release. However, there are some
+extensions to Unicode property handling:
+
+* Added support for Bidi_Class and a number of binary Unicode properties,
+including Bidi_Control.
+
+* A number of changes to script matching for \p and \P:
+
+  (a) Script extensions for a character are now coded as a bitmap instead of
+      a list of script numbers, which should be faster and does not need a
+      loop.
+
+  (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
+      sc and scx).
+
+  (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
+      the same as \p{scx:scriptname} because this change happened in Perl at
+      release 5.26.
+
+  (d) The standard Unicode 4-letter abbreviations for script names are now
+      recognized.
+
+  (e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
+      hyphens, and underscores are ignored in property names, which are then
+      matched independent of case.
+
+As always, see ChangeLog for a list of all changes (also the Git log).
+
+
+Version 10.39 29-October-2021
+-----------------------------
+
+This release is happening soon after 10.38 because the bug fix is important.
+
+1. Fix incorrect detection of alternatives in first character search in JIT.
+
+2. Update to Unicode 14.0.0.
+
+3. Some code cleanups (see ChangeLog).
+
+
+Version 10.38 01-October-2021
+-----------------------------

 As well as some bug fixes and tidies (as always, see ChangeLog for details),
 the documentation is updated to list the new URLs, following the move of the
--- a/13
+++ b/13
@ -121,6 +121,7 @@ environment, for example.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -306,7 +307,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
    source dir. For example, C:\pcre2\pcre2-xx\build.

-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
    to start Cmake from the Windows Start menu, as this can lead to errors.

@ -343,10 +344,10 @@ cache can be deleted by selecting "File > Delete Cache".

 BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO

-The code currently cannot be compiled without a stdint.h header, which is
-available only in relatively recent versions of Visual Studio. However, this
-portable and permissively-licensed implementation of the header worked without
-issue:
+The code currently cannot be compiled without an inttypes.h header, which is
+available only with Visual Studio 2013 or newer. However, this portable and
+permissively-licensed implementation of the stdint.h header could be used as an
+alternative:

  http://www.azillionmonkeys.com/qed/pstdint.h

@ -373,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
  the pcre2 source (in which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
--- a/66
+++ b/66
@ -5,11 +5,10 @@ PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
 API. Since its initial release in 2015, there has been further development of
 the code and it now differs from PCRE1 in more than just the API. There are new
 features, and the internals have been improved. The original PCRE1 library is
-now obsolete and should not be used in new projects. The latest release of
-PCRE2 is available in .tar.gz, tar.bz2, or .zip form from this GitHub
-repository:
+now obsolete and no longer maintained. The latest release of PCRE2 is available
+in .tar.gz, .tar.bz2, or .zip form from this GitHub repository:

-https://github.com/PhilipHazel/pcre2/releases
+https://github.com/PCRE2Project/pcre2/releases

 There is a mailing list for discussion about the development of PCRE2 at
 pcre2-dev@googlegroups.com. You can subscribe by sending an email to
@ -18,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
 You can access the archives and also subscribe or manage your subscription
 here:

-https://groups.google.com/pcre2-dev
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -115,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.

-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.

+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@ -189,10 +194,10 @@ library. They are also documented in the pcre2build man page.

  As well as supporting UTF strings, Unicode support includes support for the
  \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).

 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
  of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@ -370,19 +375,20 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.

 . The C99 standard defines formatting modifiers z and t for size_t and
  ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-  environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-  defined and has a value greater than or equal to 199901L (indicating C99).
-  However, there is at least one environment that claims to be C99 but does not
-  support these modifiers. If --disable-percent-zt is specified, no use is made
-  of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
-  size_t values.
+  environments other than Microsoft Visual Studio versions earlier than 2013
+  when __STDC_VERSION__ is defined and has a value greater than or equal to
+  199901L (indicating C99). However, there is at least one environment that
+  claims to be C99 but does not support these modifiers. If
+  --disable-percent-zt is specified, no use is made of the z or t modifiers.
+  Instead of %td or %zu, %lu is used, with a cast for size_t values.

 . There is a special option called --enable-fuzz-support for use by people who
  want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
@ -395,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -412,7 +418,7 @@ The "configure" script builds the following files for the basic C library:
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                         that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@ -572,9 +578,9 @@ at build time" for more details.
 Making new tarballs
 -------------------

-The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats.
-The command "make distcheck" does the same, but then does a trial build of the
-new distribution to ensure that it works.
+The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
+zip formats. The command "make distcheck" does the same, but then does a trial
+build of the new distribution to ensure that it works.

 If you have modified any of the man page sources in the doc directory, you
 should first run the PrepareRelease script before making a distribution. This
@ -603,13 +609,13 @@ is available. RunTest outputs a comment when it skips a test.

 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.

 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.

-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:

@ -690,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
@ -906,4 +912,4 @@ The distribution should contain the files listed below.
 Philip Hazel
 Email local part: Philip.Hazel
 Email domain: gmail.com
-Last updated: 27 August 2021
+Last updated: 15 April 2022
--- a/README-OS4.md
+++ b/README-OS4.md
@ -0,0 +1,39 @@
+PCRE2 (Perl-compatible regular expression library)
+---------------------------------------------------------------------------
+
+This is a port of PCRE2 10.40 by Philip Hazel for AmigaOS 4, as found at the
+GitHub repository https://github.com/PCRE2Project/pcre2
+
+More information about PCRE can be found at its official website
+at https://www.pcre.org and at the documentation that comes with this
+package.
+
+In the archive both newlib and clib2 libraries are included. It has been
+tested with various applications, but in case you find issues please
+contact me.
+
+To install it into your AmigaOS 4 SDK installation, just extract all the
+files in the SDK: path.
+
+Compile
+--------------------------
+The source and the changes I made can be found at my personal repository
+https://git.walkero.gr/walkero/pcre2
+
+You can compile it using the Makefile.os4 file, and produce the libraries
+yourself.
+
+* with newlib run:
+  ```bash
+  make -f Makefile.os4 all
+  ```
+* with clib2 run:
+  ```bash
+  make -f Makefile.os4 all USE_CLIB2=yes
+  ```
+
+Changelog
+--------------------------
+v10.40r1 - 2022-07-31
+* First release
+
--- a/README.md
+++ b/README.md
@ -14,14 +14,14 @@ flexible API, the code of PCRE2 has been much improved since the fork.
 ## Download

 As well as downloading from the 
-[GitHub site](https://github.com/PhilipHazel/pcre2), you can download PCRE2 
+[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2 
 or the older, unmaintained PCRE1 library from an 
 [*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge.

 You can check out the PCRE2 source code via Git or Subversion:

-    git clone https://github.com/PhilipHazel/pcre2.git
-    svn co    https://github.com/PhilipHazel/pcre2.git
+    git clone https://github.com/PCRE2Project/pcre2.git
+    svn co    https://github.com/PCRE2Project/pcre2.git

 ## Contributed Ports

@ -36,7 +36,7 @@ default character encoding, can be found at
 ## Documentation

 You can read the PCRE2 documentation 
-[here](https://philiphazel.github.io/pcre2/doc/html/index.html).
+[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html).

 Comparisons to Perl's regular expression semantics can be found in the
 community authored Wikipedia entry for PCRE.
--- a/48
+++ b/48
@ -68,6 +68,22 @@ diff -b  /dev/null /dev/null 2>/dev/null && cf="diff -b"
 diff -u  /dev/null /dev/null 2>/dev/null && cf="diff -u"
 diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"

+# Some tests involve NUL characters. It seems impossible to handle them easily
+# in many operating systems. An earlier version of this script used sed to
+# translate NUL into the string ZERO, but this didn't work on Solaris (aka
+# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
+# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
+# even when using GNU sed. A user suggested using tr instead, which
+# necessitates translating to a single character. However, on (some versions
+# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
+# /usr/xpg4/bin/tr is available, it can do so, so test for that.
+
+if [ -x /usr/xpg4/bin/tr ] ; then
+  tr=/usr/xpg4/bin/tr
+else
+  tr=tr
+fi
+
 # If this test is being run from "make check", $srcdir will be set. If not, set
 # it to the current or parent directory, whichever one contains the test data.
 # Subsequently, we run most of the pcre2grep tests in the source directory so
@ -674,13 +690,27 @@ echo "---------------------------- Test 131 -----------------------------" >>tes
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <$srcdir/testdata/grepinput >>testtrygrep 2>&1
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; head -1 <&3; exec 3<&-) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; exec 3<&-) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 134 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

+echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
 # Now compare the results.

 $cf $srcdir/testdata/grepoutput testtrygrep
@ -755,22 +785,6 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
 printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-# This next test involves NUL characters. It seems impossible to handle them
-# easily in many operating systems. An earlier version of this script used sed
-# to translate NUL into the string ZERO, but this didn't work on Solaris (aka
-# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
-# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
-# even when using GNU sed. A user suggested using tr instead, which
-# necessitates translating to a single character (@). However, on (some
-# versions of?) Solaris, the normal "tr" cannot handle binary zeros, but if
-# /usr/xpg4/bin/tr is available, it can do so, so test for that.
-
-if [ -x /usr/xpg4/bin/tr ] ; then
-  tr=/usr/xpg4/bin/tr
-else
-  tr=tr
-fi
-
 printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
 printf 'abc\0def' >testNinputgrep
 $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
--- a/63
+++ b/63
@ -17,8 +17,16 @@
 # individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
 # end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
 # runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
-# except test 10. Whatever order the arguments are in, the tests are always run
-# in numerical order.
+# except test 10. Whatever order the arguments are in, these tests are always
+# run in numerical order.
+#
+# If no specific tests are selected (which is the case when this script is run
+# via 'make check') the default is to run all the numbered tests.
+#
+# There may also be named (as well as numbered) tests for special purposes. At
+# present there is just one, called "heap". This test's output contains the
+# sizes of heap frames and frame vectors, which depend on the environment. It
+# is therefore not run unless explicitly requested.
 #
 # Inappropriate tests are automatically skipped (with a comment to say so). For
 # example, if JIT support is not compiled, test 16 is skipped, whereas if JIT
@ -80,7 +88,9 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)"
 title23="Test 23: \C disabled test"
 title24="Test 24: Non-UTF pattern conversion tests"
 title25="Test 25: UTF pattern conversion tests"
-maxtest=25
+title26="Test 26: Auto-generated unicode property tests"
+maxtest=26
+titleheap="Test 'heap': Environment-specific heap tests"

 if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title0
@ -109,6 +119,12 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title23
  echo $title24
  echo $title25
+  echo $title26
+  echo ""
+  echo $titleheap
+  echo ""
+  echo "Numbered tests are automatically run if nothing selected."
+  echo "Named tests must be explicitly selected."
  exit 0
 fi

@ -238,6 +254,8 @@ do22=no
 do23=no
 do24=no
 do25=no
+do26=no
+doheap=no

 while [ $# -gt 0 ] ; do
  case $1 in
@ -267,6 +285,8 @@ while [ $# -gt 0 ] ; do
   23) do23=yes;;
   24) do24=yes;;
   25) do25=yes;;
+   26) do26=yes;;
+ heap) doheap=yes;;
   -8) arg8=yes;;
  -16) arg16=yes;;
  -32) arg32=yes;;
@ -320,7 +340,8 @@ fi
 # set up a large stack.

 $sim ./pcre2test -S 64 /dev/null /dev/null
-if [ $? -eq 0 -a "$bigstack" != "" ] ; then
+support_setstack=$?
+if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then
  setstack="-S 64"
 else
  setstack=""
@ -407,8 +428,8 @@ if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
  fi
 fi

-# If no specific tests were requested, select all. Those that are not
-# relevant will be automatically skipped.
+# If no specific tests were requested, select all the numbered tests. Those
+# that are not relevant will be automatically skipped.

 if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do4  = no -a $do5  = no -a $do6  = no -a $do7  = no -a \
@ -416,7 +437,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
     $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
     $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
-     $do24 = no -a $do25 = no \
+     $do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \
   ]; then
  do0=yes
  do1=yes
@ -444,6 +465,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
  do23=yes
  do24=yes
  do25=yes
+  do26=yes
 fi

 # Handle any explicit skips at this stage, so that an argument list may consist
@ -479,7 +501,9 @@ for bmode in "$test8" "$test16" "$test32"; do
    echo '' >testtry
    checkspecial '-C'
    checkspecial '--help'
-    checkspecial '-S 1 -t 10 testSinput'
+    if [ $support_setstack -eq 0 ] ; then
+      checkspecial '-S 1 -t 10 testSinput'
+    fi
    echo "  OK"
  fi

@ -860,6 +884,29 @@ for bmode in "$test8" "$test16" "$test32"; do
    fi
  fi

+  # Auto-generated unicode property tests
+
+  if [ $do26 = yes ] ; then
+    echo $title26
+    if [ $utf -eq 0 ] ; then
+      echo "  Skipped because UTF-$bits support is not available"
+    else
+      for opt in "" $jitopt; do
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
+        checkresult $? 26 "$opt"
+      done
+    fi
+  fi
+
+  # Manually selected heap tests - output may vary in different environments,
+  # which is why that are not automatically run.
+
+  if [ $doheap = yes ] ; then
+    echo $titleheap
+    $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry
+    checkresult $? heap-$bits ""
+  fi
+
 # End of loop for 8/16/32-bit tests
 done

--- a/RunTest.bat
+++ b/RunTest.bat
@ -135,9 +135,9 @@ if "%all%" == "yes" (
  set do7=yes
  set do8=yes
  set do9=yes
-  set do10=yes
+  set do10=no
  set do11=yes
-  set do12=yes
+  set do12=no
  set do13=yes
  set do14=yes
  set do15=yes
--- a/WORKSPACE.bazel
+++ b/WORKSPACE.bazel
@ -0,0 +1 @@
+# See MODULE.bazel
--- a/cmake/FindEditline.cmake
+++ b/cmake/FindEditline.cmake
@ -1,17 +1,16 @@
 # Modified from FindReadline.cmake (PH Feb 2012)

-if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
  set(EDITLINE_FOUND TRUE)
-else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
-  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h
-    /usr/include/editline
-    /usr/include/edit/readline  
-    /usr/include/readline
+else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
+  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES
+    editline
+    edit/readline
  )
  
  FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
  include(FindPackageHandleStandardArgs)
-  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY )
+  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
-endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@ -2,8 +2,6 @@

 #cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
 #cmakedefine HAVE_DIRENT_H 1
-#cmakedefine HAVE_INTTYPES_H 1    
-#cmakedefine HAVE_STDINT_H 1                                                   
 #cmakedefine HAVE_STRERROR 1
 #cmakedefine HAVE_SYS_STAT_H 1
 #cmakedefine HAVE_SYS_TYPES_H 1
--- a/configure.ac
+++ b/configure.ac
@ -9,15 +9,15 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
 dnl be defined as -RC2, for example. For real releases, it should be empty.

 m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [38])
-m4_define(pcre2_prerelease, [-RC1])
-m4_define(pcre2_date, [2021-08-31])
+m4_define(pcre2_minor, [41])
+m4_define(pcre2_prerelease, [])
+m4_define(pcre2_date, [2022-xx-xx])

 # Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre2_8_version,     [10:3:10])
-m4_define(libpcre2_16_version,    [10:3:10])
-m4_define(libpcre2_32_version,    [10:3:10])
-m4_define(libpcre2_posix_version, [3:1:0])
+m4_define(libpcre2_8_version,     [11:0:11])
+m4_define(libpcre2_16_version,    [11:0:11])
+m4_define(libpcre2_32_version,    [11:0:11])
+m4_define(libpcre2_posix_version, [3:2:0])

 # NOTE: The CMakeLists.txt file searches for the above variables in the first
 # 50 lines of this file. Please update that if the variables above are moved.
@ -512,7 +512,20 @@ AC_TYPE_SIZE_T

 # Checks for library functions.

-AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp realpath secure_getenv strerror)
+AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
+AC_MSG_CHECKING([for realpath])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <stdlib.h>
+#include <limits.h>
+]],[[
+char buffer[PATH_MAX];
+realpath(".", buffer);
+]])],
+[AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_REALPATH], 1,
+  [Define to 1 if you have the `realpath' function.])
+],
+AC_MSG_RESULT([no]))

 # Check for the availability of libz (aka zlib)

@ -584,14 +597,14 @@ if test "$enable_pcre2test_libreadline" = "yes"; then
 fi
 fi

-
 # Check for the availability of libedit. Different distributions put its
 # headers in different places. Try to cover the most common ones.

 if test "$enable_pcre2test_libedit" = "yes"; then
-  AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
-    [AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
-      [AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
+  AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [
+    HAVE_LIBEDIT_HEADER=1
+    break
+  ])
  AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
 fi

@ -927,10 +940,9 @@ if test "$enable_pcre2test_libedit" = "yes"; then
    echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
    exit 1
  fi
-  if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
-          "$HAVE_READLINE_READLINE_H" != "1"; then
-    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
-    echo "** nor readline/readline.h was found."
+  if test -z "$HAVE_LIBEDIT_HEADER"; then
+    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h,"
+    echo "** edit/readline/readline.h nor a compatible header was found."
    exit 1
  fi
  if test -z "$LIBEDIT"; then
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@ -121,6 +121,7 @@ environment, for example.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -306,7 +307,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
    source dir. For example, C:\pcre2\pcre2-xx\build.

-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
    to start Cmake from the Windows Start menu, as this can lead to errors.

@ -343,10 +344,10 @@ cache can be deleted by selecting "File > Delete Cache".

 BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO

-The code currently cannot be compiled without a stdint.h header, which is
-available only in relatively recent versions of Visual Studio. However, this
-portable and permissively-licensed implementation of the header worked without
-issue:
+The code currently cannot be compiled without an inttypes.h header, which is
+available only with Visual Studio 2013 or newer. However, this portable and
+permissively-licensed implementation of the stdint.h header could be used as an
+alternative:

  http://www.azillionmonkeys.com/qed/pstdint.h

@ -373,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (wherein which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -5,11 +5,10 @@ PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
 API. Since its initial release in 2015, there has been further development of
 the code and it now differs from PCRE1 in more than just the API. There are new
 features, and the internals have been improved. The original PCRE1 library is
-now obsolete and should not be used in new projects. The latest release of
-PCRE2 is available in .tar.gz, tar.bz2, or .zip form from this GitHub
-repository:
+now obsolete and no longer maintained. The latest release of PCRE2 is available
+in .tar.gz, tar.bz2, or .zip form from this GitHub repository:

-https://github.com/PhilipHazel/pcre2/releases
+https://github.com/PCRE2Project/pcre2/releases

 There is a mailing list for discussion about the development of PCRE2 at
 pcre2-dev@googlegroups.com. You can subscribe by sending an email to
@ -18,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
 You can access the archives and also subscribe or manage your subscription
 here:

-https://groups.google.com/pcre2-dev
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -115,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.

-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.

+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@ -189,10 +194,10 @@ library. They are also documented in the pcre2build man page.

  As well as supporting UTF strings, Unicode support includes support for the
  \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).

 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
  of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@ -370,19 +375,20 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.

 . The C99 standard defines formatting modifiers z and t for size_t and
  ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-  environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-  defined and has a value greater than or equal to 199901L (indicating C99).
-  However, there is at least one environment that claims to be C99 but does not
-  support these modifiers. If --disable-percent-zt is specified, no use is made
-  of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
-  size_t values.
+  environments other than Microsoft Visual Studio versions earlier than 2013
+  when __STDC_VERSION__ is defined and has a value greater than or equal to
+  199901L (indicating C99). However, there is at least one environment that
+  claims to be C99 but does not support these modifiers. If
+  --disable-percent-zt is specified, no use is made of the z or t modifiers.
+  Instead of %td or %zu, %lu is used, with a cast for size_t values.

 . There is a special option called --enable-fuzz-support for use by people who
  want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
@ -395,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -412,7 +418,7 @@ The "configure" script builds the following files for the basic C library:
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                         that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@ -572,9 +578,9 @@ at build time" for more details.
 Making new tarballs
 -------------------

-The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats.
-The command "make distcheck" does the same, but then does a trial build of the
-new distribution to ensure that it works.
+The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
+zip formats. The command "make distcheck" does the same, but then does a trial
+build of the new distribution to ensure that it works.

 If you have modified any of the man page sources in the doc directory, you
 should first run the PrepareRelease script before making a distribution. This
@ -603,13 +609,13 @@ is available. RunTest outputs a comment when it skips a test.

 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.

 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.

-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:

@ -690,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
@ -906,4 +912,4 @@ The distribution should contain the files listed below.
 Philip Hazel
 Email local part: Philip.Hazel
 Email domain: gmail.com
-Last updated: 27 August 2021
+Last updated: 15 April 2022
--- a/doc/html/pcre2_compile.html
+++ b/doc/html/pcre2_compile.html
@ -92,8 +92,18 @@ Additional options may be set in the compile context via the
 function.
 </P>
 <P>
-The yield of this function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+If either of <i>errorcode</i> or <i>erroroffset</i> is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the <i>errorcode</i> argument to the
+<b>pcre2_get_error_message()</b> function. The offset (in code units) where the
+error was encountered is returned via the <i>erroroffset</i> argument.
+</P>
+<P>
+If there is no error, the value passed via <i>errorcode</i> returns the message
+"no error" if passed to <b>pcre2_get_error_message()</b>, and the value passed
+via <i>erroroffset</i> is zero.
 </P>
 <P>
 There is a complete description of the PCRE2 native API, with more detail on
--- a/doc/html/pcre2_jit_stack_create.html
+++ b/doc/html/pcre2_jit_stack_create.html
@ -34,7 +34,8 @@ allocation. The result can be passed to the JIT run-time code by calling
 <b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
 which can then be processed by <b>pcre2_match()</b> or <b>pcre2_jit_match()</b>.
 A maximum stack size of 512KiB to 1MiB should be more than enough for any
-pattern. For more details, see the
+pattern. If the stack couldn't be allocated or the values passed were not
+reasonable, NULL will be returned. For more details, see the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 page.
 </P>
--- a/doc/html/pcre2_serialize_decode.html
+++ b/doc/html/pcre2_serialize_decode.html
@ -48,7 +48,7 @@ the following negative error codes:
  PCRE2_ERROR_BADDATA   <i>number_of_codes</i> is zero or less
  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in <i>bytes</i>
  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
-  PCRE2_ERROR_MEMORY    memory allocation failed
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
  PCRE2_ERROR_NULL      <i>codes</i> or <i>bytes</i> is NULL
 </pre>
 PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
--- a/doc/html/pcre2_set_compile_extra_options.html
+++ b/doc/html/pcre2_set_compile_extra_options.html
@ -30,8 +30,8 @@ This function sets additional option bits for <b>pcre2_compile()</b> that are
 housed in a compile context. It completely replaces all the bits. The extra
 options are:
 <pre>
-  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{df800} to \x{dfff}
-                                         in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \u, \U, and \x handling
  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as a literal following character
  PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \r as \n
--- a/doc/html/pcre2_substitute.html
+++ b/doc/html/pcre2_substitute.html
@ -68,29 +68,29 @@ automatically added.
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 <pre>
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_LITERAL   The replacement string is literal
-  PCRE2_SUBSTITUTE_MATCHED   Use pre-existing match data for 1st match
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
+  PCRE2_NOTBOL                       Subject is not the beginning of a line
+  PCRE2_NOTEOL                       Subject is not the end of a line
+  PCRE2_NOTEMPTY                     An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in the subject or replacement
+                                      (only relevant if PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
  PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 </pre>
 If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
 PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
 </P>
 <P>
-If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-zero; its
+If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-NULL; its
 contents must be the result of a call to <b>pcre2_match()</b> using the same
 pattern and subject.
 </P>
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -1017,7 +1017,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 </P>
 <P>
 A value for the heap limit may also be supplied by an item at the start of a
@ -1030,19 +1030,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The <b>pcre2_match()</b> function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+<b>pcre2_match()</b> uses the heap are given in the
+<a href="pcre2perform.html"><b>pcre2perform</b></a>
+documentation.
 </P>
 <P>
-Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 <br>
 <br>
 <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@ -1089,10 +1087,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
 <br>
 <br>
 This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@ -1383,8 +1381,7 @@ If <i>errorcode</i> or <i>erroroffset</i> is NULL, <b>pcre2_compile()</b> return
 NULL immediately. Otherwise, the variables to which these point are set to an
 error code and an offset (number of code units) within the pattern,
 respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
-error has occurred. The values are not defined when compilation is successful
-and <b>pcre2_compile()</b> returns a non-NULL value.
+error has occurred.
 </P>
 <P>
 There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
@ -1399,15 +1396,18 @@ because the textual error messages that are obtained by calling the
 message"
 <a href="#geterrormessage">below)</a>
 should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
-for both positive and negative error codes in <b>pcre2.h</b>.
+for both positive and negative error codes in <b>pcre2.h</b>. When compilation
+is successful <i>errorcode</i> is set to a value that returns the message "no
+error" if passed to <b>pcre2_get_error_message()</b>.
 </P>
 <P>
 The value returned in <i>erroroffset</i> is an indication of where in the
-pattern the error occurred. It is not necessarily the furthest point in the
-pattern that was read. For example, after the error "lookbehind assertion is
-not fixed length", the error offset points to the start of the failing
-assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
-first code unit of the failing character.
+pattern an error occurred. When there is no error, zero is returned. A non-zero
+value is not necessarily the furthest point in the pattern that was read. For
+example, after the error "lookbehind assertion is not fixed length", the error
+offset points to the start of the failing assertion. For an invalid UTF-8 or
+UTF-16 string, the offset is that of the first code unit of the failing
+character.
 </P>
 <P>
 Some errors are not detected until the whole pattern has been scanned; in these
@ -1845,7 +1845,7 @@ undefined. It may cause your program to crash or loop.
 </P>
 <P>
 Note that this option can also be passed to <b>pcre2_match()</b> and
-<b>pcre_dfa_match()</b>, to suppress UTF validity checking of the subject
+<b>pcre2_dfa_match()</b>, to suppress UTF validity checking of the subject
 string.
 </P>
 <P>
@ -2055,8 +2055,8 @@ point. However, this applies only to characters whose code points are less than
 \d.
 </P>
 <P>
-When PCRE2 is built with Unicode support (the default), the Unicode properties
-of all characters can be tested with \p and \P, or, alternatively, the
+When PCRE2 is built with Unicode support (the default), certain Unicode
+character properties can be tested with \p and \P, or, alternatively, the
 PCRE2_UCP option can be set when a pattern is compiled; this causes \w and
 friends to use Unicode property support instead of the built-in tables.
 PCRE2_UCP also causes upper/lower casing operations on characters with code
@ -2316,7 +2316,7 @@ return zero. The third argument should point to a <b>size_t</b> variable.
  PCRE2_INFO_LASTCODETYPE
 </pre>
 Returns 1 if there is a rightmost literal code unit that must exist in any
-matched string, other than at its start. The third argument should  point to a
+matched string, other than at its start. The third argument should point to a
 <b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
 returned, the code unit value itself can be retrieved using
 PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is
@ -2640,7 +2640,9 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
 <i>startoffset</i>. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if <i>subject</i> is NULL and
+<i>length</i> is zero, the subject is assumed to be an empty string. If
+<i>length</i> is non-zero, an error occurs if <i>subject</i> is NULL.
 </P>
 <P>
 If <i>startoffset</i> is greater than the length of the subject,
@ -3144,11 +3146,11 @@ The backtracking match limit was reached.
 <pre>
  PCRE2_ERROR_NOMEMORY
 </pre>
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backtracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 <pre>
  PCRE2_ERROR_NULL
 </pre>
@ -3394,12 +3396,17 @@ same number causes an error at compile time.
 <P>
 This function optionally calls <b>pcre2_match()</b> and then makes a copy of the
 subject string in <i>outputbuffer</i>, replacing parts that were matched with
-the <i>replacement</i> string, whose length is supplied in <b>rlength</b>. This
-can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
-option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
-replacement string(s). The default action is to perform just one replacement if
-the pattern matches, but there is an option that requests multiple replacements
-(see PCRE2_SUBSTITUTE_GLOBAL below).
+the <i>replacement</i> string, whose length is supplied in <i>rlength</i>, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if <i>replacement</i> is NULL and <i>rlength</i> is zero, the
+replacement is assumed to be an empty string. If <i>rlength</i> is non-zero, an
+error occurs if <i>replacement</i> is NULL.
+</P>
+<P>
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
 </P>
 <P>
 If successful, <b>pcre2_substitute()</b> returns the number of substitutions
@ -3433,12 +3440,12 @@ block may or may not have been changed.
 As well as the usual options for <b>pcre2_match()</b>, a number of additional
 options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
 One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
-<i>match_data</i> block must be provided, and it must have been used for an
-external call to <b>pcre2_match()</b>. The data in the <i>match_data</i> block
-(return code, offset vector) is used for the first substitution instead of
-calling <b>pcre2_match()</b> from within <b>pcre2_substitute()</b>. This allows
-an application to check for a match before choosing to substitute, without
-having to repeat the match.
+<i>match_data</i> block must be provided, and it must have already been used for
+an external call to <b>pcre2_match()</b> with the same pattern and subject
+arguments. The data in the <i>match_data</i> block (return code, offset vector)
+is then used for the first substitution instead of calling <b>pcre2_match()</b>
+from within <b>pcre2_substitute()</b>. This allows an application to check for a
+match before choosing to substitute, without having to repeat the match.
 </P>
 <P>
 The contents of the externally supplied match data block are not changed when
@ -3583,7 +3590,7 @@ and force lower case. The escape sequences change the current state: \U and
 terminating a \Q quoted sequence) reverts to no case forcing. The sequences
 \u and \l force the next character (if it is a letter) to upper or lower
 case, respectively, and then the state automatically reverts to no case
-forcing. Case forcing applies to all inserted  characters, including those from
+forcing. Case forcing applies to all inserted characters, including those from
 capture groups and letters within \Q...\E quoted sequences. If either
 PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
 properties are used for case forcing characters whose code points are greater
@ -3655,7 +3662,9 @@ default.
 </P>
 <P>
 PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
-<i>match_data</i> argument is NULL.
+<i>match_data</i> argument is NULL or if the <i>subject</i> or <i>replacement</i>
+arguments are NULL. For backward compatibility reasons an exception is made for
+the <i>replacement</i> argument if the <i>rlength</i> argument is also 0.
 </P>
 <P>
 PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
@ -3810,12 +3819,13 @@ other alternatives. Ultimately, when it runs out of matches,
 <P>
 The function <b>pcre2_dfa_match()</b> is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-<b>pcre2_dfa_match()</b> does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that <b>pcre2_dfa_match()</b> does
+not support, see the
 <a href="pcre2matching.html"><b>pcre2matching</b></a>
 documentation.
 </P>
@ -3850,7 +3860,7 @@ Here is an example of a simple call to <b>pcre2_dfa_match()</b>:
 </PRE>
 </P>
 <br><b>
-Option bits for <b>pcre_dfa_match()</b>
+Option bits for <b>pcre2_dfa_match()</b>
 </b><br>
 <P>
 The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
@ -4008,9 +4018,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC42" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@ -142,8 +142,9 @@ locked this out by setting PCRE2_NEVER_UTF.
 UTF support allows the libraries to process character code points up to
 0x10ffff in the strings that they handle. Unicode support also gives access to
 the Unicode properties of characters, using pattern escapes such as \P, \p,
-and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are
-supported. Details are given in the
+and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i>,
+script names, and some bi-directional properties are supported. Details are
+given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation.
 </P>
@ -283,12 +284,11 @@ to the <b>configure</b> command. This setting also applies to the
 counting is done differently).
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The <b>pcre2_match()</b> function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation. The default limit (in effect unlimited) is 20 million. You can
 change this by a setting such as
@ -307,7 +307,7 @@ You can also explicitly limit the depth of nested backtracking in the
 for --with-match-limit. You can set a lower default limit by adding, for
 example,
 <pre>
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 </pre>
 to the <b>configure</b> command. This value can be overridden at run time. This
 depth limit indirectly limits the amount of heap memory that is used, but
@ -553,15 +553,16 @@ documentation.
 <P>
 The C99 standard defines formatting modifiers z and t for size_t and
 ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-defined and has a value greater than or equal to 199901L (indicating C99).
+environments other than old versions of Microsoft Visual Studio when
+__STDC_VERSION__ is defined and has a value greater than or equal to 199901L
+(indicating support for C99).
 However, there is at least one environment that claims to be C99 but does not
 support these modifiers. If
 <pre>
  --disable-percent-zt
 </pre>
 is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
-%lu is used, with a cast for size_t values.
+a suitable format is used depending on the size of long for the platform.
 </P>
 <br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
 <P>
@ -607,16 +608,16 @@ give a warning.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC26" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 20 March 2020
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2020 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2compat.html
+++ b/doc/html/pcre2compat.html
@ -18,33 +18,41 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
 <P>
 This document describes some of the differences in the ways that PCRE2 and Perl
 handle regular expressions. The differences described here are with respect to
-Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the
+Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the
 information may at times be out of date.
 </P>
 <P>
-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the
+behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the
+next character unless it is the start of a newline sequence. This means that,
+if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF
+(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using
+EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline
+indicator.
+</P>
+<P>
+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
 have are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 page.
 </P>
 <P>
-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
 they do not mean what you might think. For example, (?!a){3} does not assert
 that the next three characters are not "a". It just asserts that the next
 character is not "a" three times (in principle; PCRE2 optimizes this to run the
 assertion just once). Perl allows some repeat quantifiers on other assertions,
-for example, \b* (but not \b{3}, though oddly it does allow ^{3}), but these
-do not seem to have any use. PCRE2 does not allow any kind of quantifier on
-non-lookaround assertions.
+for example, \b*, but these do not seem to have any use. PCRE2 does not allow
+any kind of quantifier on non-lookaround assertions.
 </P>
 <P>
-3. Capture groups that occur inside negative lookaround assertions are counted,
+4. Capture groups that occur inside negative lookaround assertions are counted,
 but their entries in the offsets vector are set only when a negative assertion
 is a condition that has a matching branch (that is, the condition is false).
 Perl may set such capture groups in other circumstances.
 </P>
 <P>
-4. The following Perl escape sequences are not supported: \F, \l, \L, \u,
+5. The following Perl escape sequences are not supported: \F, \l, \L, \u,
 \U, and \N when followed by a character name. \N on its own, matching a
 non-newline character, and \N{U+dd..}, matching a Unicode code point, are
 supported. The escapes that modify the case of following letters are
@ -55,26 +63,26 @@ PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript
 interprets them.
 </P>
 <P>
-5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
+6. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
 built with Unicode support (the default). The properties that can be tested
 with \p and \P are limited to the general category properties such as Lu and
-Nd, script names such as Greek or Han, and the derived properties Any and L&.
-Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use
-is limited. See the
+Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the
+derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs
+(surrogate) property, but in PCRE2 its use is limited. See the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation for details. The long synonyms for property names that Perl
 supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted
 to prefix any of these properties with "Is".
 </P>
 <P>
-6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
+7. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
 in between are treated as literals. However, this is slightly different from
 Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
-they cause variable interpolation (but of course PCRE2 does not have
-variables). Also, Perl does "double-quotish backslash interpolation" on any
-backslashes between \Q and \E which, its documentation says, "may lead to
-confusing results". PCRE2 treats a backslash between \Q and \E just like any
-other character. Note the following examples:
+they cause variable interpolation (PCRE2 does not have variables). Also, Perl
+does "double-quotish backslash interpolation" on any backslashes between \Q
+and \E which, its documentation says, "may lead to confusing results". PCRE2
+treats a backslash between \Q and \E just like any other character. Note the
+following examples:
 <pre>
    Pattern            PCRE2 matches     Perl matches

@ -88,19 +96,19 @@ The \Q...\E sequence is recognized both inside and outside character classes
 by both PCRE2 and Perl.
 </P>
 <P>
-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
 constructions. However, PCRE2 does have a "callout" feature, which allows an
 external function to be called during pattern matching. See the
 <a href="pcre2callout.html"><b>pcre2callout</b></a>
 documentation for details.
 </P>
 <P>
-8. Subroutine calls (whether recursive or not) were treated as atomic groups up
+9. Subroutine calls (whether recursive or not) were treated as atomic groups up
 to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
 into subroutine calls is now supported, as in Perl.
 </P>
 <P>
-9. In PCRE2, if any of the backtracking control verbs are used in a group that
+10. In PCRE2, if any of the backtracking control verbs are used in a group that
 is called as a subroutine (whether or not recursively), their effect is
 confined to that group; it does not extend to the surrounding pattern. This is
 not always the case in Perl. In particular, if (*THEN) is present in a group
@ -109,20 +117,20 @@ the group does not contain any | characters. Note that such groups are
 processed as anchored at the point where they are tested.
 </P>
 <P>
-10. If a pattern contains more than one backtracking control verb, the first
+11. If a pattern contains more than one backtracking control verb, the first
 one that is backtracked onto acts. For example, in the pattern
 A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 same as PCRE2, but there are cases where it differs.
 </P>
 <P>
-11. There are some differences that are concerned with the settings of captured
+12. There are some differences that are concerned with the settings of captured
 strings when part of a pattern is repeated. For example, matching "aba" against
 the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
 "b".
 </P>
 <P>
-12. PCRE2's handling of duplicate capture group numbers and names is not as
+13. PCRE2's handling of duplicate capture group numbers and names is not as
 general as Perl's. This is a consequence of the fact the PCRE2 works internally
 just with numbers, using an external table to translate between numbers and
 names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B)), where the two
@ -132,42 +140,43 @@ to distinguish which group matched, because both names map to capture group
 number 1. To avoid this confusing situation, an error is given at compile time.
 </P>
 <P>
-13. Perl used to recognize comments in some places that PCRE2 does not, for
+14. Perl used to recognize comments in some places that PCRE2 does not, for
 example, between the ( and ? at the start of a group. If the /x modifier is
 set, Perl allowed white space between ( and ? though the latest Perls give an
 error (for a while it was just deprecated). There may still be some cases where
 Perl behaves differently.
 </P>
 <P>
-14. Perl, when in warning mode, gives warnings for character classes such as
+15. Perl, when in warning mode, gives warnings for character classes such as
 [A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
 warning features, so it gives an error in these cases because they are almost
 certainly user mistakes.
 </P>
 <P>
-15. In PCRE2, the upper/lower case character properties Lu and Ll are not
+16. In PCRE2, the upper/lower case character properties Lu and Ll are not
 affected when case-independent matching is specified. For example, \p{Lu}
 always matches an upper case letter. I think Perl has changed in this respect;
-in the release at the time of writing (5.32), \p{Lu} and \p{Ll} match all
+in the release at the time of writing (5.34), \p{Lu} and \p{Ll} match all
 letters, regardless of case, when case independence is specified.
 </P>
 <P>
-16. From release 5.32.0, Perl locks out the use of \K in lookaround
+17. From release 5.32.0, Perl locks out the use of \K in lookaround
 assertions. From release 10.38 PCRE2 does the same by default. However, there
 is an option for re-enabling the previous behaviour. When this option is set,
 \K is acted on when it occurs in positive assertions, but is ignored in
 negative assertions.
 </P>
 <P>
-17. PCRE2 provides some extensions to the Perl regular expression facilities.
+18. PCRE2 provides some extensions to the Perl regular expression facilities.
 Perl 5.10 included new features that were not in earlier versions of Perl, some
 of which (such as named parentheses) were in PCRE2 for some time before. This
-list is with respect to Perl 5.32:
+list is with respect to Perl 5.34:
 <br>
 <br>
 (a) Although lookbehind assertions in PCRE2 must match fixed length strings,
 each alternative toplevel branch of a lookbehind assertion can match a
-different length of string. Perl requires them all to have the same length.
+different length of string. Perl used to require them all to have the same
+length, but the latest version has some variable length support.
 <br>
 <br>
 (b) From PCRE2 10.23, backreferences to groups of fixed length are supported
@ -221,12 +230,12 @@ extension to the lookaround facilities. The default, Perl-compatible
 lookarounds are atomic.
 </P>
 <P>
-18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
+19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
 modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode
 rules. This separation cannot be represented with PCRE2_UCP.
 </P>
 <P>
-19. Perl has different limits than PCRE2. See the
+20. Perl has different limits than PCRE2. See the
 <a href="pcre2limit.html"><b>pcre2limit</b></a>
 documentation for details. Perl went with 5.10 from recursion to iteration
 keeping the intermediate matches on the heap, which is ~10% slower but does not
@ -248,7 +257,7 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 08 December 2021
 <br>
 Copyright &copy; 1997-2021 University of Cambridge.
 <br>
--- a/doc/html/pcre2convert.html
+++ b/doc/html/pcre2convert.html
@ -141,8 +141,8 @@ permitted to match separator characters, but the double-star (**) feature
 </P>
 <P>
 PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
-match separator characters. PCRE2_GLOB_NO_STARSTAR matches globs with the
-double-star feature disabled. These options may be given together.
+match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with
+the double-star feature disabled. These options may be given together.
 </P>
 <br><a name="SEC5" href="#TOC1">CONVERTING POSIX PATTERNS</a><br>
 <P>
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -71,13 +71,15 @@ For example:
 <pre>
  pcre2grep some-pattern file1 - file3
 </pre>
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how <b>pcre2grep</b> behaves. In
-particular, the <b>-M</b> option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-<b>-N</b> (<b>--newline</b>) option.
+However, there are options that can change how <b>pcre2grep</b> behaves. For
+example, the <b>-M</b> option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the <b>-N</b>
+(<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
+not file names are shown, and the <b>-Z</b> option changes the file name
+terminator to a zero byte.
 </P>
 <P>
 The amount of memory used for buffering files that are being scanned is
@ -178,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of <i>number</i>
-is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
+context lines (the <b>-Z</b> option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
+<b>-A</b> is ignored.
 </P>
 <P>
 <b>-a</b>, <b>--text</b>
@ -199,9 +203,10 @@ Output up to <i>number</i> lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 <i>number</i> lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of <i>number</i> is expected to be relatively small. When
+instead of a colon for the context lines (the <b>-Z</b> option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of <i>number</i> is expected to be relatively small. When
 <b>-c</b> is used, <b>-B</b> is ignored.
 </P>
 <P>
@ -411,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
 <P>
 <b>-H</b>, <b>--with-filename</b>
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the <b>-M</b> option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the <b>-M</b> option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>-h</b>, <b>--no-filename</b>
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
+Suppress the output file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The <b>-Z</b> option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>--heap-limit</b>=<i>number</i>
@ -481,18 +488,20 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
 <b>-L</b>, <b>--files-without-match</b>
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous <b>-H</b>,
-<b>-h</b>, or <b>-l</b> options.
+output once, on a separate line by default, but if the <b>-Z</b> option is set, 
+they are separated by zero bytes instead of newlines. This option overrides any
+previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>-l</b>, <b>--files-with-matches</b>
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the <b>-c</b> (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches that
+a separate line, but if the <b>-Z</b> option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches that
 occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
 <b>-h</b>, or <b>-L</b> options.
 </P>
@ -592,10 +601,7 @@ value set by <b>--match-limit</b> is reached, an error occurs.
 <br>
 <br>
 The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 <br>
 <br>
 The <b>--depth-limit</b> option limits the depth of nested backtracking points,
@ -839,6 +845,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the <b>--include</b> or <b>--exclude</b> options.
 </P>
+<P>
+<b>-Z</b>, <b>--null</b>
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
+</P>
 <br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 <P>
 The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
@ -1053,9 +1066,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 31 August 2021
+Last updated: 30 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@ -269,11 +269,11 @@ starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 </P>
 <P>
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 </P>
 <P>
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
@ -382,8 +382,8 @@ out this complicated API.
 <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <P>
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@ -442,10 +442,10 @@ that was not compiled.
 <P>
 When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path, and if invalid data is passed, the result is undefined.
 </P>
 <P>
 Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
@ -466,9 +466,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC14" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 23 May 2019
+Last updated: 30 November 2021
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
 </P>
+<P>
+The maximum amount of heap memory used for matching is controlled by the heap 
+limit, which can be set in a pattern or in a match context. The default is a 
+very large number, effectively unlimited.
+</P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -86,9 +91,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 02 February 2019
+Last updated: 26 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@ -534,7 +534,7 @@ for themselves. For example, outside a character class:
  \0113  is a tab followed by the character "3"
  \113   might be a backreference, otherwise the character with octal code 113
  \377   might be a backreference, otherwise the value 255 (decimal)
-  \81    is always a backreference .sp
+  \81    is always a backreference
 </pre>
 Note that octal values of 100 or greater that are specified using this syntax
 must not be introduced by a leading zero, because no more than three octal
@ -776,194 +776,62 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
 sequences are of course limited to testing characters whose code points are
 less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
 greater than 0x10ffff (the Unicode limit) may be encountered. These are all
-treated as being in the Unknown script and with an unassigned type. The extra
-escape sequences are:
+treated as being in the Unknown script and with an unassigned type.
+</P>
+<P>
+Matching characters by Unicode property is not fast, because PCRE2 has to do a
+multistage table lookup in order to find a character's property. That is why
+the traditional escape sequences such as \d and \w do not use Unicode
+properties in PCRE2 by default, though you can make them do so by setting the
+PCRE2_UCP option or by starting the pattern with (*UCP).
+</P>
+<P>
+The extra escape sequences that provide property support are:
 <pre>
  \p{<i>xx</i>}   a character with the <i>xx</i> property
  \P{<i>xx</i>}   a character without the <i>xx</i> property
  \X       a Unicode extended grapheme cluster
 </pre>
-The property names represented by <i>xx</i> above are case-sensitive. There is
-support for Unicode script names, Unicode general category properties, "Any",
-which matches any character (including newline), and some special PCRE2
-properties (described in the
-<a href="#extraprops">next section).</a>
-Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2.
-Note that \P{Any} does not match any characters, so always causes a match
-failure.
+The property names represented by <i>xx</i> above are not case-sensitive, and in
+accordance with Unicode's "loose matching" rules, spaces, hyphens, and
+underscores are ignored. There is support for Unicode script names, Unicode
+general category properties, "Any", which matches any character (including
+newline), Bidi_Class, a number of binary (yes/no) properties, and some special
+PCRE2 properties (described
+<a href="#extraprops">below).</a>
+Certain other Perl properties such as "InMusicalSymbols" are not supported by
+PCRE2. Note that \P{Any} does not match any characters, so always causes a
+match failure.
+</P>
+<br><b>
+Script properties for \p and \P
+</b><br>
+<P>
+There are three different syntax forms for matching a script. Each Unicode
+character has a basic script and, optionally, a list of other scripts ("Script
+Extensions") with which it is commonly used. Using the Adlam script as an
+example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas
+\p{scx:Adlam} matches, in addition, characters that have Adlam in their
+extensions list. The full names "script" and "script extensions" for the
+property types are recognized, and an equals sign is an alternative to the
+colon. If a script name is given without a property type, for example,
+\p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this
+interpretation at release 5.26 and PCRE2 changed at release 10.40.
 </P>
 <P>
-Sets of Unicode characters are defined as belonging to certain scripts. A
-character from one of these sets can be matched using a script name. For
-example:
-<pre>
-  \p{Greek}
-  \P{Han}
-</pre>
 Unassigned characters (and in non-UTF 32-bit mode, characters with code points
 greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
 part of an identified script are lumped together as "Common". The current list
-of scripts is:
-</P>
-<P>
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Chorasmian,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dives_Akuru,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khitan_Small_Script,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Unknown,
-Vai,
-Wancho,
-Warang_Citi,
-Yezidi,
-Yi,
-Zanabazar_Square.
+of recognized script names and their 4-character abbreviations can be obtained
+by running this command:
+<pre>
+  pcre2test -LS
+
+</PRE>
 </P>
+<br><b>
+The general category property for \p and \P
+</b><br>
 <P>
 Each character has exactly one Unicode general category property, specified by
 a two-letter abbreviation. For compatibility with Perl, negation can be
@ -1025,9 +893,9 @@ The following general category property codes are supported:
  Zp    Paragraph separator
  Zs    Space separator
 </pre>
-The special property L& is also supported: it matches a character that has
-the Lu, Ll, or Lt property, in other words, a letter that is not classified as
-a modifier or "other".
+The special property LC, which has the synonym L&, is also supported: it
+matches a character that has the Lu, Ll, or Lt property, in other words, a
+letter that is not classified as a modifier or "other".
 </P>
 <P>
 The Cs (Surrogate) property applies only to characters whose code points are in
@ -1054,12 +922,54 @@ Specifying caseless matching does not affect these escape sequences. For
 example, \p{Lu} always matches only upper case letters. This is different from
 the behaviour of current versions of Perl.
 </P>
+<br><b>
+Binary (yes/no) properties for \p and \P
+</b><br>
 <P>
-Matching characters by Unicode property is not fast, because PCRE2 has to do a
-multistage table lookup in order to find a character's property. That is why
-the traditional escape sequences such as \d and \w do not use Unicode
-properties in PCRE2 by default, though you can make them do so by setting the
-PCRE2_UCP option or by starting the pattern with (*UCP).
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\p and \P, along with their abbreviations, by running this command:
+<pre>
+  pcre2test -LP
+
+</PRE>
+</P>
+<br><b>
+The Bidi_Class property for \p and \P
+</b><br>
+<P>
+<pre>
+  \p{Bidi_Class:&#60;class&#62;}   matches a character with the given class
+  \p{BC:&#60;class&#62;}           matches a character with the given class
+</pre>
+The recognized classes are:
+<pre>
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+</pre>
+An equals sign may be used instead of a colon. The class names are
+case-insensitive; only the short names listed above are recognized.
 </P>
 <br><b>
 Extended grapheme clusters
@ -1336,15 +1246,17 @@ end of the subject in both modes, and if all branches of a pattern start with
 <P>
 Outside a character class, a dot in the pattern matches any one character in
 the subject string except (by default) a character that signifies the end of a
-line.
+line. One or more characters may be specified as line terminators (see
+<a href="#newlines">"Newline conventions"</a>
+above).
 </P>
 <P>
-When a line ending is defined as a single character, dot never matches that
-character; when the two-character sequence CRLF is used, dot does not match CR
-if it is immediately followed by LF, but otherwise it matches all characters
-(including isolated CRs and LFs). When any Unicode line endings are being
-recognized, dot does not match CR or LF or any of the other line ending
-characters.
+Dot never matches a single line-ending character. When the two-character
+sequence CRLF is the only line ending, dot does not match CR if it is
+immediately followed by LF, but otherwise it matches all characters (including
+isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurrences
+of CR or LF match dot. When all Unicode line endings are being recognized, dot
+does not match CR or LF or any of the other line ending characters.
 </P>
 <P>
 The behaviour of dot with regard to newlines can be changed. If the
@ -2175,10 +2087,10 @@ be easier to remember:
 <pre>
  (*atomic:\d+)foo
 </pre>
-This kind of parenthesized group "locks up" the  part of the pattern it
-contains once it has matched, and a failure further into the pattern is
-prevented from backtracking into it. Backtracking past it to previous items,
-however, works as normal.
+This kind of parenthesized group "locks up" the part of the pattern it contains
+once it has matched, and a failure further into the pattern is prevented from
+backtracking into it. Backtracking past it to previous items, however, works as
+normal.
 </P>
 <P>
 An alternative description is that a group of this type matches exactly the
@ -2899,7 +2811,7 @@ breaks):
  (?(DEFINE) (?&#60;byte&#62; 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
  \b (?&byte) (\.(?&byte)){3} \b
 </pre>
-The first part of the pattern is a DEFINE group inside which a another group
+The first part of the pattern is a DEFINE group inside which another group
 named "byte" is defined. This matches an individual component of an IPv4
 address (a number less than 256). When matching takes place, this part of the
 pattern is skipped because DEFINE acts like a false condition. The rest of the
@ -3854,9 +3766,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC32" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 12 January 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code. 
+</P>
+<P>
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+</P>
+<P>
+Until release 10.41, an initial 20KiB frames vector was allocated on the system 
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to <b>pcre2_match()</b>. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+</P>
+<P>
+The size of the initial block is the larger of 20KiB or ten times the pattern's 
+frame size, unless the heap limit is less than this, in which case the heap 
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is 
+checked only when a new block is to be allocated. Reducing the heap limit 
+between calls to <b>pcre2_match()</b> with the same match data block does not 
+affect the saved block.
 </P>
 <P>
 In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 February 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2serialize.html
+++ b/doc/html/pcre2serialize.html
@ -23,12 +23,12 @@ please consult the man page, in case the conversion went wrong.
 <br><a name="SEC1" href="#TOC1">SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS</a><br>
 <P>
 <b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, const uint32_t *<i>bytes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, const uint8_t *<i>bytes</i>,</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>int32_t pcre2_serialize_encode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, uint32_t **<i>serialized_bytes</i>,</b>
+<b>int32_t pcre2_serialize_encode(const pcre2_code **<i>codes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, uint8_t **<i>serialized_bytes</i>,</b>
 <b>  PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
@ -94,7 +94,7 @@ of serialized patterns, or one of the following negative error codes:
 <pre>
  PCRE2_ERROR_BADDATA      the number of patterns is zero or less
  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
-  PCRE2_ERROR_MEMORY       memory allocation failed
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
  PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
 </pre>
@ -154,7 +154,6 @@ mangagement functions for the decoded patterns. If this argument is NULL,
 <b>malloc()</b> and <b>free()</b> are used. After deserialization, the byte
 stream is no longer needed and can be discarded.
 <pre>
-  int32_t number_of_codes;
  pcre2_code *list_of_codes[2];
  uint8_t *bytes = &#60;serialized data&#62;;
  int32_t number_of_codes =
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@ -19,29 +19,31 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
 <li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
 <li><a name="TOC6" href="#SEC6">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
-<li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a>
-<li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
-<li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
-<li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
-<li><a name="TOC11" href="#SEC11">REPORTED MATCH POINT SETTING</a>
-<li><a name="TOC12" href="#SEC12">ALTERNATION</a>
-<li><a name="TOC13" href="#SEC13">CAPTURING</a>
-<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
-<li><a name="TOC15" href="#SEC15">COMMENT</a>
-<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
-<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
-<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
-<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
-<li><a name="TOC20" href="#SEC20">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
-<li><a name="TOC21" href="#SEC21">SCRIPT RUNS</a>
-<li><a name="TOC22" href="#SEC22">BACKREFERENCES</a>
-<li><a name="TOC23" href="#SEC23">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
-<li><a name="TOC24" href="#SEC24">CONDITIONAL PATTERNS</a>
-<li><a name="TOC25" href="#SEC25">BACKTRACKING CONTROL</a>
-<li><a name="TOC26" href="#SEC26">CALLOUTS</a>
-<li><a name="TOC27" href="#SEC27">SEE ALSO</a>
-<li><a name="TOC28" href="#SEC28">AUTHOR</a>
-<li><a name="TOC29" href="#SEC29">REVISION</a>
+<li><a name="TOC7" href="#SEC7">BINARY PROPERTIES FOR \p AND \P</a>
+<li><a name="TOC8" href="#SEC8">SCRIPT MATCHING WITH \p AND \P</a>
+<li><a name="TOC9" href="#SEC9">THE BIDI_CLASS PROPERTY FOR \p AND \P</a>
+<li><a name="TOC10" href="#SEC10">CHARACTER CLASSES</a>
+<li><a name="TOC11" href="#SEC11">QUANTIFIERS</a>
+<li><a name="TOC12" href="#SEC12">ANCHORS AND SIMPLE ASSERTIONS</a>
+<li><a name="TOC13" href="#SEC13">REPORTED MATCH POINT SETTING</a>
+<li><a name="TOC14" href="#SEC14">ALTERNATION</a>
+<li><a name="TOC15" href="#SEC15">CAPTURING</a>
+<li><a name="TOC16" href="#SEC16">ATOMIC GROUPS</a>
+<li><a name="TOC17" href="#SEC17">COMMENT</a>
+<li><a name="TOC18" href="#SEC18">OPTION SETTING</a>
+<li><a name="TOC19" href="#SEC19">NEWLINE CONVENTION</a>
+<li><a name="TOC20" href="#SEC20">WHAT \R MATCHES</a>
+<li><a name="TOC21" href="#SEC21">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
+<li><a name="TOC22" href="#SEC22">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
+<li><a name="TOC23" href="#SEC23">SCRIPT RUNS</a>
+<li><a name="TOC24" href="#SEC24">BACKREFERENCES</a>
+<li><a name="TOC25" href="#SEC25">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
+<li><a name="TOC26" href="#SEC26">CONDITIONAL PATTERNS</a>
+<li><a name="TOC27" href="#SEC27">BACKTRACKING CONTROL</a>
+<li><a name="TOC28" href="#SEC28">CALLOUTS</a>
+<li><a name="TOC29" href="#SEC29">SEE ALSO</a>
+<li><a name="TOC30" href="#SEC30">AUTHOR</a>
+<li><a name="TOC31" href="#SEC31">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
 <P>
@ -136,6 +138,11 @@ happening, \s and \w may also match characters with code points in the range
 sequences is changed to use Unicode properties and they match many more
 characters.
 </P>
+<P>
+Property descriptions in \p and \P are matched caselessly; hyphens,
+underscores, and white space are ignored, in accordance with Unicode's "loose
+matching" rules.
+</P>
 <br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
 <P>
 <pre>
@ -152,6 +159,7 @@ characters.
  Lo         Other letter
  Lt         Title case letter
  Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
  L&         Ll, Lu, or Lt

  M          Mark
@ -198,166 +206,58 @@ characters.
 Perl and POSIX space are now the same. Perl added VT to its space character set
 at release 5.18.
 </P>
-<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
+<br><a name="SEC7" href="#TOC1">BINARY PROPERTIES FOR \p AND \P</a><br>
 <P>
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Chorasmian,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dives_Akuru,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khitan_Small_Script,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Vai,
-Wancho,
-Warang_Citi,
-Yezidi,
-Yi,
-Zanabazar_Square.
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\p and \P, along with their abbreviations, by running this command:
+<pre>
+  pcre2test -LP
+</PRE>
 </P>
-<br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
+<br><a name="SEC8" href="#TOC1">SCRIPT MATCHING WITH \p AND \P</a><br>
+<P>
+Many script names and their 4-letter abbreviations are recognized in
+\p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of
+course). You can obtain a list of these scripts by running this command:
+<pre>
+  pcre2test -LS
+</PRE>
+</P>
+<br><a name="SEC9" href="#TOC1">THE BIDI_CLASS PROPERTY FOR \p AND \P</a><br>
+<P>
+<pre>
+  \p{Bidi_Class:&#60;class&#62;}   matches a character with the given class
+  \p{BC:&#60;class&#62;}           matches a character with the given class
+</pre>
+The recognized classes are:
+<pre>
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+</PRE>
+</P>
+<br><a name="SEC10" href="#TOC1">CHARACTER CLASSES</a><br>
 <P>
 <pre>
  [...]       positive character class
@ -385,7 +285,7 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default,
 but some of them use Unicode properties if PCRE2_UCP is set. You can use
 \Q...\E inside a character class.
 </P>
-<br><a name="SEC9" href="#TOC1">QUANTIFIERS</a><br>
+<br><a name="SEC11" href="#TOC1">QUANTIFIERS</a><br>
 <P>
 <pre>
  ?           0 or 1, greedy
@ -406,7 +306,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  {n,}?       n or more, lazy
 </PRE>
 </P>
-<br><a name="SEC10" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
+<br><a name="SEC12" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
 <P>
 <pre>
  \b          word boundary
@ -424,7 +324,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  \G          first matching position in subject
 </PRE>
 </P>
-<br><a name="SEC11" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
+<br><a name="SEC13" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
 <P>
 <pre>
  \K          set reported start of match
@ -434,13 +334,13 @@ for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
 option is set, the previous behaviour is re-enabled. When this option is set,
 \K is honoured in positive assertions, but ignored in negative ones.
 </P>
-<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
+<br><a name="SEC14" href="#TOC1">ALTERNATION</a><br>
 <P>
 <pre>
  expr|expr|expr...
 </PRE>
 </P>
-<br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
+<br><a name="SEC15" href="#TOC1">CAPTURING</a><br>
 <P>
 <pre>
  (...)           capture group
@ -455,20 +355,20 @@ In non-UTF modes, names may contain underscores and ASCII letters and digits;
 in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
 both cases, a name must not start with a digit.
 </P>
-<br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
+<br><a name="SEC16" href="#TOC1">ATOMIC GROUPS</a><br>
 <P>
 <pre>
  (?&#62;...)         atomic non-capture group
  (*atomic:...)   atomic non-capture group
 </PRE>
 </P>
-<br><a name="SEC15" href="#TOC1">COMMENT</a><br>
+<br><a name="SEC17" href="#TOC1">COMMENT</a><br>
 <P>
 <pre>
  (?#....)        comment (not nestable)
 </PRE>
 </P>
-<br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
+<br><a name="SEC18" href="#TOC1">OPTION SETTING</a><br>
 <P>
 Changes of these options within a group are automatically cancelled at the end
 of the group.
@ -513,7 +413,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The
 application can lock out the use of (*UTF) and (*UCP) by setting the
 PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.
 </P>
-<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
+<br><a name="SEC19" href="#TOC1">NEWLINE CONVENTION</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 settings with a similar syntax.
@ -526,7 +426,7 @@ settings with a similar syntax.
  (*NUL)          the NUL character (binary zero)
 </PRE>
 </P>
-<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
+<br><a name="SEC20" href="#TOC1">WHAT \R MATCHES</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 setting with a similar syntax.
@ -535,7 +435,7 @@ setting with a similar syntax.
  (*BSR_UNICODE)  any Unicode newline sequence
 </PRE>
 </P>
-<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
+<br><a name="SEC21" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
 <P>
 <pre>
  (?=...)                     )
@ -556,7 +456,7 @@ setting with a similar syntax.
 </pre>
 Each top-level branch of a lookbehind must be of a fixed length.
 </P>
-<br><a name="SEC20" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
+<br><a name="SEC22" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
 <P>
 These assertions are specific to PCRE2 and are not Perl-compatible.
 <pre>
@ -569,7 +469,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
  (*non_atomic_positive_lookbehind:...)  )
 </PRE>
 </P>
-<br><a name="SEC21" href="#TOC1">SCRIPT RUNS</a><br>
+<br><a name="SEC23" href="#TOC1">SCRIPT RUNS</a><br>
 <P>
 <pre>
  (*script_run:...)           ) script run, can be backtracked into
@ -579,7 +479,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
  (*asr:...)                  )
 </PRE>
 </P>
-<br><a name="SEC22" href="#TOC1">BACKREFERENCES</a><br>
+<br><a name="SEC24" href="#TOC1">BACKREFERENCES</a><br>
 <P>
 <pre>
  \n              reference by number (can be ambiguous)
@ -596,7 +496,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
  (?P=name)       reference by name (Python)
 </PRE>
 </P>
-<br><a name="SEC23" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
+<br><a name="SEC25" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
 <P>
 <pre>
  (?R)            recurse whole pattern
@ -615,7 +515,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
  \g'-n'          call subroutine by relative number (PCRE2 extension)
 </PRE>
 </P>
-<br><a name="SEC24" href="#TOC1">CONDITIONAL PATTERNS</a><br>
+<br><a name="SEC26" href="#TOC1">CONDITIONAL PATTERNS</a><br>
 <P>
 <pre>
  (?(condition)yes-pattern)
@ -638,7 +538,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
 conditions or recursion tests. Such a condition is interpreted as a reference
 condition if the relevant named group exists.
 </P>
-<br><a name="SEC25" href="#TOC1">BACKTRACKING CONTROL</a><br>
+<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
 <P>
 All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
 name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
@ -665,7 +565,7 @@ pattern is not anchored.
 The effect of one of these verbs in a group called as a subroutine is confined
 to the subroutine call.
 </P>
-<br><a name="SEC26" href="#TOC1">CALLOUTS</a><br>
+<br><a name="SEC28" href="#TOC1">CALLOUTS</a><br>
 <P>
 <pre>
  (?C)            callout (assumed number 0)
@ -676,12 +576,12 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
 start and the end), and the starting delimiter { matched with the ending
 delimiter }. To encode the ending delimiter within the string, double it.
 </P>
-<br><a name="SEC27" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC29" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2pattern</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
 <b>pcre2matching</b>(3), <b>pcre2</b>(3).
 </P>
-<br><a name="SEC28" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC30" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@ -690,11 +590,11 @@ Retired from University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC29" href="#TOC1">REVISION</a><br>
+<br><a name="SEC31" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 12 January 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -78,7 +78,7 @@ to 8-bit code units for output.
 </P>
 <P>
 In the rest of this document, the names of library functions and structures
-are given in generic form, for example, <b>pcre_compile()</b>. The actual
+are given in generic form, for example, <b>pcre2_compile()</b>. The actual
 names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 <a name="inputencoding"></a></P>
 <br><a name="SEC3" href="#TOC1">INPUT ENCODING</a><br>
@ -253,7 +253,19 @@ available, and the use of JIT for matching is verified.
 <b>-LM</b>
 List modifiers: write a list of available pattern and subject modifiers to the
 standard output, then exit with zero exit code. All other options are ignored.
-If both -C and -LM are present, whichever is first is recognized.
+If both -C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-LP</b>
+List properties: write a list of recognized Unicode properties to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-LS</b>
+List scripts: write a list of recognized Unicode script names to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
 </P>
 <P>
 <b>-pattern</b> <i>modifier-list</i>
@ -1229,7 +1241,8 @@ pattern, but can be overridden by modifiers on the subject.
      copy=&#60;number or name&#62;      copy captured substring
      depth_limit=&#60;n&#62;            set a depth limit
      dfa                        use <b>pcre2_dfa_match()</b>
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
      get=&#60;number or name&#62;       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
@ -1239,6 +1252,8 @@ pattern, but can be overridden by modifiers on the subject.
      match_limit=&#60;n&#62;            set a match limit
      memory                     show heap memory usage
      null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
      offset=&#60;n&#62;                 set starting offset
      offset_limit=&#60;n&#62;           set offset limit
      ovector=&#60;n&#62;                set size of output vector
@ -1550,7 +1565,7 @@ Setting heap, match, and depth limits
 <P>
 The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
 the appropriate limits in the match context. These values are ignored when the
-<b>find_limits</b> modifier is specified.
+<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
 </P>
 <br><b>
 Finding minimum limits
@ -1560,8 +1575,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
 calls the relevant matching function several times, setting different values in
 the match context via <b>pcre2_set_heap_limit()</b>,
 <b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 </P>
 <P>
 When using this modifier, the pattern should not contain any limit settings
@ -1589,9 +1608,7 @@ overall amount of computing resource that is used.
 </P>
 <P>
 For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 </P>
 <br><b>
 Showing MARK names
@ -1609,12 +1626,10 @@ Showing memory usage
 <P>
 The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the <b>memory</b> modifier never has any effect. For this modifier to work, the
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
+is used only when a match requires more internal workspace than the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 <b>null_context</b> modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 </P>
@ -1668,7 +1683,7 @@ When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 </P>
 <br><b>
-Passing a NULL context
+Passing a NULL context, subject, or replacement
 </b><br>
 <P>
 Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
@ -1676,7 +1691,13 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
 If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-<b>find_limits</b> or <b>substitute_callout</b> modifiers.
+<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
+modifiers.
+</P>
+<P>
+Similarly, for testing purposes, if the <b>null_subject</b> or
+<b>null_replacement</b> modifier is set, the subject or replacement string
+pointers are passed as NULL, respectively, to the relevant functions.
 </P>
 <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
 <P>
@ -2122,9 +2143,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -50,17 +50,18 @@ UNICODE PROPERTY SUPPORT
 <P>
 When PCRE2 is built with Unicode support, the escape sequences \p{..},
 \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+The Unicode properties that can be tested are a subset of those that Perl
+supports. Currently they are limited to the general category properties such as
+Lu for an upper case letter or Nd for a decimal number, the Unicode script
+names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived
+properties Any and LC (synonym L&). Full lists are given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 and
 <a href="pcre2syntax.html"><b>pcre2syntax</b></a>
-documentation. Only the short names for properties are supported. For example,
-\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE2 does not support this.
+documentation. In general, only the short names for properties are supported.
+For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not
+supported. Furthermore, in Perl, many properties may optionally be prefixed by
+"Is", for compatibility with Perl 5.6. PCRE2 does not support this.
 </P>
 <br><b>
 WIDE CHARACTERS AND UTF MODES
@ -477,7 +478,7 @@ AUTHOR
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -486,9 +487,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 23 February 2020
+Last updated: 22 December 2021
 <br>
-Copyright &copy; 1997-2020 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
--- a/doc/pcre2_compile.3
+++ b/doc/pcre2_compile.3
@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2_COMPILE 3 "22 April 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -80,8 +80,17 @@ Additional options may be set in the compile context via the
 .\"
 function.
 .P
-The yield of this function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+If either of \fIerrorcode\fP or \fIerroroffset\fP is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the \fIerrorcode\fP argument to the
+\fBpcre2_get_error_message()\fP function. The offset (in code units) where the
+error was encountered is returned via the \fIerroroffset\fP argument.
+.P
+If there is no error, the value passed via \fIerrorcode\fP returns the message
+"no error" if passed to \fBpcre2_get_error_message()\fP, and the value passed
+via \fIerroroffset\fP is zero.
 .P
 There is a complete description of the PCRE2 native API, with more detail on
 each option, in the
--- a/doc/pcre2_jit_stack_create.3
+++ b/doc/pcre2_jit_stack_create.3
@ -22,7 +22,8 @@ allocation. The result can be passed to the JIT run-time code by calling
 \fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern,
 which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP.
 A maximum stack size of 512KiB to 1MiB should be more than enough for any
-pattern. For more details, see the
+pattern. If the stack couldn't be allocated or the values passed were not
+reasonable, NULL will be returned. For more details, see the
 .\" HREF
 \fBpcre2jit\fP
 .\"
--- a/doc/pcre2_serialize_decode.3
+++ b/doc/pcre2_serialize_decode.3
@ -36,7 +36,7 @@ the following negative error codes:
  PCRE2_ERROR_BADDATA   \fInumber_of_codes\fP is zero or less
  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in \fIbytes\fP
  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
-  PCRE2_ERROR_MEMORY    memory allocation failed
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
  PCRE2_ERROR_NULL      \fIcodes\fP or \fIbytes\fP is NULL
 .sp
 PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
--- a/doc/pcre2_set_compile_extra_options.3
+++ b/doc/pcre2_set_compile_extra_options.3
@ -18,9 +18,9 @@ This function sets additional option bits for \fBpcre2_compile()\fP that are
 housed in a compile context. It completely replaces all the bits. The extra
 options are:
 .sp
-.\" JOIN
  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \eK in lookarounds
-  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \ex{df800} to \ex{dfff}
+.\" JOIN
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \ex{d800} to \ex{dfff}
                                         in UTF-8 and UTF-32 modes
 .\" JOIN
  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \eu, \eU, and
--- a/doc/pcre2_substitute.3
+++ b/doc/pcre2_substitute.3
@ -55,32 +55,42 @@ automatically added.
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 .sp
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
 .\" JOIN
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the
-                              subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
+  PCRE2_NOTBOL                       Subject is not the beginning of a
+                                      line
+  PCRE2_NOTEOL                       Subject is not the end of a line
 .\" JOIN
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement
-                              for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_LITERAL   The replacement string is literal
-  PCRE2_SUBSTITUTE_MATCHED   Use pre-existing match data for 1st match
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
+  PCRE2_NOTEMPTY                     An empty string is not a
+                                      valid match
+.\" JOIN
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of
+                                      the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+.\" JOIN
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in
+                                      the subject or replacement
+.\" JOIN
+                                      (only relevant if PCRE2_UTF was
+                                      set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+.\" JOIN
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the
+                                      subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+.\" JOIN
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for
+                                      first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
  PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 .sp
 If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
 PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
 .P
-If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-zero; its
+If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its
 contents must be the result of a call to \fBpcre2_match()\fP using the same
 pattern and subject.
 .P
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "30 August 2021" "PCRE2 10.38"
+.TH PCRE2API 3 "27 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -953,7 +953,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 .P
 A value for the heap limit may also be supplied by an item at the start of a
 pattern of the form
@ -964,18 +964,18 @@ where ddd is a decimal number. However, such a setting is ignored unless ddd is
 less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
 limit is set, less than the default.
 .P
-The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The \fBpcre2_match()\fP function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+\fBpcre2_match()\fP uses the heap are given in the
+.\" HREF
+\fBpcre2perform\fP
+.\"
+documentation.
 .P
-Similarly, for \fBpcre2_dfa_match()\fP, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For \fBpcre2_dfa_match()\fP, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 .sp
 .nf
 .B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
@ -1019,10 +1019,10 @@ less than the limit set by the caller of \fBpcre2_match()\fP or
 .fi
 .sp
 This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@ -1323,8 +1323,7 @@ If \fIerrorcode\fP or \fIerroroffset\fP is NULL, \fBpcre2_compile()\fP returns
 NULL immediately. Otherwise, the variables to which these point are set to an
 error code and an offset (number of code units) within the pattern,
 respectively, when \fBpcre2_compile()\fP returns NULL because a compilation
-error has occurred. The values are not defined when compilation is successful
-and \fBpcre2_compile()\fP returns a non-NULL value.
+error has occurred.
 .P
 There are nearly 100 positive error codes that \fBpcre2_compile()\fP may return
 if it finds an error in the pattern. There are also some negative error codes
@ -1343,14 +1342,17 @@ message"
 below)
 .\"
 should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
-for both positive and negative error codes in \fBpcre2.h\fP.
+for both positive and negative error codes in \fBpcre2.h\fP. When compilation
+is successful \fIerrorcode\fP is set to a value that returns the message "no
+error" if passed to \fBpcre2_get_error_message()\fP.
 .P
 The value returned in \fIerroroffset\fP is an indication of where in the
-pattern the error occurred. It is not necessarily the furthest point in the
-pattern that was read. For example, after the error "lookbehind assertion is
-not fixed length", the error offset points to the start of the failing
-assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
-first code unit of the failing character.
+pattern an error occurred. When there is no error, zero is returned. A non-zero
+value is not necessarily the furthest point in the pattern that was read. For
+example, after the error "lookbehind assertion is not fixed length", the error
+offset points to the start of the failing assertion. For an invalid UTF-8 or
+UTF-16 string, the offset is that of the first code unit of the failing
+character.
 .P
 Some errors are not detected until the whole pattern has been scanned; in these
 cases, the offset passed back is the length of the pattern. Note that the
@ -1794,7 +1796,7 @@ it is set, the effect of passing an invalid UTF string as a pattern is
 undefined. It may cause your program to crash or loop.
 .P
 Note that this option can also be passed to \fBpcre2_match()\fP and
-\fBpcre_dfa_match()\fP, to suppress UTF validity checking of the subject
+\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject
 string.
 .P
 Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the
@ -2015,8 +2017,8 @@ point. However, this applies only to characters whose code points are less than
 256. By default, higher-valued code points never match escapes such as \ew or
 \ed.
 .P
-When PCRE2 is built with Unicode support (the default), the Unicode properties
-of all characters can be tested with \ep and \eP, or, alternatively, the
+When PCRE2 is built with Unicode support (the default), certain Unicode
+character properties can be tested with \ep and \eP, or, alternatively, the
 PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and
 friends to use Unicode property support instead of the built-in tables.
 PCRE2_UCP also causes upper/lower casing operations on characters with code
@ -2279,7 +2281,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable.
  PCRE2_INFO_LASTCODETYPE
 .sp
 Returns 1 if there is a rightmost literal code unit that must exist in any
-matched string, other than at its start. The third argument should  point to a
+matched string, other than at its start. The third argument should point to a
 \fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
 returned, the code unit value itself can be retrieved using
 PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is
@ -2624,7 +2626,9 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
 \fIstartoffset\fP. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and
+\fIlength\fP is zero, the subject is assumed to be an empty string. If
+\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL.
 .P
 If \fIstartoffset\fP is greater than the length of the subject,
 \fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is
@ -3158,11 +3162,11 @@ The backtracking match limit was reached.
 .sp
  PCRE2_ERROR_NOMEMORY
 .sp
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backtracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 .sp
  PCRE2_ERROR_NULL
 .sp
@ -3413,12 +3417,16 @@ same number causes an error at compile time.
 .P
 This function optionally calls \fBpcre2_match()\fP and then makes a copy of the
 subject string in \fIoutputbuffer\fP, replacing parts that were matched with
-the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This
-can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
-option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
-replacement string(s). The default action is to perform just one replacement if
-the pattern matches, but there is an option that requests multiple replacements
-(see PCRE2_SUBSTITUTE_GLOBAL below).
+the \fIreplacement\fP string, whose length is supplied in \fIrlength\fP, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the
+replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an
+error occurs if \fIreplacement\fP is NULL.
+.P
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
 .P
 If successful, \fBpcre2_substitute()\fP returns the number of substitutions
 that were carried out. This may be zero if no match was found, and is never
@ -3447,12 +3455,12 @@ block may or may not have been changed.
 As well as the usual options for \fBpcre2_match()\fP, a number of additional
 options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
 One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
-\fImatch_data\fP block must be provided, and it must have been used for an
-external call to \fBpcre2_match()\fP. The data in the \fImatch_data\fP block
-(return code, offset vector) is used for the first substitution instead of
-calling \fBpcre2_match()\fP from within \fBpcre2_substitute()\fP. This allows
-an application to check for a match before choosing to substitute, without
-having to repeat the match.
+\fImatch_data\fP block must be provided, and it must have already been used for
+an external call to \fBpcre2_match()\fP with the same pattern and subject
+arguments. The data in the \fImatch_data\fP block (return code, offset vector)
+is then used for the first substitution instead of calling \fBpcre2_match()\fP
+from within \fBpcre2_substitute()\fP. This allows an application to check for a
+match before choosing to substitute, without having to repeat the match.
 .P
 The contents of the externally supplied match data block are not changed when
 PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
@ -3584,7 +3592,7 @@ and force lower case. The escape sequences change the current state: \eU and
 terminating a \eQ quoted sequence) reverts to no case forcing. The sequences
 \eu and \el force the next character (if it is a letter) to upper or lower
 case, respectively, and then the state automatically reverts to no case
-forcing. Case forcing applies to all inserted  characters, including those from
+forcing. Case forcing applies to all inserted characters, including those from
 capture groups and letters within \eQ...\eE quoted sequences. If either
 PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
 properties are used for case forcing characters whose code points are greater
@ -3649,7 +3657,9 @@ needed is returned via \fIoutlengthptr\fP. Note that this does not happen by
 default.
 .P
 PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
-\fImatch_data\fP argument is NULL.
+\fImatch_data\fP argument is NULL or if the \fIsubject\fP or \fIreplacement\fP
+arguments are NULL. For backward compatibility reasons an exception is made for
+the \fIreplacement\fP argument if the \fIrlength\fP argument is also 0.
 .P
 PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
 replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
@ -3811,12 +3821,13 @@ other alternatives. Ultimately, when it runs out of matches,
 .P
 The function \fBpcre2_dfa_match()\fP is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-\fBpcre2_dfa_match()\fP does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does
+not support, see the
 .\" HREF
 \fBpcre2matching\fP
 .\"
@ -3848,7 +3859,7 @@ Here is an example of a simple call to \fBpcre2_dfa_match()\fP:
    wspace,         /* working space vector */
    20);            /* number of elements (NOT size in bytes) */
 .
-.SS "Option bits for \fBpcre_dfa_match()\fP"
+.SS "Option bits for \fBpcre2_dfa_match()\fP"
 .rs
 .sp
 The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must
@ -4016,6 +4027,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2build.3
+++ b/doc/pcre2build.3
@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "20 March 2020" "PCRE2 10.35"
+.TH PCRE2BUILD 3 "27 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .
@ -122,8 +122,9 @@ locked this out by setting PCRE2_NEVER_UTF.
 UTF support allows the libraries to process character code points up to
 0x10ffff in the strings that they handle. Unicode support also gives access to
 the Unicode properties of characters, using pattern escapes such as \eP, \ep,
-and \eX. Only the general category properties such as \fILu\fP and \fINd\fP are
-supported. Details are given in the
+and \eX. Only the general category properties such as \fILu\fP and \fINd\fP,
+script names, and some bi-directional properties are supported. Details are
+given in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@ -277,12 +278,11 @@ to the \fBconfigure\fP command. This setting also applies to the
 \fBpcre2_dfa_match()\fP matching function, and to JIT matching (though the
 counting is done differently).
 .P
-The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The \fBpcre2_match()\fP function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 .\" HREF
 \fBpcre2api\fP
 .\"
@ -302,7 +302,7 @@ You can also explicitly limit the depth of nested backtracking in the
 for --with-match-limit. You can set a lower default limit by adding, for
 example,
 .sp
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 .sp
 to the \fBconfigure\fP command. This value can be overridden at run time. This
 depth limit indirectly limits the amount of heap memory that is used, but
@ -563,15 +563,16 @@ documentation.
 .sp
 The C99 standard defines formatting modifiers z and t for size_t and
 ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-defined and has a value greater than or equal to 199901L (indicating C99).
+environments other than old versions of Microsoft Visual Studio when
+__STDC_VERSION__ is defined and has a value greater than or equal to 199901L
+(indicating support for C99).
 However, there is at least one environment that claims to be C99 but does not
 support these modifiers. If
 .sp
  --disable-percent-zt
 .sp
 is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
-%lu is used, with a cast for size_t values.
+a suitable format is used depending on the size of long for the platform.
 .
 .
 .SH "SUPPORT FOR FUZZERS"
@ -623,7 +624,7 @@ give a warning.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -632,6 +633,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 20 March 2020
-Copyright (c) 1997-2020 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2compat.3
+++ b/doc/pcre2compat.3
@ -1,4 +1,4 @@
-.TH PCRE2COMPAT 3 "30 August 2021" "PCRE2 10.38"
+.TH PCRE2COMPAT 3 "08 December 2021" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "DIFFERENCES BETWEEN PCRE2 AND PERL"
@ -6,31 +6,38 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
 This document describes some of the differences in the ways that PCRE2 and Perl
 handle regular expressions. The differences described here are with respect to
-Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the
+Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the
 information may at times be out of date.
 .P
-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the
+behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the
+next character unless it is the start of a newline sequence. This means that,
+if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF
+(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using
+EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline
+indicator.
+.P
+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
 have are given in the
 .\" HREF
 \fBpcre2unicode\fP
 .\"
 page.
 .P
-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
 they do not mean what you might think. For example, (?!a){3} does not assert
 that the next three characters are not "a". It just asserts that the next
 character is not "a" three times (in principle; PCRE2 optimizes this to run the
 assertion just once). Perl allows some repeat quantifiers on other assertions,
-for example, \eb* (but not \eb{3}, though oddly it does allow ^{3}), but these
-do not seem to have any use. PCRE2 does not allow any kind of quantifier on
-non-lookaround assertions.
+for example, \eb*, but these do not seem to have any use. PCRE2 does not allow
+any kind of quantifier on non-lookaround assertions.
 .P
-3. Capture groups that occur inside negative lookaround assertions are counted,
+4. Capture groups that occur inside negative lookaround assertions are counted,
 but their entries in the offsets vector are set only when a negative assertion
 is a condition that has a matching branch (that is, the condition is false).
 Perl may set such capture groups in other circumstances.
 .P
-4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
+5. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
 \eU, and \eN when followed by a character name. \eN on its own, matching a
 non-newline character, and \eN{U+dd..}, matching a Unicode code point, are
 supported. The escapes that modify the case of following letters are
@ -40,12 +47,12 @@ generated by default. However, if either of the PCRE2_ALT_BSUX or
 PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript
 interprets them.
 .P
-5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
+6. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
 built with Unicode support (the default). The properties that can be tested
 with \ep and \eP are limited to the general category properties such as Lu and
-Nd, script names such as Greek or Han, and the derived properties Any and L&.
-Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use
-is limited. See the
+Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the
+derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs
+(surrogate) property, but in PCRE2 its use is limited. See the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@ -53,14 +60,14 @@ documentation for details. The long synonyms for property names that Perl
 supports (such as \ep{Letter}) are not supported by PCRE2, nor is it permitted
 to prefix any of these properties with "Is".
 .P
-6. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters
+7. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters
 in between are treated as literals. However, this is slightly different from
 Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
-they cause variable interpolation (but of course PCRE2 does not have
-variables). Also, Perl does "double-quotish backslash interpolation" on any
-backslashes between \eQ and \eE which, its documentation says, "may lead to
-confusing results". PCRE2 treats a backslash between \eQ and \eE just like any
-other character. Note the following examples:
+they cause variable interpolation (PCRE2 does not have variables). Also, Perl
+does "double-quotish backslash interpolation" on any backslashes between \eQ
+and \eE which, its documentation says, "may lead to confusing results". PCRE2
+treats a backslash between \eQ and \eE just like any other character. Note the
+following examples:
 .sp
    Pattern            PCRE2 matches     Perl matches
 .sp
@ -75,7 +82,7 @@ other character. Note the following examples:
 The \eQ...\eE sequence is recognized both inside and outside character classes
 by both PCRE2 and Perl.
 .P
-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
 constructions. However, PCRE2 does have a "callout" feature, which allows an
 external function to be called during pattern matching. See the
 .\" HREF
@ -83,11 +90,11 @@ external function to be called during pattern matching. See the
 .\"
 documentation for details.
 .P
-8. Subroutine calls (whether recursive or not) were treated as atomic groups up
+9. Subroutine calls (whether recursive or not) were treated as atomic groups up
 to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
 into subroutine calls is now supported, as in Perl.
 .P
-9. In PCRE2, if any of the backtracking control verbs are used in a group that
+10. In PCRE2, if any of the backtracking control verbs are used in a group that
 is called as a subroutine (whether or not recursively), their effect is
 confined to that group; it does not extend to the surrounding pattern. This is
 not always the case in Perl. In particular, if (*THEN) is present in a group
@ -95,18 +102,18 @@ that is called as a subroutine, its action is limited to that group, even if
 the group does not contain any | characters. Note that such groups are
 processed as anchored at the point where they are tested.
 .P
-10. If a pattern contains more than one backtracking control verb, the first
+11. If a pattern contains more than one backtracking control verb, the first
 one that is backtracked onto acts. For example, in the pattern
 A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 same as PCRE2, but there are cases where it differs.
 .P
-11. There are some differences that are concerned with the settings of captured
+12. There are some differences that are concerned with the settings of captured
 strings when part of a pattern is repeated. For example, matching "aba" against
 the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
 "b".
 .P
-12. PCRE2's handling of duplicate capture group numbers and names is not as
+13. PCRE2's handling of duplicate capture group numbers and names is not as
 general as Perl's. This is a consequence of the fact the PCRE2 works internally
 just with numbers, using an external table to translate between numbers and
 names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two
@ -115,37 +122,38 @@ causes an error at compile time. If it were allowed, it would not be possible
 to distinguish which group matched, because both names map to capture group
 number 1. To avoid this confusing situation, an error is given at compile time.
 .P
-13. Perl used to recognize comments in some places that PCRE2 does not, for
+14. Perl used to recognize comments in some places that PCRE2 does not, for
 example, between the ( and ? at the start of a group. If the /x modifier is
 set, Perl allowed white space between ( and ? though the latest Perls give an
 error (for a while it was just deprecated). There may still be some cases where
 Perl behaves differently.
 .P
-14. Perl, when in warning mode, gives warnings for character classes such as
+15. Perl, when in warning mode, gives warnings for character classes such as
 [A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
 warning features, so it gives an error in these cases because they are almost
 certainly user mistakes.
 .P
-15. In PCRE2, the upper/lower case character properties Lu and Ll are not
+16. In PCRE2, the upper/lower case character properties Lu and Ll are not
 affected when case-independent matching is specified. For example, \ep{Lu}
 always matches an upper case letter. I think Perl has changed in this respect;
-in the release at the time of writing (5.32), \ep{Lu} and \ep{Ll} match all
+in the release at the time of writing (5.34), \ep{Lu} and \ep{Ll} match all
 letters, regardless of case, when case independence is specified.
 .P
-16. From release 5.32.0, Perl locks out the use of \eK in lookaround
+17. From release 5.32.0, Perl locks out the use of \eK in lookaround
 assertions. From release 10.38 PCRE2 does the same by default. However, there
 is an option for re-enabling the previous behaviour. When this option is set,
 \eK is acted on when it occurs in positive assertions, but is ignored in
 negative assertions.
 .P
-17. PCRE2 provides some extensions to the Perl regular expression facilities.
+18. PCRE2 provides some extensions to the Perl regular expression facilities.
 Perl 5.10 included new features that were not in earlier versions of Perl, some
 of which (such as named parentheses) were in PCRE2 for some time before. This
-list is with respect to Perl 5.32:
+list is with respect to Perl 5.34:
 .sp
 (a) Although lookbehind assertions in PCRE2 must match fixed length strings,
 each alternative toplevel branch of a lookbehind assertion can match a
-different length of string. Perl requires them all to have the same length.
+different length of string. Perl used to require them all to have the same
+length, but the latest version has some variable length support.
 .sp
 (b) From PCRE2 10.23, backreferences to groups of fixed length are supported
 in lookbehinds, provided that there is no possibility of referencing a
@ -186,11 +194,11 @@ the pattern.
 extension to the lookaround facilities. The default, Perl-compatible
 lookarounds are atomic.
 .P
-18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
+19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
 modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode
 rules. This separation cannot be represented with PCRE2_UCP.
 .P
-19. Perl has different limits than PCRE2. See the
+20. Perl has different limits than PCRE2. See the
 .\" HREF
 \fBpcre2limit\fP
 .\"
@ -214,6 +222,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
+Last updated: 08 December 2021
 Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2convert.3
+++ b/doc/pcre2convert.3
@ -116,8 +116,8 @@ permitted to match separator characters, but the double-star (**) feature
 (which does match separators) is supported.
 .P
 PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
-match separator characters. PCRE2_GLOB_NO_STARSTAR matches globs with the
-double-star feature disabled. These options may be given together.
+match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with
+the double-star feature disabled. These options may be given together.
 .
 .
 .SH "CONVERTING POSIX PATTERNS"
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "31 August 2021" "PCRE2 10.38"
+.TH PCRE2GREP 1 "30 July 2022" "PCRE2 10.41"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -43,13 +43,15 @@ For example:
 .sp
  pcre2grep some-pattern file1 - file3
 .sp
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how \fBpcre2grep\fP behaves. In
-particular, the \fB-M\fP option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-\fB-N\fP (\fB--newline\fP) option.
+However, there are options that can change how \fBpcre2grep\fP behaves. For
+example, the \fB-M\fP option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the \fB-N\fP
+(\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
+not file names are shown, and the \fB-Z\fP option changes the file name
+terminator to a zero byte.
 .P
 The amount of memory used for buffering files that are being scanned is
 controlled by parameters that can be set by the \fB--buffer-size\fP and
@ -149,9 +151,11 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of \fInumber\fP
-is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
+context lines (the \fB-Z\fP option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
+\fB-A\fP is ignored.
 .TP
 \fB-a\fP, \fB--text\fP
 Treat binary files as text. This is equivalent to
@ -167,9 +171,10 @@ Output up to \fInumber\fP lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 \fInumber\fP lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of \fInumber\fP is expected to be relatively small. When
+instead of a colon for the context lines (the \fB-Z\fP option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of \fInumber\fP is expected to be relatively small. When
 \fB-c\fP is used, \fB-B\fP is ignored.
 .TP
 \fB--binary-files=\fP\fIword\fP
@ -356,19 +361,21 @@ shown separately. This option is mutually exclusive with \fB--output\fP,
 .TP
 \fB-H\fP, \fB--with-filename\fP
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the \fB-M\fP option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the \fB-M\fP option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
 .TP
 \fB-h\fP, \fB--no-filename\fP
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
+Suppress the output of file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The \fB-Z\fP option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
 .TP
 \fB--heap-limit\fP=\fInumber\fP
 See \fB--match-limit\fP below.
@ -417,17 +424,19 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
 \fB-L\fP, \fB--files-without-match\fP
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous \fB-H\fP,
-\fB-h\fP, or \fB-l\fP options.
+output once, on a separate line by default, but if the \fB-Z\fP option is set,
+they are separated by zero bytes instead of newlines. This option overrides any
+previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
 .TP
 \fB-l\fP, \fB--files-with-matches\fP
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the \fB-c\fP (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-\fB-c\fP is a way of suppressing the listing of files with no matches that
+a separate line, but if the \fB-Z\fP option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches that
 occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
 \fB-h\fP, or \fB-L\fP options.
 .TP
@ -516,10 +525,7 @@ counter that is incremented each time around its main processing loop. If the
 value set by \fB--match-limit\fP is reached, an error occurs.
 .sp
 The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 .sp
 The \fB--depth-limit\fP option limits the depth of nested backtracking points,
 which indirectly limits the amount of memory that is used. The amount of memory
@ -732,6 +738,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
 pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-Z\fP, \fB--null\fP
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
 .
 .
 .SH "ENVIRONMENT VARIABLES"
@ -960,6 +972,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 31 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 30 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
--- a/doc/pcre2jit.3
+++ b/doc/pcre2jit.3
@ -1,4 +1,4 @@
-.TH PCRE2JIT 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2JIT 3 "30 November 2021" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
@ -251,11 +251,11 @@ non-sequential matches in one thread is to use callouts: if a callout function
 starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 .P
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 .P
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
 to a match context that is used by any number of patterns, as long as they are
@ -355,8 +355,8 @@ out this complicated API.
 .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
 .fi
 .P
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@ -416,10 +416,10 @@ that was not compiled.
 .P
 When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path, and if invalid data is passed, the result is undefined.
 .P
 Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give
 speedups of more than 10%.
@ -445,6 +445,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 May 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 30 November 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2limits.3
+++ b/doc/pcre2limits.3
@ -1,4 +1,4 @@
-.TH PCRE2LIMITS 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2LIMITS 3 "26 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "SIZE AND OTHER LIMITATIONS"
@ -51,6 +51,10 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 .P
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
+.P
+The maximum amount of heap memory used for matching is controlled by the heap 
+limit, which can be set in a pattern or in a match context. The default is a 
+very large number, effectively unlimited.
 .
 .
 .SH AUTHOR
@ -58,7 +62,7 @@ The maximum length of a string argument to a callout is the largest number a
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -67,6 +71,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 02 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 26 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@ -1,4 +1,4 @@
-.TH PCRE2PATTERN 3 "3o0 August 2021" "PCRE2 10.38"
+.TH PCRE2PATTERN 3 "12 January 2022" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION DETAILS"
@ -509,7 +509,6 @@ for themselves. For example, outside a character class:
 .\" JOIN
  \e377   might be a backreference, otherwise
            the value 255 (decimal)
-.\" JOIN
  \e81    is always a backreference
 .sp
 Note that octal values of 100 or greater that are specified using this syntax
@ -773,195 +772,64 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
 sequences are of course limited to testing characters whose code points are
 less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
 greater than 0x10ffff (the Unicode limit) may be encountered. These are all
-treated as being in the Unknown script and with an unassigned type. The extra
-escape sequences are:
+treated as being in the Unknown script and with an unassigned type.
+.P
+Matching characters by Unicode property is not fast, because PCRE2 has to do a
+multistage table lookup in order to find a character's property. That is why
+the traditional escape sequences such as \ed and \ew do not use Unicode
+properties in PCRE2 by default, though you can make them do so by setting the
+PCRE2_UCP option or by starting the pattern with (*UCP).
+.P
+The extra escape sequences that provide property support are:
 .sp
  \ep{\fIxx\fP}   a character with the \fIxx\fP property
  \eP{\fIxx\fP}   a character without the \fIxx\fP property
  \eX       a Unicode extended grapheme cluster
 .sp
-The property names represented by \fIxx\fP above are case-sensitive. There is
-support for Unicode script names, Unicode general category properties, "Any",
-which matches any character (including newline), and some special PCRE2
-properties (described in the
+The property names represented by \fIxx\fP above are not case-sensitive, and in
+accordance with Unicode's "loose matching" rules, spaces, hyphens, and
+underscores are ignored. There is support for Unicode script names, Unicode
+general category properties, "Any", which matches any character (including
+newline), Bidi_Class, a number of binary (yes/no) properties, and some special
+PCRE2 properties (described
 .\" HTML <a href="#extraprops">
 .\" </a>
-next section).
+below).
 .\"
-Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2.
-Note that \eP{Any} does not match any characters, so always causes a match
-failure.
+Certain other Perl properties such as "InMusicalSymbols" are not supported by
+PCRE2. Note that \eP{Any} does not match any characters, so always causes a
+match failure.
+.
+.
+.
+.SS "Script properties for \ep and \eP"
+.rs
+.sp
+There are three different syntax forms for matching a script. Each Unicode
+character has a basic script and, optionally, a list of other scripts ("Script
+Extensions") with which it is commonly used. Using the Adlam script as an
+example, \ep{sc:Adlam} matches characters whose basic script is Adlam, whereas
+\ep{scx:Adlam} matches, in addition, characters that have Adlam in their
+extensions list. The full names "script" and "script extensions" for the
+property types are recognized, and an equals sign is an alternative to the
+colon. If a script name is given without a property type, for example,
+\ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl changed to this
+interpretation at release 5.26 and PCRE2 changed at release 10.40.
 .P
-Sets of Unicode characters are defined as belonging to certain scripts. A
-character from one of these sets can be matched using a script name. For
-example:
-.sp
-  \ep{Greek}
-  \eP{Han}
-.sp
 Unassigned characters (and in non-UTF 32-bit mode, characters with code points
 greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
 part of an identified script are lumped together as "Common". The current list
-of scripts is:
-.P
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Chorasmian,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dives_Akuru,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khitan_Small_Script,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Unknown,
-Vai,
-Wancho,
-Warang_Citi,
-Yezidi,
-Yi,
-Zanabazar_Square.
-.P
+of recognized script names and their 4-character abbreviations can be obtained
+by running this command:
+.sp
+  pcre2test -LS
+.sp
+.
+.
+.
+.SS "The general category property for \ep and \eP"
+.rs
+.sp
 Each character has exactly one Unicode general category property, specified by
 a two-letter abbreviation. For compatibility with Perl, negation can be
 specified by including a circumflex between the opening brace and the property
@ -1021,9 +889,9 @@ The following general category property codes are supported:
  Zp    Paragraph separator
  Zs    Space separator
 .sp
-The special property L& is also supported: it matches a character that has
-the Lu, Ll, or Lt property, in other words, a letter that is not classified as
-a modifier or "other".
+The special property LC, which has the synonym L&, is also supported: it
+matches a character that has the Lu, Ll, or Lt property, in other words, a
+letter that is not classified as a modifier or "other".
 .P
 The Cs (Surrogate) property applies only to characters whose code points are in
 the range U+D800 to U+DFFF. These characters are no different to any other
@ -1047,12 +915,53 @@ Unicode table.
 Specifying caseless matching does not affect these escape sequences. For
 example, \ep{Lu} always matches only upper case letters. This is different from
 the behaviour of current versions of Perl.
-.P
-Matching characters by Unicode property is not fast, because PCRE2 has to do a
-multistage table lookup in order to find a character's property. That is why
-the traditional escape sequences such as \ed and \ew do not use Unicode
-properties in PCRE2 by default, though you can make them do so by setting the
-PCRE2_UCP option or by starting the pattern with (*UCP).
+.
+.
+.SS "Binary (yes/no) properties for \ep and \eP"
+.rs
+.sp
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\ep and \eP, along with their abbreviations, by running this command:
+.sp
+  pcre2test -LP
+.sp
+.
+.
+.SS "The Bidi_Class property for \ep and \eP"
+.rs
+.sp
+  \ep{Bidi_Class:<class>}   matches a character with the given class
+  \ep{BC:<class>}           matches a character with the given class
+.sp
+The recognized classes are:
+.sp
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+.sp
+An equals sign may be used instead of a colon. The class names are
+case-insensitive; only the short names listed above are recognized.
 .
 .
 .SS Extended grapheme clusters
@ -1331,14 +1240,19 @@ end of the subject in both modes, and if all branches of a pattern start with
 .sp
 Outside a character class, a dot in the pattern matches any one character in
 the subject string except (by default) a character that signifies the end of a
-line.
+line. One or more characters may be specified as line terminators (see
+.\" HTML <a href="#newlines">
+.\" </a>
+"Newline conventions"
+.\"
+above).
 .P
-When a line ending is defined as a single character, dot never matches that
-character; when the two-character sequence CRLF is used, dot does not match CR
-if it is immediately followed by LF, but otherwise it matches all characters
-(including isolated CRs and LFs). When any Unicode line endings are being
-recognized, dot does not match CR or LF or any of the other line ending
-characters.
+Dot never matches a single line-ending character. When the two-character
+sequence CRLF is the only line ending, dot does not match CR if it is
+immediately followed by LF, but otherwise it matches all characters (including
+isolated CRs and LFs). When ANYCRLF is selected for line endings, dot does not
+match any CR or LF. When all Unicode line endings are being recognized, dot
+does not match CR or LF or any of the other line ending characters.
 .P
 The behaviour of dot with regard to newlines can be changed. If the
 PCRE2_DOTALL option is set, a dot matches any one character, without exception.
@ -2181,10 +2095,10 @@ be easier to remember:
 .sp
  (*atomic:\ed+)foo
 .sp
-This kind of parenthesized group "locks up" the  part of the pattern it
-contains once it has matched, and a failure further into the pattern is
-prevented from backtracking into it. Backtracking past it to previous items,
-however, works as normal.
+This kind of parenthesized group "locks up" the part of the pattern it contains
+once it has matched, and a failure further into the pattern is prevented from
+backtracking into it. Backtracking past it to previous items, however, works as
+normal.
 .P
 An alternative description is that a group of this type matches exactly the
 string of characters that an identical standalone pattern would match, if
@ -2930,7 +2844,7 @@ breaks):
  (?(DEFINE) (?<byte> 2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) )
  \eb (?&byte) (\e.(?&byte)){3} \eb
 .sp
-The first part of the pattern is a DEFINE group inside which a another group
+The first part of the pattern is a DEFINE group inside which another group
 named "byte" is defined. This matches an individual component of an IPv4
 address (a number less than 256). When matching takes place, this part of the
 pattern is skipped because DEFINE acts like a false condition. The rest of the
@ -3900,6 +3814,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 12 January 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2perform.3
+++ b/doc/pcre2perform.3
@ -1,4 +1,4 @@
-.TH PCRE2PERFORM 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2PERFORM 3 "27 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 PERFORMANCE"
@ -69,12 +69,28 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code. 
+.P
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+.P
+Until release 10.41, an initial 20KiB frames vector was allocated on the system 
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to \fBpcre2_match()\fP. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+.P
+The size of the initial block is the larger of 20KiB or ten times the pattern's 
+frame size, unless the heap limit is less than this, in which case the heap 
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is 
+checked only when a new block is to be allocated. Reducing the heap limit 
+between calls to \fBpcre2_match()\fP with the same match data block does not 
+affect the saved block.
 .P
 In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
 function calls, but only for processing atomic groups, lookaround assertions,
@ -230,7 +246,7 @@ pattern to match. This is done by repeatedly matching with different limits.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -239,6 +255,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 03 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2serialize.3
+++ b/doc/pcre2serialize.3
@ -6,11 +6,11 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
 .nf
 .B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP,
-.B "  int32_t \fInumber_of_codes\fP, const uint32_t *\fIbytes\fP,"
+.B "  int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP,"
 .B "  pcre2_general_context *\fIgcontext\fP);"
 .sp
-.B int32_t pcre2_serialize_encode(pcre2_code **\fIcodes\fP,
-.B "  int32_t \fInumber_of_codes\fP, uint32_t **\fIserialized_bytes\fP,"
+.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP,
+.B "  int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP,"
 .B "  PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);"
 .sp
 .B void pcre2_serialize_free(uint8_t *\fIbytes\fP);
@ -81,7 +81,7 @@ of serialized patterns, or one of the following negative error codes:
 .sp
  PCRE2_ERROR_BADDATA      the number of patterns is zero or less
  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
-  PCRE2_ERROR_MEMORY       memory allocation failed
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
  PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
 .sp
@ -141,7 +141,6 @@ mangagement functions for the decoded patterns. If this argument is NULL,
 \fBmalloc()\fP and \fBfree()\fP are used. After deserialization, the byte
 stream is no longer needed and can be discarded.
 .sp
-  int32_t number_of_codes;
  pcre2_code *list_of_codes[2];
  uint8_t *bytes = <serialized data>;
  int32_t number_of_codes =
--- a/doc/pcre2syntax.3
+++ b/doc/pcre2syntax.3
@ -1,4 +1,4 @@
-.TH PCRE2SYNTAX 3 "30 August 2021" "PCRE2 10.38"
+.TH PCRE2SYNTAX 3 "12 January 2022" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
@ -102,6 +102,10 @@ happening, \es and \ew may also match characters with code points in the range
 128-255. If the PCRE2_UCP option is set, the behaviour of these escape
 sequences is changed to use Unicode properties and they match many more
 characters.
+.P
+Property descriptions in \ep and \eP are matched caselessly; hyphens,
+underscores, and white space are ignored, in accordance with Unicode's "loose
+matching" rules.
 .
 .
 .SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP"
@ -120,6 +124,7 @@ characters.
  Lo         Other letter
  Lt         Title case letter
  Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
  L&         Ll, Lu, or Lt
 .sp
  M          Mark
@ -167,165 +172,59 @@ Perl and POSIX space are now the same. Perl added VT to its space character set
 at release 5.18.
 .
 .
-.SH "SCRIPT NAMES FOR \ep AND \eP"
+.SH "BINARY PROPERTIES FOR \ep AND \eP"
 .rs
 .sp
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Chorasmian,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dives_Akuru,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khitan_Small_Script,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Vai,
-Wancho,
-Warang_Citi,
-Yezidi,
-Yi,
-Zanabazar_Square.
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\ep and \eP, along with their abbreviations, by running this command:
+.sp
+  pcre2test -LP
+.
+.
+.
+.SH "SCRIPT MATCHING WITH \ep AND \eP"
+.rs
+.sp
+Many script names and their 4-letter abbreviations are recognized in
+\ep{sc:...} or \ep{scx:...} items, or on their own with \ep (and also \eP of
+course). You can obtain a list of these scripts by running this command:
+.sp
+  pcre2test -LS
+.
+.
+.
+.SH "THE BIDI_CLASS PROPERTY FOR \ep AND \eP"
+.rs
+.sp
+  \ep{Bidi_Class:<class>}   matches a character with the given class
+  \ep{BC:<class>}           matches a character with the given class
+.sp
+The recognized classes are:
+.sp
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
 .
 .
 .SH "CHARACTER CLASSES"
@ -679,6 +578,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 12 January 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "30 August 2021" "PCRE 10.38"
+.TH PCRE2TEST 1 "27 July 2022" "PCRE 10.41"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -47,7 +47,7 @@ format before being passed to the library functions. Results are converted back
 to 8-bit code units for output.
 .P
 In the rest of this document, the names of library functions and structures
-are given in generic form, for example, \fBpcre_compile()\fP. The actual
+are given in generic form, for example, \fBpcre2_compile()\fP. The actual
 names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 .
 .
@ -211,7 +211,17 @@ available, and the use of JIT for matching is verified.
 \fB-LM\fP
 List modifiers: write a list of available pattern and subject modifiers to the
 standard output, then exit with zero exit code. All other options are ignored.
-If both -C and -LM are present, whichever is first is recognized.
+If both -C and any -Lx options are present, whichever is first is recognized.
+.TP 10
+\fB-LP\fP
+List properties: write a list of recognized Unicode properties to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+.TP 10
+\fB-LS\fP
+List scripts: write a list of recognized Unicode script names to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
 .TP 10
 \fB-pattern\fP \fImodifier-list\fP
 Behave as if each pattern line contains the given modifiers.
@ -1196,7 +1206,8 @@ pattern, but can be overridden by modifiers on the subject.
      copy=<number or name>      copy captured substring
      depth_limit=<n>            set a depth limit
      dfa                        use \fBpcre2_dfa_match()\fP
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
      get=<number or name>       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
@ -1206,6 +1217,8 @@ pattern, but can be overridden by modifiers on the subject.
      match_limit=<n>            set a match limit
      memory                     show heap memory usage
      null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
      offset=<n>                 set starting offset
      offset_limit=<n>           set offset limit
      ovector=<n>                set size of output vector
@ -1516,7 +1529,7 @@ value that was set on the pattern.
 .sp
 The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
 the appropriate limits in the match context. These values are ignored when the
-\fBfind_limits\fP modifier is specified.
+\fBfind_limits\fP or \fBfind_limits_noheap\fP modifier is specified.
 .
 .
 .SS "Finding minimum limits"
@ -1526,8 +1539,12 @@ If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
 calls the relevant matching function several times, setting different values in
 the match context via \fBpcre2_set_heap_limit()\fP,
 \fBpcre2_set_match_limit()\fP, or \fBpcre2_set_depth_limit()\fP until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, \fBfind_limits_noheap\fP, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 .P
 When using this modifier, the pattern should not contain any limit settings
 such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
@ -1551,9 +1568,7 @@ and non-recursive, to the internal matching function, thus controlling the
 overall amount of computing resource that is used.
 .P
 For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 .
 .
 .SS "Showing MARK names"
@ -1572,12 +1587,10 @@ is added to the non-match message.
 .sp
 The \fBmemory\fP modifier causes \fBpcre2test\fP to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(\fBpcre2_match()\fP) or for internal workspace (\fBpcre2_dfa_match()\fP). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the \fBmemory\fP modifier never has any effect. For this modifier to work, the
+\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. In the latter case, heap memory
+is used only when a match requires more internal workspace than the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 \fBnull_context\fP modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 .
@ -1629,7 +1642,7 @@ When testing \fBpcre2_substitute()\fP, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 .
 .
-.SS "Passing a NULL context"
+.SS "Passing a NULL context, subject, or replacement"
 .rs
 .sp
 Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP,
@ -1637,7 +1650,12 @@ Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP,
 If the \fBnull_context\fP modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-\fBfind_limits\fP or \fBsubstitute_callout\fP modifiers.
+\fBfind_limits\fP, \fBfind_limits_noheap\fP, or \fBsubstitute_callout\fP
+modifiers.
+.P
+Similarly, for testing purposes, if the \fBnull_subject\fP or
+\fBnull_replacement\fP modifier is set, the subject or replacement string
+pointers are passed as NULL, respectively, to the relevant functions.
 .
 .
 .SH "THE ALTERNATIVE MATCHING FUNCTION"
@ -2103,6 +2121,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -44,7 +44,7 @@ PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
       output.

       In the rest of this document, the names of library functions and struc-
-       tures  are  given in generic form, for example, pcre_compile(). The ac-
+       tures  are given in generic form, for example, pcre2_compile(). The ac-
       tual names used in the libraries have a suffix _8, _16, or _32, as  ap-
       propriate.

@ -197,7 +197,17 @@ COMMAND LINE OPTIONS

       -LM       List modifiers: write a list of available pattern and subject
                 modifiers to the standard output, then exit  with  zero  exit
-                 code.  All other options are ignored.  If both -C and -LM are
+                 code.  All other options are ignored.  If both -C and any -Lx
+                 options are present, whichever is first is recognized.
+
+       -LP       List properties: write a list of recognized  Unicode  proper-
+                 ties  to  the standard output, then exit with zero exit code.
+                 All other options are ignored. If both -C and any -Lx options
+                 are present, whichever is first is recognized.
+
+       -LS       List scripts: write a list of recognized Unicode script names
+                 to the standard output, then exit with zero  exit  code.  All
+                 other options are ignored. If both -C and any -Lx options are
                 present, whichever is first is recognized.

       -pattern modifier-list
@ -1101,7 +1111,8 @@ SUBJECT MODIFIERS
             copy=<number or name>      copy captured substring
             depth_limit=<n>            set a depth limit
             dfa                        use pcre2_dfa_match()
-             find_limits                find match and depth limits
+             find_limits                find heap, match and depth limits
+             find_limits_noheap         find match and depth limits
             get=<number or name>       extract captured substring
             getall                     extract all captured substrings
         /g  global                     global matching
@ -1111,6 +1122,8 @@ SUBJECT MODIFIERS
             match_limit=<n>            set a match limit
             memory                     show heap memory usage
             null_context               match with a NULL context
+             null_replacement           substitute with NULL replacement
+             null_subject               match with NULL subject
             offset=<n>                 set starting offset
             offset_limit=<n>           set offset limit
             ovector=<n>                set size of output vector
@ -1399,7 +1412,7 @@ SUBJECT MODIFIERS

       The heap_limit, match_limit, and depth_limit modifiers set  the  appro-
       priate  limits  in the match context. These values are ignored when the
-       find_limits modifier is specified.
+       find_limits or find_limits_noheap modifier is specified.

   Finding minimum limits

@ -1407,8 +1420,12 @@ SUBJECT MODIFIERS
       calls  the  relevant matching function several times, setting different
       values   in   the    match    context    via    pcre2_set_heap_limit(),
       pcre2_set_match_limit(),  or pcre2_set_depth_limit() until it finds the
-       minimum values for each parameter that allows  the  match  to  complete
-       without error. If JIT is being used, only the match limit is relevant.
+       smallest value for each parameter that allows  the  match  to  complete
+       without a "limit exceeded" error. The match itself may succeed or fail.
+       An alternative modifier, find_limits_noheap, omits the heap limit. This
+       is  used  in  the standard tests, because the minimum heap limit varies
+       between systems. If JIT is being used, only the match  limit  is  rele-
+       vant, and the other two are automatically omitted.

       When using this modifier, the pattern should not contain any limit set-
       tings such as (*LIMIT_MATCH=...)  within  it.  If  such  a  setting  is
@ -1434,9 +1451,7 @@ SUBJECT MODIFIERS

       For  both  kinds  of  matching,  the  heap_limit  number,  which  is in
       kibibytes (units of 1024 bytes), limits the amount of heap memory  used
-       for matching. A value of zero disables the use of any heap memory; many
-       simple pattern matches can be done without using the heap, so  zero  is
-       not an unreasonable setting.
+       for matching.

   Showing MARK names

@ -1451,13 +1466,11 @@ SUBJECT MODIFIERS

       The  memory modifier causes pcre2test to log the sizes of all heap mem-
       ory  allocation  and  freeing  calls  that  occur  during  a  call   to
-       pcre2_match()  or  pcre2_dfa_match(). These occur only when a match re-
-       quires a bigger vector than the default  for  remembering  backtracking
-       points  (pcre2_match())  or for internal workspace (pcre2_dfa_match()).
-       In many cases there will be no heap memory used and therefore no  addi-
-       tional output. No heap memory is allocated during matching with JIT, so
-       in that case the memory modifier never has any effect. For  this  modi-
-       fier  to  work,  the  null_context modifier must not be set on both the
+       pcre2_match()  or pcre2_dfa_match(). In the latter case, heap memory is
+       used only when a match requires more internal workspace  than  the  de-
+       fault  allocation  on the stack, so in many cases there will be no out-
+       put. No heap memory is allocated during matching  with  JIT.  For  this
+       modifier to work, the null_context modifier must not be set on both the
       pattern and the subject, though it can be set on one or the other.

   Setting a starting offset
@ -1499,48 +1512,53 @@ SUBJECT MODIFIERS
       When testing pcre2_substitute(), this modifier also has the  effect  of
       passing the replacement string as zero-terminated.

-   Passing a NULL context
+   Passing a NULL context, subject, or replacement

       Normally,   pcre2test   passes   a   context  block  to  pcre2_match(),
       pcre2_dfa_match(), pcre2_jit_match()  or  pcre2_substitute().   If  the
       null_context  modifier  is  set,  however,  NULL is passed. This is for
       testing that the matching and substitution functions  behave  correctly
       in  this  case  (they use default values). This modifier cannot be used
-       with the find_limits or substitute_callout modifiers.
+       with the find_limits, find_limits_noheap, or  substitute_callout  modi-
+       fiers.
+
+       Similarly,  for  testing purposes, if the null_subject or null_replace-
+       ment modifier is set, the subject or replacement  string  pointers  are
+       passed as NULL, respectively, to the relevant functions.


 THE ALTERNATIVE MATCHING FUNCTION

-       By default,  pcre2test  uses  the  standard  PCRE2  matching  function,
+       By  default,  pcre2test  uses  the  standard  PCRE2  matching function,
       pcre2_match() to match each subject line. PCRE2 also supports an alter-
-       native matching function, pcre2_dfa_match(), which operates in  a  dif-
-       ferent  way, and has some restrictions. The differences between the two
+       native  matching  function, pcre2_dfa_match(), which operates in a dif-
+       ferent way, and has some restrictions. The differences between the  two
       functions are described in the pcre2matching documentation.

-       If the dfa modifier is set, the alternative matching function is  used.
-       This  function  finds all possible matches at a given point in the sub-
-       ject. If, however, the dfa_shortest modifier is set,  processing  stops
-       after  the  first  match is found. This is always the shortest possible
+       If  the dfa modifier is set, the alternative matching function is used.
+       This function finds all possible matches at a given point in  the  sub-
+       ject.  If,  however, the dfa_shortest modifier is set, processing stops
+       after the first match is found. This is always  the  shortest  possible
       match.


 DEFAULT OUTPUT FROM pcre2test

-       This section describes the output when the  normal  matching  function,
+       This  section  describes  the output when the normal matching function,
       pcre2_match(), is being used.

-       When  a  match  succeeds,  pcre2test  outputs the list of captured sub-
-       strings, starting with number 0 for the string that matched  the  whole
+       When a match succeeds, pcre2test outputs  the  list  of  captured  sub-
+       strings,  starting  with number 0 for the string that matched the whole
       pattern.  Otherwise, it outputs "No match" when the return is PCRE2_ER-
-       ROR_NOMATCH, or "Partial match:" followed  by  the  partially  matching
-       substring  when  the  return is PCRE2_ERROR_PARTIAL. (Note that this is
-       the entire substring that was inspected during the  partial  match;  it
-       may  include  characters  before the actual match start if a lookbehind
+       ROR_NOMATCH,  or  "Partial  match:"  followed by the partially matching
+       substring when the return is PCRE2_ERROR_PARTIAL. (Note  that  this  is
+       the  entire  substring  that was inspected during the partial match; it
+       may include characters before the actual match start  if  a  lookbehind
       assertion, \K, \b, or \B was involved.)

       For any other return, pcre2test outputs the PCRE2 negative error number
-       and  a  short  descriptive  phrase. If the error is a failed UTF string
-       check, the code unit offset of the start of the  failing  character  is
+       and a short descriptive phrase. If the error is  a  failed  UTF  string
+       check,  the  code  unit offset of the start of the failing character is
       also output. Here is an example of an interactive pcre2test run.

         $ pcre2test
@ -1556,8 +1574,8 @@ DEFAULT OUTPUT FROM pcre2test
       Unset capturing substrings that are not followed by one that is set are
       not shown by pcre2test unless the allcaptures modifier is specified. In
       the following example, there are two capturing substrings, but when the
-       first data line is matched, the second, unset substring is  not  shown.
-       An  "internal" unset substring is shown as "<unset>", as for the second
+       first  data  line is matched, the second, unset substring is not shown.
+       An "internal" unset substring is shown as "<unset>", as for the  second
       data line.

           re> /(a)|(b)/
@ -1569,11 +1587,11 @@ DEFAULT OUTPUT FROM pcre2test
          1: <unset>
          2: b

-       If the strings contain any non-printing characters, they are output  as
-       \xhh  escapes  if  the  value is less than 256 and UTF mode is not set.
+       If  the strings contain any non-printing characters, they are output as
+       \xhh escapes if the value is less than 256 and UTF  mode  is  not  set.
       Otherwise they are output as \x{hh...} escapes. See below for the defi-
-       nition  of  non-printing  characters. If the aftertext modifier is set,
-       the output for substring 0 is followed by the the rest of  the  subject
+       nition of non-printing characters. If the aftertext  modifier  is  set,
+       the  output  for  substring  0  is  followed by the rest of the subject
       string, identified by "0+" like this:

           re> /cat/aftertext
@ -1593,8 +1611,8 @@ DEFAULT OUTPUT FROM pcre2test
          0: ipp
          1: pp

-       "No match" is output only if the first match attempt fails. Here is  an
-       example  of  a  failure  message (the offset 4 that is specified by the
+       "No  match" is output only if the first match attempt fails. Here is an
+       example of a failure message (the offset 4 that  is  specified  by  the
       offset modifier is past the end of the subject string):

           re> /xyz/
@ -1602,7 +1620,7 @@ DEFAULT OUTPUT FROM pcre2test
         Error -24 (bad offset value)

       Note that whereas patterns can be continued over several lines (a plain
-       ">"  prompt  is used for continuations), subject lines may not. However
+       ">" prompt is used for continuations), subject lines may  not.  However
       newlines can be included in a subject by means of the \n escape (or \r,
       \r\n, etc., depending on the newline sequence setting).

@ -1610,7 +1628,7 @@ DEFAULT OUTPUT FROM pcre2test
 OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION

       When the alternative matching function, pcre2_dfa_match(), is used, the
-       output consists of a list of all the matches that start  at  the  first
+       output  consists  of  a list of all the matches that start at the first
       point in the subject where there is at least one match. For example:

           re> /(tang|tangerine|tan)/
@ -1619,11 +1637,11 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
          1: tang
          2: tan

-       Using  the normal matching function on this data finds only "tang". The
-       longest matching string is always given first (and numbered zero).  Af-
-       ter  a PCRE2_ERROR_PARTIAL return, the output is "Partial match:", fol-
+       Using the normal matching function on this data finds only "tang".  The
+       longest  matching string is always given first (and numbered zero). Af-
+       ter a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",  fol-
       lowed by the partially matching substring. Note that this is the entire
-       substring  that  was inspected during the partial match; it may include
+       substring that was inspected during the partial match; it  may  include
       characters before the actual match start if a lookbehind assertion, \b,
       or \B was involved. (\K is not supported for DFA matching.)

@ -1639,16 +1657,16 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
          1: tan
          0: tan

-       The alternative matching function does not support  substring  capture,
-       so  the  modifiers  that are concerned with captured substrings are not
+       The  alternative  matching function does not support substring capture,
+       so the modifiers that are concerned with captured  substrings  are  not
       relevant.


 RESTARTING AFTER A PARTIAL MATCH

-       When the alternative matching function has given  the  PCRE2_ERROR_PAR-
+       When  the  alternative matching function has given the PCRE2_ERROR_PAR-
       TIAL return, indicating that the subject partially matched the pattern,
-       you can restart the match with additional subject data by means of  the
+       you  can restart the match with additional subject data by means of the
       dfa_restart modifier. For example:

           re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
@ -1657,37 +1675,37 @@ RESTARTING AFTER A PARTIAL MATCH
         data> n05\=dfa,dfa_restart
          0: n05

-       For  further  information  about partial matching, see the pcre2partial
+       For further information about partial matching,  see  the  pcre2partial
       documentation.


 CALLOUTS

       If the pattern contains any callout requests, pcre2test's callout func-
-       tion  is  called during matching unless callout_none is specified. This
+       tion is called during matching unless callout_none is  specified.  This
       works with both matching functions, and with JIT, though there are some
-       differences  in behaviour. The output for callouts with numerical argu-
+       differences in behaviour. The output for callouts with numerical  argu-
       ments and those with string arguments is slightly different.

   Callouts with numerical arguments

       By default, the callout function displays the callout number, the start
-       and  current positions in the subject text at the callout time, and the
+       and current positions in the subject text at the callout time, and  the
       next pattern item to be tested. For example:

         --->pqrabcdef
           0    ^  ^     \d

-       This output indicates that callout number 0 occurred for  a  match  at-
-       tempt  starting at the fourth character of the subject string, when the
-       pointer was at the seventh character, and when the  next  pattern  item
-       was  \d.  Just  one circumflex is output if the start and current posi-
+       This  output  indicates  that callout number 0 occurred for a match at-
+       tempt starting at the fourth character of the subject string, when  the
+       pointer  was  at  the seventh character, and when the next pattern item
+       was \d. Just one circumflex is output if the start  and  current  posi-
       tions are the same, or if the current position precedes the start posi-
       tion, which can happen if the callout is in a lookbehind assertion.

       Callouts numbered 255 are assumed to be automatic callouts, inserted as
       a result of the auto_callout pattern modifier. In this case, instead of
-       showing  the  callout  number, the offset in the pattern, preceded by a
+       showing the callout number, the offset in the pattern,  preceded  by  a
       plus, is output. For example:

           re> /\d?[A-E]\*/auto_callout
@ -1714,17 +1732,17 @@ CALLOUTS
         +12 ^  ^
          0: abc

-       The mark changes between matching "a" and "b", but stays the  same  for
-       the  rest  of  the match, so nothing more is output. If, as a result of
-       backtracking, the mark reverts to being unset, the  text  "<unset>"  is
+       The  mark  changes between matching "a" and "b", but stays the same for
+       the rest of the match, so nothing more is output. If, as  a  result  of
+       backtracking,  the  mark  reverts to being unset, the text "<unset>" is
       output.

   Callouts with string arguments

       The output for a callout with a string argument is similar, except that
-       instead of outputting a callout number before the position  indicators,
-       the  callout string and its offset in the pattern string are output be-
-       fore the reflection of the subject string, and the  subject  string  is
+       instead  of outputting a callout number before the position indicators,
+       the callout string and its offset in the pattern string are output  be-
+       fore  the  reflection  of the subject string, and the subject string is
       reflected for each callout. For example:

           re> /^ab(?C'first')cd(?C"second")ef/
@ -1740,26 +1758,26 @@ CALLOUTS

   Callout modifiers

-       The  callout  function in pcre2test returns zero (carry on matching) by
-       default, but you can use a callout_fail modifier in a subject  line  to
+       The callout function in pcre2test returns zero (carry on  matching)  by
+       default,  but  you can use a callout_fail modifier in a subject line to
       change this and other parameters of the callout (see below).

       If the callout_capture modifier is set, the current captured groups are
       output when a callout occurs. This is useful only for non-DFA matching,
-       as  pcre2_dfa_match()  does  not  support capturing, so no captures are
+       as pcre2_dfa_match() does not support capturing,  so  no  captures  are
       ever shown.

       The normal callout output, showing the callout number or pattern offset
-       (as  described above) is suppressed if the callout_no_where modifier is
+       (as described above) is suppressed if the callout_no_where modifier  is
       set.

-       When using the interpretive  matching  function  pcre2_match()  without
-       JIT,  setting  the callout_extra modifier causes additional output from
-       pcre2test's callout function to be generated. For the first callout  in
-       a  match  attempt at a new starting position in the subject, "New match
-       attempt" is output. If there has been a backtrack since the last  call-
+       When  using  the  interpretive  matching function pcre2_match() without
+       JIT, setting the callout_extra modifier causes additional  output  from
+       pcre2test's  callout function to be generated. For the first callout in
+       a match attempt at a new starting position in the subject,  "New  match
+       attempt"  is output. If there has been a backtrack since the last call-
       out (or start of matching if this is the first callout), "Backtrack" is
-       output, followed by "No other matching paths" if  the  backtrack  ended
+       output,  followed  by  "No other matching paths" if the backtrack ended
       the previous match attempt. For example:

          re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess
@ -1796,86 +1814,86 @@ CALLOUTS
          +1    ^    a+
         No match

-       Notice  that  various  optimizations must be turned off if you want all
-       possible matching paths to be  scanned.  If  no_start_optimize  is  not
-       used,  there  is an immediate "no match", without any callouts, because
-       the starting optimization fails to find "b" in the  subject,  which  it
-       knows  must  be  present for any match. If no_auto_possess is not used,
-       the "a+" item is turned into "a++", which reduces the number  of  back-
+       Notice that various optimizations must be turned off if  you  want  all
+       possible  matching  paths  to  be  scanned. If no_start_optimize is not
+       used, there is an immediate "no match", without any  callouts,  because
+       the  starting  optimization  fails to find "b" in the subject, which it
+       knows must be present for any match. If no_auto_possess  is  not  used,
+       the  "a+"  item is turned into "a++", which reduces the number of back-
       tracks.

-       The  callout_extra modifier has no effect if used with the DFA matching
+       The callout_extra modifier has no effect if used with the DFA  matching
       function, or with JIT.

   Return values from callouts

-       The default return from the callout  function  is  zero,  which  allows
+       The  default  return  from  the  callout function is zero, which allows
       matching to continue. The callout_fail modifier can be given one or two
       numbers. If there is only one number, 1 is returned instead of 0 (caus-
       ing matching to backtrack) when a callout of that number is reached. If
-       two numbers (<n>:<m>) are given, 1 is  returned  when  callout  <n>  is
-       reached  and  there  have been at least <m> callouts. The callout_error
+       two  numbers  (<n>:<m>)  are  given,  1 is returned when callout <n> is
+       reached and there have been at least <m>  callouts.  The  callout_error
       modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus-
-       ing  the entire matching process to be aborted. If both these modifiers
-       are set for the same callout number,  callout_error  takes  precedence.
-       Note  that  callouts  with string arguments are always given the number
+       ing the entire matching process to be aborted. If both these  modifiers
+       are  set  for  the same callout number, callout_error takes precedence.
+       Note that callouts with string arguments are always  given  the  number
       zero.

-       The callout_data modifier can be given an unsigned or a  negative  num-
-       ber.   This  is  set  as the "user data" that is passed to the matching
-       function, and passed back when the callout  function  is  invoked.  Any
-       value  other  than  zero  is  used as a return from pcre2test's callout
+       The  callout_data  modifier can be given an unsigned or a negative num-
+       ber.  This is set as the "user data" that is  passed  to  the  matching
+       function,  and  passed  back  when the callout function is invoked. Any
+       value other than zero is used as  a  return  from  pcre2test's  callout
       function.

       Inserting callouts can be helpful when using pcre2test to check compli-
-       cated  regular expressions. For further information about callouts, see
+       cated regular expressions. For further information about callouts,  see
       the pcre2callout documentation.


 NON-PRINTING CHARACTERS

       When pcre2test is outputting text in the compiled version of a pattern,
-       bytes  other  than 32-126 are always treated as non-printing characters
+       bytes other than 32-126 are always treated as  non-printing  characters
       and are therefore shown as hex escapes.

-       When pcre2test is outputting text that is a matched part of  a  subject
-       string,  it behaves in the same way, unless a different locale has been
-       set for the pattern (using the locale modifier). In this case, the  is-
+       When  pcre2test  is outputting text that is a matched part of a subject
+       string, it behaves in the same way, unless a different locale has  been
+       set  for the pattern (using the locale modifier). In this case, the is-
       print() function is used to distinguish printing and non-printing char-
       acters.


 SAVING AND RESTORING COMPILED PATTERNS

-       It is possible to save compiled patterns  on  disc  or  elsewhere,  and
+       It  is  possible  to  save  compiled patterns on disc or elsewhere, and
       reload them later, subject to a number of restrictions. JIT data cannot
-       be saved. The host on which the patterns are reloaded must  be  running
+       be  saved.  The host on which the patterns are reloaded must be running
       the same version of PCRE2, with the same code unit width, and must also
-       have the same endianness, pointer width  and  PCRE2_SIZE  type.  Before
-       compiled  patterns  can be saved they must be serialized, that is, con-
-       verted to a stream of bytes. A single byte stream may contain any  num-
-       ber  of compiled patterns, but they must all use the same character ta-
-       bles. A single copy of the tables is included in the byte  stream  (its
+       have  the  same  endianness,  pointer width and PCRE2_SIZE type. Before
+       compiled patterns can be saved they must be serialized, that  is,  con-
+       verted  to a stream of bytes. A single byte stream may contain any num-
+       ber of compiled patterns, but they must all use the same character  ta-
+       bles.  A  single copy of the tables is included in the byte stream (its
       size is 1088 bytes).

-       The  functions whose names begin with pcre2_serialize_ are used for se-
-       rializing and de-serializing. They are described in the  pcre2serialize
-       documentation.  In  this  section we describe the features of pcre2test
+       The functions whose names begin with pcre2_serialize_ are used for  se-
+       rializing  and de-serializing. They are described in the pcre2serialize
+       documentation. In this section we describe the  features  of  pcre2test
       that can be used to test these functions.

-       Note that "serialization" in PCRE2 does not convert  compiled  patterns
-       to  an  abstract  format  like Java or .NET. It just makes a reloadable
+       Note  that  "serialization" in PCRE2 does not convert compiled patterns
+       to an abstract format like Java or .NET. It  just  makes  a  reloadable
       byte code stream.  Hence the restrictions on reloading mentioned above.

-       In pcre2test, when a pattern with push modifier  is  successfully  com-
-       piled,  it  is  pushed onto a stack of compiled patterns, and pcre2test
-       expects the next line to contain a new pattern (or command) instead  of
+       In  pcre2test,  when  a pattern with push modifier is successfully com-
+       piled, it is pushed onto a stack of compiled  patterns,  and  pcre2test
+       expects  the next line to contain a new pattern (or command) instead of
       a subject line. By contrast, the pushcopy modifier causes a copy of the
-       compiled pattern to be stacked, leaving the original available for  im-
-       mediate  matching.  By using push and/or pushcopy, a number of patterns
-       can be compiled and retained. These  modifiers  are  incompatible  with
+       compiled  pattern to be stacked, leaving the original available for im-
+       mediate matching. By using push and/or pushcopy, a number  of  patterns
+       can  be  compiled  and  retained. These modifiers are incompatible with
       posix, and control modifiers that act at match time are ignored (with a
-       message) for the stacked patterns. The jitverify modifier applies  only
+       message)  for the stacked patterns. The jitverify modifier applies only
       at compile time.

       The command
@ -1883,21 +1901,21 @@ SAVING AND RESTORING COMPILED PATTERNS
         #save <filename>

       causes all the stacked patterns to be serialized and the result written
-       to the named file. Afterwards, all the stacked patterns are freed.  The
+       to  the named file. Afterwards, all the stacked patterns are freed. The
       command

         #load <filename>

-       reads  the  data in the file, and then arranges for it to be de-serial-
-       ized, with the resulting compiled patterns added to the pattern  stack.
-       The  pattern  on the top of the stack can be retrieved by the #pop com-
-       mand, which must be followed by  lines  of  subjects  that  are  to  be
-       matched  with  the pattern, terminated as usual by an empty line or end
-       of file. This command may be followed by  a  modifier  list  containing
-       only  control  modifiers that act after a pattern has been compiled. In
-       particular, hex, posix, posix_nosub, push, and  pushcopy  are  not  al-
-       lowed,  nor  are  any option-setting modifiers.  The JIT modifiers are,
-       however permitted. Here is an example that saves and reloads  two  pat-
+       reads the data in the file, and then arranges for it to  be  de-serial-
+       ized,  with the resulting compiled patterns added to the pattern stack.
+       The pattern on the top of the stack can be retrieved by the  #pop  com-
+       mand,  which  must  be  followed  by  lines  of subjects that are to be
+       matched with the pattern, terminated as usual by an empty line  or  end
+       of  file.  This  command  may be followed by a modifier list containing
+       only control modifiers that act after a pattern has been  compiled.  In
+       particular,  hex,  posix,  posix_nosub,  push, and pushcopy are not al-
+       lowed, nor are any option-setting modifiers.  The  JIT  modifiers  are,
+       however, permitted.  Here is an example that saves and reloads two pat-
       terns.

         /abc/push
@ -1910,10 +1928,10 @@ SAVING AND RESTORING COMPILED PATTERNS
         #pop jit,bincode
         abc

-       If  jitverify  is  used with #pop, it does not automatically imply jit,
+       If jitverify is used with #pop, it does not  automatically  imply  jit,
       which is different behaviour from when it is used on a pattern.

-       The #popcopy command is analagous to the pushcopy modifier in  that  it
+       The  #popcopy  command is analogous to the pushcopy modifier in that it
       makes current a copy of the topmost stack pattern, leaving the original
       still on the stack.

@ -1933,5 +1951,5 @@ AUTHOR

 REVISION

-       Last updated: 30 August 2021
-       Copyright (c) 1997-2021 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "23 February 2020" "PCRE2 10.35"
+.TH PCRE2UNICODE 3 "22 December 2021" "PCRE2 10.40"
 .SH NAME
 PCRE - Perl-compatible regular expressions (revised API)
 .SH "UNICODE AND UTF SUPPORT"
@ -40,10 +40,11 @@ handled, as documented below.
 .sp
 When PCRE2 is built with Unicode support, the escape sequences \ep{..},
 \eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+The Unicode properties that can be tested are a subset of those that Perl
+supports. Currently they are limited to the general category properties such as
+Lu for an upper case letter or Nd for a decimal number, the Unicode script
+names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived
+properties Any and LC (synonym L&). Full lists are given in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@ -51,10 +52,10 @@ and
 .\" HREF
 \fBpcre2syntax\fP
 .\"
-documentation. Only the short names for properties are supported. For example,
-\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE2 does not support this.
+documentation. In general, only the short names for properties are supported.
+For example, \ep{L} matches a letter. Its longer synonym, \ep{Letter}, is not
+supported. Furthermore, in Perl, many properties may optionally be prefixed by
+"Is", for compatibility with Perl 5.6. PCRE2 does not support this.
 .
 .
 .SH "WIDE CHARACTERS AND UTF MODES"
@ -448,7 +449,7 @@ can be useful when searching for UTF text in executable or other binary files.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -457,6 +458,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 February 2020
-Copyright (c) 1997-2020 University of Cambridge.
+Last updated: 22 December 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/index.md
+++ b/index.md
@ -14,14 +14,14 @@ flexible API, the code of PCRE2 has been much improved since the fork.
 ## Download

 As well as downloading from the 
-[GitHub site](https://github.com/PhilipHazel/pcre2), you can download PCRE2 
+[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2 
 or the older, unmaintained PCRE1 library from an 
 [*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge.

 You can check out the PCRE2 source code via Git or Subversion:

-    git clone https://github.com/PhilipHazel/pcre2.git
-    svn co    https://github.com/PhilipHazel/pcre2.git
+    git clone https://github.com/PCRE2Project/pcre2.git
+    svn co    https://github.com/PCRE2Project/pcre2.git

 ## Contributed Ports

@ -36,7 +36,7 @@ default character encoding, can be found at
 ## Documentation

 You can read the PCRE2 documentation 
-[here](https://philiphazel.github.io/pcre2/doc/html/index.html).
+[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html).

 Comparisons to Perl's regular expression semantics can be found in the
 community authored Wikipedia entry for PCRE.
--- a/maint/GenerateCommon.py
+++ b/maint/GenerateCommon.py
@ -0,0 +1,355 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+
+# This file is a Python module containing common lists and functions for the
+# GenerateXXX scripts that create various .c and .h files from Unicode data
+# files. It was created as part of a re-organization of these scripts in
+# December 2021.
+
+
+import re
+
+
+# ---------------------------------------------------------------------------
+#                             DATA LISTS
+# ---------------------------------------------------------------------------
+
+# BIDI classes in the DerivedBidiClass.txt file, with comments.
+
+bidi_classes = [
+  'AL',  'Arabic letter',
+  'AN',  'Arabic number',
+  'B',   'Paragraph separator',
+  'BN',  'Boundary neutral',
+  'CS',  'Common separator',
+  'EN',  'European number',
+  'ES',  'European separator',
+  'ET',  'European terminator',
+  'FSI', 'First strong isolate',
+  'L',   'Left to right',
+  'LRE', 'Left to right embedding',
+  'LRI', 'Left to right isolate',
+  'LRO', 'Left to right override',
+  'NSM', 'Non-spacing mark',
+  'ON',  'Other neutral',
+  'PDF', 'Pop directional format',
+  'PDI', 'Pop directional isolate',
+  'R',   'Right to left',
+  'RLE', 'Right to left embedding',
+  'RLI', 'Right to left isolate',
+  'RLO', 'Right to left override',
+  'S',   'Segment separator',
+  'WS',  'White space'
+  ]
+
+# Particular category property names, with comments. NOTE: If ever this list
+# is changed, the table called "catposstab" in the pcre2_auto_possess.c file
+# must be edited to keep in step.
+
+category_names = [
+  'Cc', 'Control',
+  'Cf', 'Format',
+  'Cn', 'Unassigned',
+  'Co', 'Private use',
+  'Cs', 'Surrogate',
+  'Ll', 'Lower case letter',
+  'Lm', 'Modifier letter',
+  'Lo', 'Other letter',
+  'Lt', 'Title case letter',
+  'Lu', 'Upper case letter',
+  'Mc', 'Spacing mark',
+  'Me', 'Enclosing mark',
+  'Mn', 'Non-spacing mark',
+  'Nd', 'Decimal number',
+  'Nl', 'Letter number',
+  'No', 'Other number',
+  'Pc', 'Connector punctuation',
+  'Pd', 'Dash punctuation',
+  'Pe', 'Close punctuation',
+  'Pf', 'Final punctuation',
+  'Pi', 'Initial punctuation',
+  'Po', 'Other punctuation',
+  'Ps', 'Open punctuation',
+  'Sc', 'Currency symbol',
+  'Sk', 'Modifier symbol',
+  'Sm', 'Mathematical symbol',
+  'So', 'Other symbol',
+  'Zl', 'Line separator',
+  'Zp', 'Paragraph separator',
+  'Zs', 'Space separator'
+  ]
+
+# The Extended_Pictographic property is not found in the file where all the
+# others are (GraphemeBreakProperty.txt). It comes from the emoji-data.txt
+# file, but we list it here so that the name has the correct index value.
+
+break_properties = [
+  'CR',                    ' 0',
+  'LF',                    ' 1',
+  'Control',               ' 2',
+  'Extend',                ' 3',
+  'Prepend',               ' 4',
+  'SpacingMark',           ' 5',
+  'L',                     ' 6 Hangul syllable type L',
+  'V',                     ' 7 Hangul syllable type V',
+  'T',                     ' 8 Hangul syllable type T',
+  'LV',                    ' 9 Hangul syllable type LV',
+  'LVT',                   '10 Hangul syllable type LVT',
+  'Regional_Indicator',    '11',
+  'Other',                 '12',
+  'ZWJ',                   '13',
+  'Extended_Pictographic', '14'
+  ]
+
+# List of files from which the names of Boolean properties are obtained, along
+# with a list of regex patterns for properties to be ignored, and a list of
+# extra pattern names to add.
+
+bool_propsfiles = ['PropList.txt', 'DerivedCoreProperties.txt', 'emoji-data.txt']
+bool_propsignore = [r'^Other_', r'^Hyphen$']
+bool_propsextras = ['ASCII', 'Bidi_Mirrored']
+
+
+# ---------------------------------------------------------------------------
+#                   GET BOOLEAN PROPERTY NAMES
+# ---------------------------------------------------------------------------
+
+# Get a list of Boolean property names from a number of files.
+
+def getbpropslist():
+  """Return the sorted list of supported Boolean property names.
+
+  Each file named in bool_propsfiles is read from the Unicode.tables
+  directory. Comments are stripped, and the second semicolon-separated field
+  of every remaining data line is taken as a property name. A name equal to
+  the one most recently seen is skipped, as is any name matching a pattern in
+  bool_propsignore. The names in bool_propsextras are then added and the
+  result is sorted. If a file cannot be opened, a message is printed and the
+  script exits with status 1.
+  """
+  # This function is called while the module is still being loaded, before
+  # the module-level "import sys" further down has run, so import it here to
+  # make the error path below work.
+  import sys
+
+  bplist = []
+  bplast = ""
+
+  for filename in bool_propsfiles:
+    try:
+      file = open('Unicode.tables/' + filename, 'r')
+    except IOError:
+      print(f"** Couldn't open {'Unicode.tables/' + filename}\n")
+      sys.exit(1)
+
+    # The with statement closes the file even if processing a line fails.
+    with file:
+      for line in file:
+        line = re.sub(r'#.*', '', line)
+        data = list(map(str.strip, line.split(';')))
+        if len(data) <= 1 or data[1] == bplast:
+          continue
+        # bplast is deliberately carried over from one file to the next.
+        bplast = data[1]
+        if not any(re.match(pat, bplast) for pat in bool_propsignore):
+          bplist.append(bplast)
+
+  bplist.extend(bool_propsextras)
+  bplist.sort()
+  return bplist
+
+bool_properties = getbpropslist()
+bool_props_list_item_size = (len(bool_properties) + 31) // 32
+
+
+
+# ---------------------------------------------------------------------------
+#                  COLLECTING PROPERTY NAMES AND ALIASES
+# ---------------------------------------------------------------------------
+
+script_names = ['Unknown']
+abbreviations = {}
+
+def collect_property_names():
+  # Fill script_names from Scripts.txt and abbreviations from the two alias
+  # files. Both are module-level objects that are updated in place.
+  global script_names
+  global abbreviations
+
+  # A data line is a code point or code point range followed by "; Name #".
+  script_line_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_]+) #')
+
+  previous = ""
+  with open("Unicode.tables/Scripts.txt") as scripts_file:
+    for text in scripts_file:
+      found = script_line_re.match(text)
+      # Record a name only when it differs from the one on the previous
+      # matching line.
+      if found is not None and found.group(1) != previous:
+        previous = found.group(1)
+        script_names.append(previous)
+
+  # Sometimes there is a comment in the line, so splitting around semicolons
+  # is not enough.
+  value_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_ ]+))?')
+
+  with open("Unicode.tables/PropertyValueAliases.txt") as aliases_file:
+    for text in aliases_file:
+      found = value_alias_re.match(text)
+      # Only the script ("sc") value aliases are of interest.
+      if found is None or found.group(1) != "sc":
+        continue
+
+      short, full, extra = found.group(2, 3, 4)
+      if short == full:
+        abbreviations[full] = ()
+      elif extra is None:
+        abbreviations[full] = (short,)
+      else:
+        abbreviations[full] = (short, extra)
+
+  # Boolean property abbreviations are collected into the same dictionary.
+
+  bin_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_]+))?')
+  with open("Unicode.tables/PropertyAliases.txt") as aliases_file:
+    for text in aliases_file:
+      found = bin_alias_re.match(text)
+      if found is None:
+        continue
+
+      short, full, extra = found.group(1, 2, 3)
+      if full not in bool_properties:
+        continue
+      abbreviations[full] = (short,) if extra is None else (short, extra)
+
+collect_property_names()
+
+
+
+# ---------------------------------------------------------------------------
+#                      REORDERING SCRIPT NAMES
+# ---------------------------------------------------------------------------
+
+script_abbrevs = []
+
+def reorder_scripts():
+  # Rebuild script_names and script_abbrevs so that scripts appearing in
+  # ScriptExtensions.txt come first; relative order is otherwise unchanged.
+  global script_names
+  global script_abbrevs
+  global abbreviations
+
+  # A script with no recorded abbreviation is known by its full name.
+  for full_name in script_names:
+    known = abbreviations[full_name]
+    script_abbrevs.append(known[0] if len(known) != 0 else full_name)
+
+  # Gather every abbreviation mentioned in any script extension list.
+  extended = set()
+  with open("Unicode.tables/ScriptExtensions.txt") as ext_file:
+    ext_line_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+) #')
+
+    for text in ext_file:
+      found = ext_line_re.match(text)
+      if found is not None:
+        extended.update(found.group(1).split(" "))
+
+  # A stable sort on "not in the extended set" places the extended scripts
+  # first (False sorts before True) while keeping the order within each group.
+  order = sorted(range(len(script_abbrevs)),
+                 key=lambda pos: script_abbrevs[pos] not in extended)
+
+  reordered_names = [script_names[pos] for pos in order]
+  reordered_abbrevs = [script_abbrevs[pos] for pos in order]
+
+  script_names = reordered_names
+  script_abbrevs = reordered_abbrevs
+
+reorder_scripts()
+script_list_item_size = (script_names.index('Unknown') + 31) // 32
+
+
+# ---------------------------------------------------------------------------
+#                         DERIVED LISTS
+# ---------------------------------------------------------------------------
+
+# Create general character property names from the first letters of the
+# particular categories.
+
+# Even-numbered entries of category_names are the two-letter property names;
+# their first letters are the general categories.
+gcn_set = {category_names[i][0] for i in range(0, len(category_names), 2)}
+general_category_names = sorted(gcn_set)
+
+
+# ---------------------------------------------------------------------------
+#                           FUNCTIONS
+# ---------------------------------------------------------------------------
+
+import sys
+
+# Open an output file, using the command's argument or a default. Write common
+# preliminary header information.
+
+def open_output(default):
+  # Return a file opened for writing, with the standard header comment
+  # already written to it. The file name is the command's single optional
+  # argument, or "default" when no argument is given. The script exits with
+  # status 1 if there are too many arguments or the file cannot be opened.
+  argc = len(sys.argv)
+  if argc > 2:
+    print('** Too many arguments: just give a file name')
+    sys.exit(1)
+
+  output_name = sys.argv[1] if argc == 2 else default
+
+  try:
+    out = open(output_name, "w")
+  except IOError:
+    print ("** Couldn't open %s" % output_name)
+    sys.exit(1)
+
+  # Keep only the part of the invoking script's name after the final slash;
+  # rfind() gives -1 when there is no slash, so the whole name is kept.
+  invoked_as = sys.argv[0]
+  script_name = invoked_as[invoked_as.rfind('/') + 1:]
+
+  out.write("""\
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+""")
+
+  how_to_regenerate = ("Instead, modify the maint/%s script and run it to generate\n"
+                       "a new version of this code.\n\n")
+  out.write(how_to_regenerate % script_name)
+
+  out.write("""\
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+\n""")
+  return out
+
+# End of GenerateCommon.py
--- a/maint/GenerateTest26.py
+++ b/maint/GenerateTest26.py
@ -0,0 +1,188 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+#
+# This file auto-generates Unicode property tests and their expected output.
+# It is recommended to re-run this generator after the Unicode files are
+# updated. The names of the generated files are `testinput26` and `testoutput26`.
+
+import re
+import sys
+
+from GenerateCommon import \
+  script_names, \
+  script_abbrevs
+
+def write_both(text):
+  # Emit the same text to the test input file and then to the expected
+  # output file.
+  for stream in (input_file, output_file):
+    stream.write(text)
+
+def to_string_char(ch_idx):
+  # Code points 32 to 127 are returned as the character itself. Everything
+  # else is returned as a \x{...} hex escape, with values below 16 padded to
+  # two hex digits.
+  if ch_idx < 16:
+    return "\\x{0%x}" % ch_idx
+  if 32 <= ch_idx < 128:
+    return chr(ch_idx)
+  return "\\x{%x}" % ch_idx
+
+# Prefix for the generated file names; empty means the current directory.
+output_directory = ""
+
+# At most one argument is accepted: the directory to write the files into.
+if len(sys.argv) > 2:
+  print('** Too many arguments: just give a directory name')
+  sys.exit(1)
+if len(sys.argv) == 2:
+  output_directory = sys.argv[1]
+  # Make sure the prefix ends with a slash so that a file name can be
+  # appended to it directly.
+  if not output_directory.endswith("/"):
+    output_directory += "/"
+
+# Open the test input file and the expected output file for writing.
+try:
+  input_file = open(output_directory + "testinput26", "w")
+  output_file = open(output_directory + "testoutput26", "w")
+except IOError:
+  print ("** Couldn't open output files")
+  sys.exit(1)
+
+write_both("# These tests are generated by maint/GenerateTest26.py, do not edit.\n\n")
+
+# ---------------------------------------------------------------------------
+#                      UNICODE SCRIPT EXTENSION TESTS
+# ---------------------------------------------------------------------------
+
+write_both("# Unicode Script Extension tests.\n\n")
+
+def gen_script_tests():
+  """Write script and script extension tests for every script except Unknown.
+
+  Scripts.txt and ScriptExtensions.txt are read from the Unicode.tables
+  directory to choose sample code points for each script. Patterns and
+  subject lines are written to both generated files; the expected results
+  (" 0: ..." or "No match") are written to the output file only.
+  """
+  # script_data[i] is None until script i is seen in Scripts.txt. It then
+  # holds a five-element record:
+  #   [0] low code point of the first Scripts.txt line for the script
+  #   [1] high code point of the most recent Scripts.txt line for the script
+  #   [2] lowest code point of any ScriptExtensions.txt line naming the script
+  #   [3] highest code point of any ScriptExtensions.txt line naming the script
+  #   [4] a code point from a script extension range whose Scripts.txt script
+  #       is a different one, or None if none was found
+  # char_data maps each code point to its Scripts.txt script name (None if
+  # the code point is not listed there).
+  script_data = [None] * len(script_names)
+  char_data = [None] * 0x110000
+
+  # Matches "XXXX ; value #" and "XXXX..YYYY ; value #" data lines. The value
+  # may contain spaces, which is needed for ScriptExtensions.txt.
+  property_re = re.compile("^([0-9A-F]{4,6})(?:\\.\\.([0-9A-F]{4,6}))? +; ([A-Za-z_ ]+) #")
+  prev_name = ""
+  script_idx = -1
+
+  with open("Unicode.tables/Scripts.txt") as f:
+    for line in f:
+      match_obj = property_re.match(line)
+
+      if match_obj == None:
+        continue
+
+      # Look up the script index only when the name changes from the
+      # previous matching line.
+      name = match_obj.group(3)
+      if name != prev_name:
+        script_idx = script_names.index(name)
+        prev_name = name
+
+      low = int(match_obj.group(1), 16)
+      high = low
+      char_data[low] = name
+
+      # For a range, label the remaining code points as well.
+      if match_obj.group(2) != None:
+        high = int(match_obj.group(2), 16)
+        for idx in range(low + 1, high + 1):
+           char_data[idx] = name
+
+      # The first line for a script creates its record; every line updates
+      # the high value.
+      if script_data[script_idx] == None:
+        script_data[script_idx] = [low, None, None, None, None]
+      script_data[script_idx][1] = high
+
+  # Maps each script abbreviation seen in ScriptExtensions.txt to its index.
+  extended_script_indicies = {}
+
+  with open("Unicode.tables/ScriptExtensions.txt") as f:
+    for line in f:
+      match_obj = property_re.match(line)
+
+      if match_obj == None:
+        continue
+
+      low = int(match_obj.group(1), 16)
+      high = low
+      if match_obj.group(2) != None:
+        high = int(match_obj.group(2), 16)
+
+      # The value field is a space-separated list of script abbreviations.
+      for abbrev in match_obj.group(3).split(" "):
+        if abbrev not in extended_script_indicies:
+          # First sighting: start the extension range with this line's values.
+          idx = script_abbrevs.index(abbrev)
+          extended_script_indicies[abbrev] = idx
+          rec = script_data[idx]
+          rec[2] = low
+          rec[3] = high
+        else:
+          # Later sightings widen the extension range as necessary.
+          idx = extended_script_indicies[abbrev]
+          rec = script_data[idx]
+          if rec[2] > low:
+            rec[2] = low
+          if rec[3] < high:
+            rec[3] = high
+
+        # Until one has been found, search this range for a code point whose
+        # Scripts.txt script differs from this one. Note that idx is reused
+        # as the code point loop variable; it is set again from the
+        # abbreviation at the start of the next iteration.
+        if rec[4] == None:
+          name = script_names[idx]
+          for idx in range(low, high + 1):
+            if char_data[idx] != name:
+              rec[4] = idx
+              break
+
+  # Toggled after each script that has extension data, so that successive
+  # such scripts alternate between "scx" and "Script_Extensions".
+  long_property_name = False
+
+  for idx, rec in enumerate(script_data):
+    script_name = script_names[idx]
+
+    if script_name == "Unknown":
+      continue
+
+    script_abbrev = script_abbrevs[idx]
+
+    # Full script name with sc=, tested against the record's first code point.
+    write_both("# Base script check\n")
+    write_both("/^\\p{sc=%s}/utf\n" % script_name)
+    write_both("  %s\n" % to_string_char(rec[0]))
+    output_file.write(" 0: %s\n" % to_string_char(rec[0]))
+    write_both("\n")
+
+    # Abbreviated name with Script=, tested against the record's high value.
+    write_both("/^\\p{Script=%s}/utf\n" % script_abbrev)
+    write_both("  %s\n" % to_string_char(rec[1]))
+    output_file.write(" 0: %s\n" % to_string_char(rec[1]))
+    write_both("\n")
+
+    # The remaining tests apply only to scripts with extension data.
+    if rec[2] != None:
+      property_name = "scx"
+      if long_property_name:
+        property_name = "Script_Extensions"
+
+      write_both("# Script extension check\n")
+      write_both("/^\\p{%s}/utf\n" % script_name)
+      write_both("  %s\n" % to_string_char(rec[2]))
+      output_file.write(" 0: %s\n" % to_string_char(rec[2]))
+      write_both("\n")
+
+      write_both("/^\\p{%s=%s}/utf\n" % (property_name, script_abbrev))
+      write_both("  %s\n" % to_string_char(rec[3]))
+      output_file.write(" 0: %s\n" % to_string_char(rec[3]))
+      write_both("\n")
+
+      long_property_name = not long_property_name
+
+      if rec[4] != None:
+        # The bare script name is expected to match this character, but the
+        # sc= form is expected not to.
+        write_both("# Script extension only character\n")
+        write_both("/^\\p{%s}/utf\n" % script_name)
+        write_both("  %s\n" % to_string_char(rec[4]))
+        output_file.write(" 0: %s\n" % to_string_char(rec[4]))
+        write_both("\n")
+
+        write_both("/^\\p{sc=%s}/utf\n" % script_name)
+        write_both("  %s\n" % to_string_char(rec[4]))
+        output_file.write("No match\n")
+        write_both("\n")
+      else:
+        # No extension-only character was found for this script; report it.
+        print("External character has not found for %s" % script_name)
+
+    # Use the code point one past the higher of the two recorded high values;
+    # the test expects it not to match the script.
+    high = rec[1]
+    if rec[3] != None and rec[3] > rec[1]:
+      high = rec[3]
+    write_both("# Character not in script\n")
+    write_both("/^\\p{%s}/utf\n" % script_name)
+    write_both("  %s\n" % to_string_char(high + 1))
+    output_file.write("No match\n")
+    write_both("\n")
+
+
+gen_script_tests()
+
+write_both("# End of testinput26\n")
--- a/maint/GenerateUcd.py
+++ b/maint/GenerateUcd.py
@ -0,0 +1,923 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+#
+# This script generates the pcre2_ucd.c file from Unicode data files. This is
+# the compressed Unicode property data used by PCRE2. The script was created in
+# December 2021 as part of the Unicode data generation refactoring. It is
+# basically a re-working of the MultiStage2.py script that was submitted to the
+# PCRE project by Peter Kankowski in 2008 as part of a previous upgrading of
+# Unicode property support. A number of extensions have since been added. The
+# main difference in the 2021 upgrade (apart from comments and layout) is that
+# the data tables (e.g. list of script names) are now listed in or generated by
+# a separate Python module that is shared with the other Generate scripts.
+#
+# This script must be run in the "maint" directory. It requires the following
+# Unicode data tables: BidiMirroring.txt, CaseFolding.txt,
+# DerivedBidiClass.txt, DerivedCoreProperties.txt, DerivedGeneralCategory.txt,
+# GraphemeBreakProperty.txt, PropList.txt, PropertyAliases.txt,
+# PropertyValueAliases.txt, ScriptExtensions.txt, Scripts.txt, and
+# emoji-data.txt. These must be in the Unicode.tables subdirectory.
+#
+# The emoji-data.txt file is found in the "emoji" subdirectory even though it
+# is technically part of a different (but coordinated) standard as shown
+# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"),
+# for example:
+#
+# http://unicode.org/Public/emoji/13.0/ReadMe.txt
+#
+# DerivedBidiClass.txt and DerivedGeneralCategory.txt are in the "extracted"
+# subdirectory of the Unicode database (UCD) on the Unicode web site;
+# GraphemeBreakProperty.txt is in the "auxiliary" subdirectory. The other files
+# are in the top-level UCD directory.
+#
+# -----------------------------------------------------------------------------
+# Minor modifications made to the original script:
+#  Added #! line at start
+#  Removed tabs
+#  Made it work with Python 2.4 by rewriting two statements that needed 2.5
+#  Consequent code tidy
+#  Adjusted data file names to take from the Unicode.tables directory
+#  Adjusted global table names by prefixing _pcre_.
+#  Commented out stuff relating to the casefolding table, which isn't used;
+#    removed completely in 2012.
+#  Corrected size calculation
+#  Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed.
+#  Update for PCRE2: name changes, and SUPPORT_UCP is abolished.
+#
+# Major modifications made to the original script:
+#  Added code to add a grapheme break property field to records.
+#
+#  Added code to search for sets of more than two characters that must match
+#  each other caselessly. A new table is output containing these sets, and
+#  offsets into the table are added to the main output records. This new
+#  code scans CaseFolding.txt instead of UnicodeData.txt, which is no longer
+#  used.
+#
+#  Update for Python3:
+#    . Processed with 2to3, but that didn't fix everything
+#    . Changed string.strip to str.strip
+#    . Added encoding='utf-8' to the open() call
+#    . Inserted 'int' before blocksize/ELEMS_PER_LINE because an int is
+#        required and the result of the division is a float
+#
+#  Added code to scan the emoji-data.txt file to find the Extended Pictographic
+#  property, which is used by PCRE2 as a grapheme breaking property. This was
+#  done when updating to Unicode 11.0.0 (July 2018).
+#
+#  Added code to add a Script Extensions field to records. This has increased
+#  their size from 8 to 12 bytes, only 10 of which are currently used.
+#
+#  Added code to add a bidi class field to records by scanning the
+#  DerivedBidiClass.txt and PropList.txt files. This uses one of the two spare
+#  bytes, so now 11 out of 12 are in use.
+#
+# 01-March-2010:     Updated list of scripts for Unicode 5.2.0
+# 30-April-2011:     Updated list of scripts for Unicode 6.0.0
+#     July-2012:     Updated list of scripts for Unicode 6.1.0
+# 20-August-2012:    Added scan of GraphemeBreakProperty.txt and added a new
+#                      field in the record to hold the value. Luckily, the
+#                      structure had a hole in it, so the resulting table is
+#                      not much bigger than before.
+# 18-September-2012: Added code for multiple caseless sets. This uses the
+#                      final hole in the structure.
+# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0
+# 13-May-2014:       Updated for PCRE2
+# 03-June-2014:      Updated for Python 3
+# 20-June-2014:      Updated for Unicode 7.0.0
+# 12-August-2014:    Updated to put Unicode version into the file
+# 19-June-2015:      Updated for Unicode 8.0.0
+# 02-July-2017:      Updated for Unicode 10.0.0
+# 03-July-2018:      Updated for Unicode 11.0.0
+# 07-July-2018:      Added code to scan emoji-data.txt for the Extended
+#                      Pictographic property.
+# 01-October-2018:   Added the 'Unknown' script name
+# 03-October-2018:   Added new field for Script Extensions
+# 27-July-2019:      Updated for Unicode 12.1.0
+# 10-March-2020:     Updated for Unicode 13.0.0
+# PCRE2-10.39:       Updated for Unicode 14.0.0
+# 05-December-2021:  Added code to scan DerivedBidiClass.txt for bidi class,
+#                      and also PropList.txt for the Bidi_Control property
+# 19-December-2021:  Reworked script extensions lists to be bit maps instead
+#                      of zero-terminated lists of script numbers.
+# ----------------------------------------------------------------------------
+#
+# Changes to the refactored script:
+#
+# 26-December-2021:  Refactoring completed
+# 10-January-2022:   Addition of general Boolean property support
+# 12-January-2022:   Merge scriptx and bidiclass fields
+# 14-January-2022:   Enlarge Boolean property offset to 12 bits
+#
+# ----------------------------------------------------------------------------
+#
+#
+# The main tables generated by this script are used by macros defined in
+# pcre2_internal.h. They look up Unicode character properties using short
+# sequences of code that contains no branches, which makes for greater speed.
+#
+# Conceptually, there is a table of records (of type ucd_record), one for each
+# Unicode character. Each record contains the script number, script extension
+# value, character type, grapheme break type, offset to caseless matching set,
+# offset to the character's other case, the bidi class, and offset to bitmap of
+# Boolean properties.
+#
+# A real table covering all Unicode characters would be far too big. It can be
+# efficiently compressed by observing that many characters have the same
+# record, and many blocks of characters (taking 128 characters in a block) have
+# the same set of records as other blocks. This leads to a 2-stage lookup
+# process.
+#
+# This script constructs seven tables. The ucd_caseless_sets table contains
+# lists of characters that all match each other caselessly. Each list is
+# in order, and is terminated by NOTACHAR (0xffffffff), which is larger than
+# any valid character. The first list is empty; this is used for characters
+# that are not part of any list.
+#
+# The ucd_digit_sets table contains the code points of the '9' characters in
+# each set of 10 decimal digits in Unicode. This is used to ensure that digits
+# in script runs all come from the same set. The first element in the vector
+# contains the number of subsequent elements, which are in ascending order.
+#
+# Scripts are partitioned into two groups. Scripts that appear in at least one
+# character's script extension list come first, followed by "Unknown" and then
+# all the rest. This sorting is done automatically in the GenerateCommon.py
+# script. A script's number is its index in the script_names list.
+#
+# The ucd_script_sets table contains bitmaps that represent lists of scripts
+# for Script Extensions properties. Each bitmap consists of a fixed number of
+# unsigned 32-bit numbers, enough to allocate a bit for every script that is
+# used in any character's extension list, that is, enough for every script
+# whose number is less than ucp_Unknown. A character's script extension value
+# in its ucd record is an offset into the ucd_script_sets vector. The first
+# bitmap has no bits set; characters that have no script extensions have zero
+# as their script extensions value so that they use this map.
+#
+# The ucd_boolprop_sets table contains bitmaps that represent lists of Boolean
+# properties. Each bitmap consists of a fixed number of unsigned 32-bit
+# numbers, enough to allocate a bit for each supported Boolean property.
+#
+# The ucd_records table contains one instance of every unique character record
+# that is required. The ucd_stage1 table is indexed by a character's block
+# number, which is the character's code point divided by 128, since 128 is the
+# size of each block. The result of a lookup in ucd_stage1 is a "virtual" block
+# number.
+#
+# The ucd_stage2 table is a table of "virtual" blocks; each block is indexed by
+# the offset of a character within its own block, and the result is the index
+# number of the required record in the ucd_records vector.
+#
+# The following examples are correct for the Unicode 14.0.0 database. Future
+# updates may change the actual lookup values.
+#
+# Example: lowercase "a" (U+0061) is in block 0
+#          lookup 0 in stage1 table yields 0
+#          lookup 97 (0x61) in the first table in stage2 yields 35
+#          record 35 is { 0, 5, 12, 0, -32, 18432, 44 }
+#             0 = ucp_Latin   => Latin script
+#             5 = ucp_Ll      => Lower case letter
+#            12 = ucp_gbOther => Grapheme break property "Other"
+#             0               => Not part of a caseless set
+#           -32 (-0x20)       => Other case is U+0041
+#         18432 = 0x4800      => Combined Bidi class + script extension values
+#            44               => Offset to Boolean properties
+#
+# The top 5 bits of the sixth field are the Bidi class, with the rest being the
+# script extension value, giving:
+#
+#             9 = ucp_bidiL   => Bidi class left-to-right
+#             0               => No special script extension property
+#
+# Almost all lowercase Latin characters resolve to the same record. One or two
+# are different because they are part of a multi-character caseless set (for
+# example, k, K and the Kelvin symbol are such a set).
+#
+# Example: hiragana letter A (U+3042) is in block 96 (0x60)
+#          lookup 96 in stage1 table yields 93
+#          lookup 66 (0x42) in table 93 in stage2 yields 819
+#          record 819 is { 20, 7, 12, 0, 0, 18432, 82 }
+#            20 = ucp_Hiragana => Hiragana script
+#             7 = ucp_Lo       => Other letter
+#            12 = ucp_gbOther  => Grapheme break property "Other"
+#             0                => Not part of a caseless set
+#             0                => No other case
+#         18432 = 0x4800       => Combined Bidi class + script extension values
+#            82                => Offset to Boolean properties
+#
+# The top 5 bits of the sixth field are the Bidi class, with the rest being the
+# script extension value, giving:
+#
+#             9 = ucp_bidiL   => Bidi class left-to-right
+#             0               => No special script extension property
+#
+# Example: vedic tone karshana (U+1CD0) is in block 57 (0x39)
+#          lookup 57 in stage1 table yields 55
+#          lookup 80 (0x50) in table 55 in stage2 yields 621
+#          record 621 is { 84, 12, 3, 0, 0, 26762, 96 }
+#            84 = ucp_Inherited => Script inherited from predecessor
+#            12 = ucp_Mn        => Non-spacing mark
+#             3 = ucp_gbExtend  => Grapheme break property "Extend"
+#             0                 => Not part of a caseless set
+#             0                 => No other case
+#         26762 = 0x688A        => Combined Bidi class + script extension values
+#            96                 => Offset to Boolean properties
+#
+# The top 5 bits of the sixth field are the Bidi class, with the rest being the
+# script extension value, giving:
+#
+#            13 = ucp_bidiNSM   => Bidi class non-spacing mark
+#           138                 => Script Extension list offset = 138
+#
+# At offset 138 in the ucd_script_sets vector we find a bitmap with bits 1, 8,
+# 18, and 47 set. This means that this character is expected to be used with
+# any of those scripts, which are Bengali, Devanagari, Kannada, and Grantha.
+#
+#  Philip Hazel, last updated 14 January 2022.
+##############################################################################
+
+
+# Import standard modules
+
+import re
+import string
+import sys
+
+# Import common data lists and functions
+
+from GenerateCommon import \
+  bidi_classes, \
+  bool_properties, \
+  bool_propsfiles, \
+  bool_props_list_item_size, \
+  break_properties, \
+  category_names, \
+  general_category_names, \
+  script_abbrevs, \
+  script_list_item_size, \
+  script_names, \
+  open_output
+
+# Some general parameters
+
+MAX_UNICODE = 0x110000
+NOTACHAR = 0xffffffff
+
+
+# ---------------------------------------------------------------------------
+#                         DEFINE FUNCTIONS
+# ---------------------------------------------------------------------------
+
+
+# Parse a line of Scripts.txt, GraphemeBreakProperty.txt, DerivedBidiClass.txt
+# or DerivedGeneralCategory.txt
+
+def make_get_names(enum):
+  # Build the value parser for files whose second field is a property name.
+  # The returned function takes the list of fields of one data line and
+  # yields the position of fields[1] within enum.
+  def get_name_index(chardata):
+    return enum.index(chardata[1])
+  return get_name_index
+
+
+# Parse a line of CaseFolding.txt
+
+def get_other_case(chardata):
+  # Only common ('C') and simple ('S') foldings map one character to one
+  # character; every other status yields 0, meaning "no other case". For C
+  # and S the result is the signed distance from the code point in field 0
+  # to the folded code point in field 2.
+  if chardata[1] not in ('C', 'S'):
+    return 0
+  source = int(chardata[0], 16)
+  target = int(chardata[2], 16)
+  return target - source
+
+
+# Parse a line of ScriptExtensions.txt
+
+def get_script_extension(chardata):
+  # Return the offset (list index times script_list_item_size) of the script
+  # list named in field 1. A line that repeats the previous line's list
+  # reuses the entry added for that line; otherwise the abbreviations are
+  # converted to their positions in script_abbrevs and appended to
+  # script_lists as a new entry.
+  global last_script_extension
+
+  abbrevs = chardata[1]
+  next_offset = len(script_lists) * script_list_item_size
+  if abbrevs == last_script_extension:
+    return next_offset - script_list_item_size
+
+  last_script_extension = abbrevs
+  numbers = []
+  for abbrev in abbrevs.split(' '):
+    numbers.append(script_abbrevs.index(abbrev))
+  script_lists.append(tuple(numbers))
+  return next_offset
+
+
+# Read a whole table in memory, setting/checking the Unicode version
+
+def read_table(file_name, get_value, default_value):
+  # Read one Unicode data file into a list with an entry for every code point.
+  #
+  #   file_name      path of the form "<directory>/<Base>.txt"
+  #   get_value      function that is given the list of stripped,
+  #                  ';'-separated fields of a data line and returns the
+  #                  value to store for the code point(s) on that line
+  #   default_value  value for code points that the file does not set
+  #
+  # The first line of the file must be "# <Base>-<n.n.n>.txt". The version
+  # found there is stored in the global unicode_version the first time
+  # through; if a later file has a different version a warning is written to
+  # stderr. The return value is a list of MAX_UNICODE values.
+  global unicode_version
+
+  name_match = re.match(r'^[^/]+/([^.]+)\.txt$', file_name)
+  file_base = name_match.group(1)
+  version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$"
+
+  table = [default_value] * MAX_UNICODE
+
+  # The "with" statement closes the file even if a line fails to parse.
+  with open(file_name, 'r', encoding='utf-8') as file:
+    version_match = re.match(version_pat, file.readline())
+    version = version_match.group(1)
+    if unicode_version == "":
+      unicode_version = version
+    elif unicode_version != version:
+      # The file name has to be substituted with "%"; passing it as a second
+      # argument to print() would output the "%s" literally.
+      print("WARNING: Unicode version differs in %s" % file_name, file=sys.stderr)
+
+    for line in file:
+      line = re.sub(r'#.*', '', line)
+      chardata = list(map(str.strip, line.split(';')))
+      if len(chardata) <= 1:
+        continue
+      value = get_value(chardata)
+      m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
+      char = int(m.group(1), 16)
+      if m.group(3) is None:
+        last = char
+      else:
+        last = int(m.group(3), 16)
+      for i in range(char, last + 1):
+        # It is important not to overwrite a previously set value because in
+        # the CaseFolding file there are lines to be ignored (returning the
+        # default value of 0) which often come after a line which has already
+        # set data.
+        if table[i] == default_value:
+          table[i] = value
+  return table
+
+
+# Get the smallest possible C language type for the values in a table
+
+def get_type_size(table):
+  # Choose the first C integer type, from the list below, whose range holds
+  # both the smallest and the largest value in table. Unsigned types are
+  # tried before signed ones, each group in order of increasing width. The
+  # result is a (type name, size in bytes) pair; OverflowError is raised if
+  # none of the types is wide enough.
+  candidates = (
+    ("uint8_t", 1, 0, 255),
+    ("uint16_t", 2, 0, 65535),
+    ("uint32_t", 4, 0, 4294967295),
+    ("signed char", 1, -128, 127),
+    ("int16_t", 2, -32768, 32767),
+    ("int32_t", 4, -2147483648, 2147483647),
+  )
+  smallest, largest = min(table), max(table)
+  for c_type, width, low, high in candidates:
+    if low <= smallest and largest <= high:
+      return (c_type, width)
+  raise OverflowError("Too large to fit into C types")
+
+
+# Get the total size of a list of tables
+
+def get_tables_size(*tables):
+  # Add up, over all the tables given, the number of elements multiplied by
+  # the byte size of the C type that get_type_size() selects for that table.
+  return sum(get_type_size(table)[1] * len(table) for table in tables)
+
+
+# Compress a table into the two stages
+
+def compress_table(table, block_size):
+  # Cut table into consecutive blocks of block_size entries and store each
+  # distinct block only once.
+  #
+  # Returns (stage1, stage2): stage2 is the concatenation of the distinct
+  # blocks in order of first appearance, and stage1 has one entry per
+  # original block giving the number of the stage2 block that holds its data.
+  blocks = {} # Dictionary for finding identical blocks
+  stage1 = [] # Stage 1 table contains block numbers (indices into stage 2 table)
+  stage2 = [] # Stage 2 table contains the blocks with property values
+  table = tuple(table)
+  for i in range(0, len(table), block_size):
+    block = table[i:i+block_size]
+    start = blocks.get(block)
+    if start is None:
+      # Allocate a new block. Floor division keeps the block number an
+      # integer; in Python 3 the "/" operator would produce a float.
+      start = len(stage2) // block_size
+      stage2 += block
+      blocks[block] = start
+    stage1.append(start)
+  return stage1, stage2
+
+
+# Output a table
+
+def write_table(table, table_name, block_size = None):
+  # Write table to the output file f as a C array called table_name.
+  #
+  # Without block_size the values are written 16 to a line, each line ending
+  # with a "U+xxxx" comment computed by scaling the position of its first
+  # element so that the whole table spans MAX_UNICODE. With block_size the
+  # values are written in groups of block_size, each group preceded by a
+  # comment giving its block number.
+  ELEMS_PER_LINE = 16
+  c_type, elem_size = get_type_size(table)
+
+  heading = "const %s %s[] = { /* %d bytes" % (c_type, table_name, elem_size * len(table))
+  if block_size:
+    heading += ", block = %d" % block_size
+  f.write(heading + " */\n")
+
+  values = tuple(table)
+  if block_size is None:
+    row_fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */\n"
+    scale = MAX_UNICODE / len(values)
+    for pos in range(0, len(values), ELEMS_PER_LINE):
+      row = values[pos:pos+ELEMS_PER_LINE]
+      f.write(row_fmt % (row + (int(pos * scale),)))
+  else:
+    per_line = min(block_size, ELEMS_PER_LINE)
+    block_fmt = "%3d," * per_line + "\n"
+    if block_size > ELEMS_PER_LINE:
+      # One format line for every 16 values in the block
+      block_fmt *= block_size // ELEMS_PER_LINE
+    block_fmt = "\n/* block %d */\n" + block_fmt
+    for pos in range(0, len(values), block_size):
+      f.write(block_fmt % ((pos // block_size,) + values[pos:pos+block_size]))
+  f.write("};\n\n")
+
+
+# Extract the unique combinations of properties into records
+
+def combine_tables(*tables):
+  # Treat the values at the same position in every table as one record and
+  # number the distinct records in order of first appearance. Returns
+  # (index, records): records maps each record tuple to its number and index
+  # lists the record number for every position.
+  records = {}
+  # len(records) is evaluated before setdefault() inserts, so a new record
+  # receives the next unused number.
+  index = [records.setdefault(fields, len(records)) for fields in zip(*tables)]
+  return index, records
+
+
+# Create a record struct
+
+def get_record_size_struct(records):
+  # Work out the size of a record and build the text of a C struct that
+  # describes it. Each field gets the type that get_type_size() chooses for
+  # that field's values across all records, and is placed at the next offset
+  # that is a multiple of the field's own size. Returns (size, text); the
+  # text ends with "*/" because it is written inside a C comment.
+
+  def round_up(offset, unit):
+    # Round offset up to the next multiple of unit
+    return (offset + unit - 1) & -unit
+
+  total = 0
+  lines = ['typedef struct {\n']
+  for field in range(len(records[0])):
+    field_type, field_size = get_type_size([record[field] for record in records])
+    total = round_up(total, field_size) + field_size
+    lines.append('%s property_%d;\n' % (field_type, field))
+
+  # Pad so that the first field of the next record in an array is aligned
+  first_type, first_size = get_type_size([record[0] for record in records])
+  total = round_up(total, first_size)
+
+  lines.append('} ucd_record;\n*/\n')
+  return total, ''.join(lines)
+
+
+# Write records
+
+def write_records(records, record_size):
+  # Write the ucd_records array to the output file f. The records argument
+  # maps record tuples to numbers; the records are written in ascending
+  # order of those numbers, each line ending with a comment that gives its
+  # position in the output.
+  total_bytes = len(records) * record_size
+  f.write('const ucd_record PRIV(ucd_records)[] = { ' +
+    '/* %d bytes, record size %d */\n' % (total_bytes, record_size))
+  ordered = sorted(records.items(), key = lambda item: item[1])
+  for position, (fields, _) in enumerate(ordered):
+    row_fmt = '  {' + '%6d, ' * len(fields) + '}, /* %3d */\n'
+    f.write(row_fmt % (fields + (position,)))
+  f.write('};\n\n')
+
+
+# Write a bit set
+
+def write_bitsets(list, item_size):
+  # Write one line to the output file f for every entry of the list. Each
+  # entry is a collection of bit numbers; it is turned into item_size 32-bit
+  # words with those bits set, written in hexadecimal. The closing "};" of
+  # the C array is written at the end.
+  for bits in list:
+    words = [0] * item_size
+    for bit in bits:
+      words[bit // 32] |= 1 << (bit & 31)
+    text = ", ".join("0x%08xu" % word for word in words)
+    if words:
+      text = " " + text
+    f.write(text + ",\n")
+  f.write("};\n\n")
+
+
+# ---------------------------------------------------------------------------
+# This bit of code must have been useful when the original script was being
+# developed. Retain it just in case it is ever needed again.
+
+# def test_record_size():
+#   tests = [ \
+#     ( [(3,), (6,), (6,), (1,)], 1 ), \
+#     ( [(300,), (600,), (600,), (100,)], 2 ), \
+#     ( [(25, 3), (6, 6), (34, 6), (68, 1)], 2 ), \
+#     ( [(300, 3), (6, 6), (340, 6), (690, 1)], 4 ), \
+#     ( [(3, 300), (6, 6), (6, 340), (1, 690)], 4 ), \
+#     ( [(300, 300), (6, 6), (6, 340), (1, 690)], 4 ), \
+#     ( [(3, 100000), (6, 6), (6, 123456), (1, 690)], 8 ), \
+#     ( [(100000, 300), (6, 6), (123456, 6), (1, 690)], 8 ), \
+#   ]
+#   for test in tests:
+#     size, struct = get_record_size_struct(test[0])
+#     assert(size == test[1])
+# test_record_size()
+# ---------------------------------------------------------------------------
+
+
+
+# ---------------------------------------------------------------------------
+#                       MAIN CODE FOR CREATING TABLES
+# ---------------------------------------------------------------------------
+
+unicode_version = ""
+
+# Some of the tables imported from GenerateCommon.py have alternate comment
+# strings for use by GenerateUcpHeader. The comments are not wanted here, so
+# remove them.
+
+bidi_classes = bidi_classes[::2]
+break_properties = break_properties[::2]
+category_names = category_names[::2]
+
+# Create the various tables from Unicode data files
+
+script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Unknown'))
+category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
+break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_names(break_properties), break_properties.index('Other'))
+other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0)
+bidi_class = read_table('Unicode.tables/DerivedBidiClass.txt', make_get_names(bidi_classes), bidi_classes.index('L'))
+
+# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now
+# we need to find the Extended_Pictographic property for emoji characters. This
+# can be set as an additional grapheme break property, because the default for
+# all the emojis is "other". We scan the emoji-data.txt file and modify the
+# break-props table.
+
+# The indexes of the two grapheme break properties involved are looked up
+# once, before the scan, rather than for every code point.
+
+gb_other = break_properties.index('Other')
+gb_extpict = break_properties.index('Extended_Pictographic')
+
+# The "with" statement closes the file even if a line fails to parse.
+
+with open('Unicode.tables/emoji-data.txt', 'r', encoding='utf-8') as file:
+  for line in file:
+    line = re.sub(r'#.*', '', line)
+    chardata = list(map(str.strip, line.split(';')))
+    if len(chardata) <= 1:
+      continue
+    if chardata[1] != "Extended_Pictographic":
+      continue
+    m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
+    char = int(m.group(1), 16)
+    if m.group(3) is None:
+      last = char
+    else:
+      last = int(m.group(3), 16)
+    for i in range(char, last + 1):
+      if break_props[i] != gb_other:
+        # The values have to be substituted with "%"; passing them as extra
+        # arguments to print() would output the format string unexpanded.
+        print("WARNING: Emoji 0x%x has break property %s, not 'Other'" %
+          (i, break_properties[break_props[i]]), file=sys.stderr)
+      break_props[i] = gb_extpict
+
+# Handle script extensions. The get_script_extension() function maintains a
+# list of unique bitmaps representing lists of scripts, returning the offset
+# in that list. Initialize the list with an empty set, which is used for
+# characters that have no script extensions.
+
+script_lists = [[]]
+last_script_extension = ""
+scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
+
+for idx in range(len(scriptx_bidi_class)):
+  scriptx_bidi_class[idx] = scriptx_bidi_class[idx] | (bidi_class[idx] << 11)
+bidi_class = None
+
+# Find the Boolean properties of each character. This next bit of magic creates
+# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to
+# the *same* list, which is not what we want.
+
+bprops = [[] for _ in range(MAX_UNICODE)]
+
+# Collect the properties from the various files
+
+for filename in bool_propsfiles:
+  # Read the file as UTF-8, like the other Unicode data files opened by this
+  # script, instead of relying on the platform's default encoding.
+  try:
+    file = open('Unicode.tables/' + filename, 'r', encoding='utf-8')
+  except IOError:
+    print(f"** Couldn't open {'Unicode.tables/' + filename}\n")
+    sys.exit(1)
+
+  # Using the file as a context manager closes it even if a line fails to
+  # parse.
+  with file:
+    for line in file:
+      line = re.sub(r'#.*', '', line)
+      data = list(map(str.strip, line.split(';')))
+      if len(data) <= 1:
+        continue
+
+      # Property names that are not in bool_properties are skipped
+      try:
+        ix = bool_properties.index(data[1])
+      except ValueError:
+        continue
+
+      m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', data[0])
+      char = int(m.group(1), 16)
+      if m.group(3) is None:
+        last = char
+      else:
+        last = int(m.group(3), 16)
+
+      for i in range(char, last + 1):
+        bprops[i].append(ix)
+
+# The ASCII property isn't listed in any files, but it is easy enough to add
+# it manually.
+
+ix = bool_properties.index("ASCII")
+for i in range(128):
+  bprops[i].append(ix)
+
+# The Bidi_Mirrored property isn't listed in any property files. We have to
+# deduce it from the file that lists the mirrored characters.
+
+ix = bool_properties.index("Bidi_Mirrored")
+
+# Read the file as UTF-8, like the other Unicode data files opened by this
+# script, instead of relying on the platform's default encoding.
+
+try:
+  file = open('Unicode.tables/BidiMirroring.txt', 'r', encoding='utf-8')
+except IOError:
+  print(f"** Couldn't open {'Unicode.tables/BidiMirroring.txt'}\n")
+  sys.exit(1)
+
+# Each data line starts with the code point of a mirrored character. Using
+# the file as a context manager closes it even if a line fails to parse.
+
+with file:
+  for line in file:
+    line = re.sub(r'#.*', '', line)
+    data = list(map(str.strip, line.split(';')))
+    if len(data) <= 1:
+      continue
+    c = int(data[0], 16)
+    bprops[c].append(ix)
+
+# Scan each character's boolean property list and create a list of unique
+# lists, at the same time, setting the index in that list for each property in
+# the bool_props vector.
+
+bool_props = [0] * MAX_UNICODE
+bool_props_lists = [[]]
+
+# Map from each distinct set of properties to its position in
+# bool_props_lists. A dictionary lookup replaces a linear search of the list
+# for every one of the MAX_UNICODE code points. The empty set is entry 0.
+
+bool_props_index = {frozenset(): 0}
+
+for c in range(MAX_UNICODE):
+  key = frozenset(bprops[c])
+  i = bool_props_index.get(key)
+  if i is None:
+    # First occurrence of this combination of properties: add a new entry
+    i = len(bool_props_lists)
+    bool_props_lists.append(bprops[c])
+    bool_props_index[key] = i
+
+  bool_props[c] = i * bool_props_list_item_size
+
+# This block of code was added by PH in September 2012. It scans the other_case
+# table to find sets of more than two characters that must all match each other
+# caselessly. Later in this script a table of these sets is written out.
+# However, we have to do this work here in order to compute the offsets in the
+# table that are inserted into the main table.
+
+# The CaseFolding.txt file lists pairs, but the common logic for reading data
+# sets only one value, so first we go through the table and set "return"
+# offsets for those that are not already set.
+
+for c in range(MAX_UNICODE):
+  if other_case[c] != 0 and other_case[c + other_case[c]] == 0:
+    other_case[c + other_case[c]] = -other_case[c]
+
+# Now scan again and create equivalence sets.
+
+caseless_sets = []
+
+for c in range(MAX_UNICODE):
+  o = c + other_case[c]
+
+  # Trigger when this character's other case does not point back here. We
+  # now have three characters that are case-equivalent.
+
+  if other_case[o] != -other_case[c]:
+    t = o + other_case[o]
+
+    # Scan the existing sets to see if any of the three characters are already
+    # part of a set. If so, add whichever of the three are missing from it.
+
+    appended = 0
+    for s in caseless_sets:
+      if c in s or o in s or t in s:
+
+        # Test each character separately. A single "found" flag shared by
+        # all three would stop the later characters from being added once an
+        # earlier one had been found in the set.
+
+        for y in [c, o, t]:
+          if y not in s:
+            s.append(y)
+        appended = 1
+
+    # If we have not added to an existing set, create a new one.
+
+    if not appended:
+      caseless_sets.append([c, o, t])
+
+# End of loop looking for caseless sets.
+
+# Now scan the sets and set appropriate offsets for the characters.
+
+caseless_offsets = [0] * MAX_UNICODE
+
+offset = 1;
+for s in caseless_sets:
+  for x in s:
+    caseless_offsets[x] = offset
+  offset += len(s) + 1
+
+# End of block of code for creating offsets for caseless matching sets.
+
+
+# Combine all the tables
+
+table, records = combine_tables(script, category, break_props,
+  caseless_offsets, other_case, scriptx_bidi_class, bool_props)
+
+# Find the record size and create a string definition of the structure for
+# outputting as a comment.
+
+record_size, record_struct = get_record_size_struct(list(records.keys()))
+
+# Find the optimum block size for the two-stage table
+
+min_size = sys.maxsize
+for block_size in [2 ** i for i in range(5,10)]:
+  size = len(records) * record_size
+  stage1, stage2 = compress_table(table, block_size)
+  size += get_tables_size(stage1, stage2)
+  #print "/* block size %5d  => %5d bytes */" % (block_size, size)
+  if size < min_size:
+    min_size = size
+    min_stage1, min_stage2 = stage1, stage2
+    min_block_size = block_size
+
+
+# ---------------------------------------------------------------------------
+#                   MAIN CODE FOR WRITING THE OUTPUT FILE
+# ---------------------------------------------------------------------------
+
+# Open the output file (no return on failure). This call also writes standard
+# header boilerplate.
+
+f = open_output("pcre2_ucd.c")
+
+# Output this file's heading text
+
+f.write("""\
+/* This file contains tables of Unicode properties that are extracted from
+Unicode data files. See the comments at the start of maint/GenerateUcd.py for
+details.
+
+As well as being part of the PCRE2 library, this file is #included by the
+pcre2test program, which redefines the PRIV macro to change table names from
+_pcre2_xxx to xxxx, thereby avoiding name clashes with the library. At present,
+just one of these tables is actually needed. When compiling the library, some
+headers are needed. */
+
+#ifndef PCRE2_PCRE2TEST
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "pcre2_internal.h"
+#endif /* PCRE2_PCRE2TEST */
+
+/* The tables herein are needed only when UCP support is built, and in PCRE2
+that happens automatically with UTF support. This module should not be
+referenced otherwise, so it should not matter whether it is compiled or not.
+However a comment was received about space saving - maybe the guy linked all
+the modules rather than using a library - so we include a condition to cut out
+the tables when not needed. But don't leave a totally empty module because some
+compilers barf at that. Instead, just supply some small dummy tables. */
+
+#ifndef SUPPORT_UNICODE
+const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}};
+const uint16_t PRIV(ucd_stage1)[] = {0};
+const uint16_t PRIV(ucd_stage2)[] = {0};
+const uint32_t PRIV(ucd_caseless_sets)[] = {0};
+#else
+\n""")
+
+# --- Output some variable heading stuff ---
+
+f.write("/* Total size: %d bytes, block size: %d. */\n\n" % (min_size, min_block_size))
+f.write('const char *PRIV(unicode_version) = "{}";\n\n'.format(unicode_version))
+
+f.write("""\
+/* When recompiling tables with a new Unicode version, please check the types
+in this structure definition with those in pcre2_internal.h (the actual field
+names will be different).
+\n""")
+
+f.write(record_struct)
+
+f.write("""
+/* If the 32-bit library is run in non-32-bit mode, character values greater
+than 0x10ffff may be encountered. For these we set up a special record. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+const ucd_record PRIV(dummy_ucd_record)[] = {{
+  ucp_Unknown,    /* script */
+  ucp_Cn,         /* type unassigned */
+  ucp_gbOther,    /* grapheme break property */
+  0,              /* case set */
+  0,              /* other case */
+  0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
+  0,              /* bool properties offset */
+  }};
+#endif
+\n""")
+
+# --- Output the table of caseless character sets ---
+
+f.write("""\
+/* This table contains lists of characters that are caseless sets of
+more than one character. Each list is terminated by NOTACHAR. */
+
+const uint32_t PRIV(ucd_caseless_sets)[] = {
+  NOTACHAR,
+""")
+
+for s in caseless_sets:
+  s = sorted(s)
+  for x in s:
+    f.write('  0x%04x,' % x)
+  f.write('  NOTACHAR,\n')
+f.write('};\n\n')
+
+# --- Other tables are not needed by pcre2test ---
+
+f.write("""\
+/* When #included in pcre2test, we don't need the table of digit sets, nor the
+the large main UCD tables. */
+
+#ifndef PCRE2_PCRE2TEST
+\n""")
+
+# --- Read Scripts.txt again for the sets of 10 digits. ---
+
+digitsets = []
+
+# Only lines that give a range of code points whose comment starts with the
+# general category "Nd" are of interest. For each run of ten characters in
+# such a range, the code point of the last one (the '9' digit) is recorded.
+# The "with" statement closes the file even if an error occurs.
+
+with open('Unicode.tables/Scripts.txt', 'r', encoding='utf-8') as file:
+  for line in file:
+    m = re.match(r'([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s+;\s+\S+\s+#\s+Nd\s+', line)
+    if m is None:
+      continue
+    first = int(m.group(1),16)
+    last  = int(m.group(2),16)
+    if ((last - first + 1) % 10) != 0:
+      # Report the problem on stderr with print(); the write() method of the
+      # output file does not accept a "file" argument.
+      print("ERROR: %04x..%04x does not contain a multiple of 10 characters" % (first, last),
+        file=sys.stderr)
+    while first < last:
+      digitsets.append(first + 9)
+      first += 10
+digitsets.sort()
+
+f.write("""\
+/* This table lists the code points for the '9' characters in each set of
+decimal digits. It is used to ensure that all the digits in a script run come
+from the same set. */
+
+const uint32_t PRIV(ucd_digit_sets)[] = {
+""")
+
+f.write("  %d,  /* Number of subsequent values */" % len(digitsets))
+count = 8
+for d in digitsets:
+  if count == 8:
+    f.write("\n ")
+    count = 0
+  f.write(" 0x%05x," % d)
+  count += 1
+f.write("\n};\n\n")
+
+f.write("""\
+/* This vector is a list of script bitsets for the Script Extension property.
+The number of 32-bit words in each bitset is #defined in pcre2_ucp.h as
+ucd_script_sets_item_size. */
+
+const uint32_t PRIV(ucd_script_sets)[] = {
+""")
+write_bitsets(script_lists, script_list_item_size)
+
+f.write("""\
+/* This vector is a list of bitsets for Boolean properties. The number of
+32_bit words in each bitset is #defined as ucd_boolprop_sets_item_size in
+pcre2_ucp.h. */
+
+const uint32_t PRIV(ucd_boolprop_sets)[] = {
+""")
+write_bitsets(bool_props_lists, bool_props_list_item_size)
+
+
+# Output the main UCD tables.
+
+f.write("""\
+/* These are the main two-stage UCD tables. The fields in each record are:
+script (8 bits), character type (8 bits), grapheme break property (8 bits),
+offset to multichar other cases or zero (8 bits), offset to other case or zero
+(32 bits, signed), bidi class (5 bits) and script extension (11 bits) packed
+into a 16-bit field, and offset in binary properties table (16 bits). */
+\n""")
+
+write_records(records, record_size)
+write_table(min_stage1, 'PRIV(ucd_stage1)')
+write_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size)
+
+f.write("#if UCD_BLOCK_SIZE != %d\n" % min_block_size)
+f.write("""\
+#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
+#endif
+#endif  /* SUPPORT_UNICODE */
+
+#endif  /* PCRE2_PCRE2TEST */
+
+/* End of pcre2_ucd.c */
+""")
+
+# The parentheses are needed to call close(); a bare "f.close" only refers to
+# the method and leaves the output file open.
+
+f.close()
+
+# End
--- a/maint/GenerateUcpHeader.py
+++ b/maint/GenerateUcpHeader.py
@ -0,0 +1,98 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+
+# This script generates the pcre2_ucp.h file from Unicode data files. This
+# header uses enumerations to give names to Unicode property types and script
+# names.
+
+# This script was created in December 2021 as part of the Unicode data
+# generation refactoring.
+
+
+# Import common data lists and functions
+
+from GenerateCommon import \
+  bidi_classes, \
+  bool_properties, \
+  bool_props_list_item_size, \
+  break_properties, \
+  category_names, \
+  general_category_names, \
+  script_list_item_size, \
+  script_names, \
+  open_output
+
+# Open the output file (no return on failure). This call also writes standard
+# header boilerplate.
+
+f = open_output("pcre2_ucp.h")
+
+# Output this file's heading text
+
+f.write("""\
+#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
+#define PCRE2_UCP_H_IDEMPOTENT_GUARD
+
+/* This file contains definitions of the Unicode property values that are
+returned by the UCD access macros and used throughout PCRE2.
+
+IMPORTANT: The specific values of the first two enums (general and particular
+character categories) are assumed by the table called catposstab in the file
+pcre2_auto_possess.c. They are unlikely to change, but should be checked after
+an update. */
+\n""")
+
+f.write("/* These are the general character categories. */\n\nenum {\n")
+for i in general_category_names:
+  f.write("  ucp_%s,\n" % i)
+f.write("};\n\n")
+
+f.write("/* These are the particular character categories. */\n\nenum {\n")
+for i in range(0, len(category_names), 2):
+  f.write("  ucp_%s,    /* %s */\n" % (category_names[i], category_names[i+1]))
+f.write("};\n\n")
+
+f.write("/* These are Boolean properties. */\n\nenum {\n")
+for i in bool_properties:
+  f.write("  ucp_%s,\n" % i)
+
+f.write("  /* This must be last */\n")
+f.write("  ucp_Bprop_Count\n};\n\n")
+
+f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n")
+f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size)
+
+f.write("/* These are the bidi class values. */\n\nenum {\n")
+for i in range(0, len(bidi_classes), 2):
+  sp = ' ' * (4 - len(bidi_classes[i]))
+  f.write("  ucp_bidi%s,%s /* %s */\n" % (bidi_classes[i], sp, bidi_classes[i+1]))
+f.write("};\n\n")
+
+f.write("/* These are grapheme break properties. The Extended Pictographic "
+  "property\ncomes from the emoji-data.txt file. */\n\nenum {\n")
+for i in range(0, len(break_properties), 2):
+  sp = ' ' * (21 - len(break_properties[i]))
+  f.write("  ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1]))
+f.write("};\n\n")
+
+f.write("/* These are the script identifications. */\n\nenum {\n  /* Scripts which has characters in other scripts. */\n")
+for i in script_names:
+  if i == "Unknown":
+    f.write("\n  /* Scripts which has no characters in other scripts. */\n")
+  f.write("  ucp_%s,\n" % i)
+f.write("\n")
+
+f.write("  /* This must be last */\n")
+f.write("  ucp_Script_Count\n};\n\n")
+
+f.write("/* Size of entries in ucd_script_sets[] */\n\n")
+f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size)
+
+f.write("#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n")
+f.write("/* End of pcre2_ucp.h */\n")
+
+f.close()
+
+# End
--- a/maint/GenerateUcpTables.py
+++ b/maint/GenerateUcpTables.py
@ -0,0 +1,203 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+
+# This script generates the pcre2_ucptables.c file, which contains tables for
+# recognizing Unicode property names. It is #included by pcre2_tables.c. In
+# order to reduce the number of relocations when loading the PCRE2 library, the
+# names are held as a single large string, with offsets in the table. This is
+# tedious to maintain by hand. Therefore, a script is used to generate the
+# table.
+
+# This script was created in December 2021 based on the previous GenerateUtt
+# script, whose output had to be manually edited into pcre2_tables.c. Here is
+# the history of the original script:
+
+# -----------------------------------------------------------------------------
+# Modified by PH 17-March-2009 to generate the more verbose form that works
+# for UTF-support in EBCDIC as well as ASCII environments.
+# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0.
+# Modified by PH 04-May-2010 to add new "X.." special categories.
+# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0
+# Modified by ChPe 30-September-2012 to add this note; no other changes were
+# necessary for Unicode 6.2.0 support.
+# Modified by PH 26-February-2013 to add the Xuc special category.
+# Comment modified by PH 13-May-2014 to update to PCRE2 file names.
+# Script updated to Python 3 by running it through the 2to3 converter.
+# Added script names for Unicode 7.0.0, 20-June-2014.
+# Added script names for Unicode 8.0.0, 19-June-2015.
+# Added script names for Unicode 10.0.0, 02-July-2017.
+# Added script names for Unicode 11.0.0, 03-July-2018.
+# Added 'Unknown' script, 01-October-2018.
+# Added script names for Unicode 12.1.0, 27-July-2019.
+# Added script names for Unicode 13.0.0, 10-March-2020.
+# Added Script names for Unicode 14.0.0, PCRE2-10.39
+# Added support for bidi class and bidi control, 06-December-2021
+#   This also involved lower casing strings and removing underscores, in
+#   accordance with Unicode's "loose matching" rules, which Perl observes.
+# Changed default script type from PT_SC to PT_SCX, 18-December-2021
+# -----------------------------------------------------------------------------
+#
+# Note subsequent changes here:
+#
+# 27-December-2021: Added support for 4-letter script abbreviations.
+# 10-January-2022:  Further updates for Boolean property support
+# -----------------------------------------------------------------------------
+
+
+# Import common data lists and functions
+
+from GenerateCommon import \
+  abbreviations, \
+  bool_properties, \
+  bidi_classes, \
+  category_names, \
+  general_category_names, \
+  script_names, \
+  open_output
+
+# Open the output file. open_output() comes from GenerateCommon; it does not
+# return on failure, and it also writes the standard header boilerplate.
+
+f = open_output("pcre2_ucptables.c")
+
+# bidi_classes alternates Unicode class values (AN, LRE, etc.) with comments.
+# Take just the values and put "bidi" in front of each one, so that the names
+# created here don't clash with other types of property.
+
+bidi_class_names = ["bidi" + bidi_class for bidi_class in bidi_classes[::2]]
+
+# category_names also carries a comment after each value; keep the values only.
+
+category_names = category_names[::2]
+
+# Standardized names are lower case with all underscores removed. stdname()
+# converts a single name; stdnames() converts each name in a list and returns
+# the results as a new list in the same order.
+
+def stdname(x):
+  return x.replace('_', '').lower()
+
+def stdnames(x):
+  return [stdname(original) for original in x]
+
+std_category_names = stdnames(category_names)
+std_general_category_names = stdnames(general_category_names)
+std_bidi_class_names = stdnames(bidi_class_names)
+std_bool_properties = stdnames(bool_properties)
+
+# Build the table as (standardized name, original name, property type) tuples.
+# The original name is kept next to the standardized one because it is used
+# for the ucp_xx names. NOTE: an entry made for a script abbreviation still
+# carries the script's full original name.
+
+utt_table = []
+
+# Scripts listed before 'Unknown' are given PT_SCX; 'Unknown' and every script
+# after it are given PT_SC.
+
+scx_end = script_names.index('Unknown')
+
+for position, script in enumerate(script_names):
+  script_type = 'PT_SC' if position >= scx_end else 'PT_SCX'
+  utt_table.append((stdname(script), script, script_type))
+  utt_table.extend(
+    (stdname(short), script, script_type) for short in abbreviations[script])
+
+# Add the particular categories, general categories and bidi classes
+
+for std_names, orig_names, prop_type in (
+    (std_category_names, category_names, 'PT_PC'),
+    (std_general_category_names, general_category_names, 'PT_GC'),
+    (std_bidi_class_names, bidi_class_names, 'PT_BIDICL')):
+  utt_table.extend((std, orig, prop_type) for std, orig in zip(std_names, orig_names))
+
+# Add the Boolean properties; a property contributes abbreviation entries
+# only when it is present in abbreviations.
+
+for prop in bool_properties:
+  utt_table.append((stdname(prop), prop, 'PT_BOOL'))
+  if prop in abbreviations:
+    utt_table.extend((stdname(short), prop, 'PT_BOOL') for short in abbreviations[prop])
+
+# Now add specials and synonyms. Note both the standardized and capitalized
+# forms are needed.
+
+utt_table.extend([
+  ('any', 'Any', 'PT_ANY'),
+  ('l&',  'L&',  'PT_LAMP'),
+  ('lc',  'LC',  'PT_LAMP'),
+  ('xan', 'Xan', 'PT_ALNUM'),
+  ('xps', 'Xps', 'PT_PXSPACE'),
+  ('xsp', 'Xsp', 'PT_SPACE'),
+  ('xuc', 'Xuc', 'PT_UCNC'),
+  ('xwd', 'Xwd', 'PT_WORD'),
+])
+
+# Remove duplicates from the table and put it into sorted order.
+
+utt_table = sorted(set(utt_table))
+
+# Output the heading that is specific to this file
+
+f.write("""\
+#ifdef SUPPORT_UNICODE
+
+/* The PRIV(utt)[] table below translates Unicode property names into type and
+code values. It is searched by binary chop, so must be in collating sequence of
+name. Originally, the table contained pointers to the name strings in the first
+field of each entry. However, that leads to a large number of relocations when
+a shared library is dynamically loaded. A significant reduction is made by
+putting all the names into a single, large string and using offsets instead.
+All letters are lower cased, and underscores are removed, in accordance with
+the "loose matching" rules that Unicode advises and Perl uses. */
+\n""")
+
+# The strings have to be defined using STR_ macros so that it all works in
+# UTF-8 mode on EBCDIC platforms. Each name becomes one #define line: a STR_
+# macro per character (STR_AMPERSAND for '&'), followed by a "\0" terminator.
+
+for utt in utt_table:
+  std = utt[0]
+  str_macros = ''.join(
+    ' STR_AMPERSAND' if ch == '&' else ' STR_%s' % ch for ch in std)
+  f.write('#define STRING_%s0%s "\\0"\n' % (std.replace('&', '_AMPERSAND'), str_macros))
+
+# Output the long string of concatenated names, one STRING_ macro per line.
+# Only the line for the final table entry carries the terminating semicolon.
+
+f.write('\nconst char PRIV(utt_names)[] =\n')
+for utt in utt_table:
+  terminator = ';' if utt == utt_table[-1] else ''
+  f.write('  STRING_%s0%s\n' % (utt[0].replace('&', '_AMPERSAND'), terminator))
+
+# Output the property type table. Each entry holds the offset of its name
+# within the concatenated names string, the property type, and a value. The
+# value is 0 for the special types listed below; otherwise it is the ucp_
+# name made from the original (non-standardized) name.
+
+f.write('\nconst ucp_type_table PRIV(utt)[] = {\n')
+valueless_types = ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
+  'PT_SPACE', 'PT_UCNC', 'PT_WORD')
+offset = 0
+for utt in utt_table:
+  std, orig, prop_type = utt
+  value = '0' if prop_type in valueless_types else 'ucp_' + orig
+  separator = '' if utt == utt_table[-1] else ','
+  f.write('  { %3d, %s, %s }%s\n' % (offset, prop_type, value, separator))
+  offset += len(std) + 1    # the extra 1 is the "\0" that ends each name
+f.write('};\n\n')
+
+# Ending text: the table size constant, the end of the SUPPORT_UNICODE
+# conditional, and the end-of-file marker.
+
+f.write("""\
+const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+
+#endif /* SUPPORT_UNICODE */
+
+/* End of pcre2_ucptables.c */
+""")
+
+# Close the output file. The parentheses matter: a bare "f.close" only looks
+# the method up and never calls it.
+
+f.close()
+
+# End
--- a/maint/GenerateUtt.py
+++ b/maint/GenerateUtt.py
@ -1,137 +0,0 @@
-#! /usr/bin/python
-
-# Generate utt tables. Note: this script has now been converted to Python 3.
-
-# The source file pcre2_tables.c contains (amongst other things), a table that
-# is indexed by script name. In order to reduce the number of relocations when
-# loading the library, the names are held as a single large string, with
-# offsets in the table. This is tedious to maintain by hand. Therefore, this
-# script is used to generate the table. The output is sent to stdout; usually
-# that should be directed to a temporary file. Then pcre2_tables.c can be
-# edited by replacing the relevant definitions and table therein with the
-# temporary file.
-
-# Modified by PH 17-March-2009 to generate the more verbose form that works
-# for UTF-support in EBCDIC as well as ASCII environments.
-# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0.
-# Modified by PH 04-May-2010 to add new "X.." special categories.
-# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0
-# Modified by ChPe 30-September-2012 to add this note; no other changes were
-# necessary for Unicode 6.2.0 support.
-# Modfied by PH 26-February-2013 to add the Xuc special category.
-# Comment modified by PH 13-May-2014 to update to PCRE2 file names.
-# Script updated to Python 3 by running it through the 2to3 converter.
-# Added script names for Unicode 7.0.0, 20-June-2014.
-# Added script names for Unicode 8.0.0, 19-June-2015.
-# Added script names for Unicode 10.0.0, 02-July-2017.
-# Added script names for Unicode 11.0.0, 03-July-2018.
-# Added 'Unknown' script, 01-October-2018.
-# Added script names for Unicode 12.1.0, 27-July-2019.
-# Added script names for Unicode 13.0.0, 10-March-2020.
-
-script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
- 'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
- 'Glagolitic', 'Gothic', 'Greek', 'Gujarati', 'Gurmukhi', 'Han', 'Hangul', 'Hanunoo', 'Hebrew', 'Hiragana', \
- 'Inherited', 'Kannada', 'Katakana', 'Kharoshthi', 'Khmer', 'Lao', 'Latin', 'Limbu', 'Linear_B', 'Malayalam', \
- 'Mongolian', 'Myanmar', 'New_Tai_Lue', 'Ogham', 'Old_Italic', 'Old_Persian', 'Oriya', 'Osmanya', 'Runic', \
- 'Shavian', 'Sinhala', 'Syloti_Nagri', 'Syriac', 'Tagalog', 'Tagbanwa', 'Tai_Le', 'Tamil', 'Telugu', 'Thaana', \
- 'Thai', 'Tibetan', 'Tifinagh', 'Ugaritic', 'Yi', \
- # New for Unicode 5.0
- 'Balinese', 'Cuneiform', 'Nko', 'Phags_Pa', 'Phoenician', \
- # New for Unicode 5.1
- 'Carian', 'Cham', 'Kayah_Li', 'Lepcha', 'Lycian', 'Lydian', 'Ol_Chiki', 'Rejang', 'Saurashtra', 'Sundanese', 'Vai', \
- # New for Unicode 5.2
- 'Avestan', 'Bamum', 'Egyptian_Hieroglyphs', 'Imperial_Aramaic', \
- 'Inscriptional_Pahlavi', 'Inscriptional_Parthian', \
- 'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', \
- 'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet', \
- # New for Unicode 6.0.0
- 'Batak', 'Brahmi', 'Mandaic', \
-# New for Unicode 6.1.0
- 'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri',
-# New for Unicode 7.0.0
- 'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi',
- 'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean',
- 'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi',
- 'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi',
-# New for Unicode 8.0.0
- 'Ahom', 'Anatolian_Hieroglyphs', 'Hatran', 'Multani', 'Old_Hungarian',
- 'SignWriting',
-# New for Unicode 10.0.0
- 'Adlam', 'Bhaiksuki', 'Marchen', 'Newa', 'Osage', 'Tangut', 'Masaram_Gondi',
- 'Nushu', 'Soyombo', 'Zanabazar_Square',
-# New for Unicode 11.0.0
-  'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin',
-  'Old_Sogdian', 'Sogdian',
-# New for Unicode 12.0.0
-  'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
-# New for Unicode 13.0.0
-  'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
- ]
-
-category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
-  'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
-  'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]
-
-general_category_names = ['C', 'L', 'M', 'N', 'P', 'S', 'Z']
-
-# First add the Unicode script and category names.
-
-utt_table  = list(zip(script_names, ['PT_SC'] * len(script_names)))
-utt_table += list(zip(category_names, ['PT_PC'] * len(category_names)))
-utt_table += list(zip(general_category_names, ['PT_GC'] * len(general_category_names)))
-
-# Now add our own specials.
-
-utt_table.append(('Any', 'PT_ANY'))
-utt_table.append(('L&',  'PT_LAMP'))
-utt_table.append(('Xan', 'PT_ALNUM'))
-utt_table.append(('Xps', 'PT_PXSPACE'))
-utt_table.append(('Xsp', 'PT_SPACE'))
-utt_table.append(('Xuc', 'PT_UCNC'))
-utt_table.append(('Xwd', 'PT_WORD'))
-
-# Sort the table.
-
-utt_table.sort()
-
-# We have to use STR_ macros to define the strings so that it all works in
-# UTF-8 mode on EBCDIC platforms.
-
-for utt in utt_table:
-        print('#define STRING_%s0' % (utt[0].replace('&', '_AMPERSAND')), end=' ')
-        for c in utt[0]:
-                if c == '_':
-                        print('STR_UNDERSCORE', end=' ')
-                elif c == '&':
-                        print('STR_AMPERSAND', end=' ')
-                else:
-                        print('STR_%s' % c, end=' ');
-        print('"\\0"')
-
-# Print the actual table, using the string names
-
-print('')
-print('const char PRIV(utt_names)[] =');
-last = ''
-for utt in utt_table:
-        if utt == utt_table[-1]:
-                last = ';'
-        print('  STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last))
-# This was how it was done before the EBCDIC-compatible modification.
-#        print '  "%s\\0"%s' % (utt[0], last)
-
-print('\nconst ucp_type_table PRIV(utt)[] = {')
-offset = 0
-last = ','
-for utt in utt_table:
-        if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE', 
-          'PT_SPACE', 'PT_UCNC', 'PT_WORD'):
-                value = '0'
-        else:
-                value = 'ucp_' + utt[0]
-        if utt == utt_table[-1]:
-                last = ''
-        print('  { %3d, %s, %s }%s' % (offset, utt[1], value, last))
-        offset += len(utt[0]) + 1
-print('};')
--- a/maint/MultiStage2.py
+++ b/maint/MultiStage2.py
@ -1,814 +0,0 @@
-#! /usr/bin/python
-
-# Multistage table builder
-# (c) Peter Kankowski, 2008
-
-##############################################################################
-# This script was submitted to the PCRE project by Peter Kankowski as part of
-# the upgrading of Unicode property support. The new code speeds up property
-# matching many times. The script is for the use of PCRE maintainers, to
-# generate the pcre2_ucd.c file that contains a digested form of the Unicode
-# data tables. A number of extensions have been added to the original script.
-#
-# The script has now been upgraded to Python 3 for PCRE2, and should be run in
-# the maint subdirectory, using the command
-#
-# [python3] ./MultiStage2.py >../src/pcre2_ucd.c
-#
-# It requires six Unicode data tables: DerivedGeneralCategory.txt,
-# GraphemeBreakProperty.txt, Scripts.txt, ScriptExtensions.txt,
-# CaseFolding.txt, and emoji-data.txt. These must be in the
-# maint/Unicode.tables subdirectory.
-#
-# DerivedGeneralCategory.txt is found in the "extracted" subdirectory of the
-# Unicode database (UCD) on the Unicode web site; GraphemeBreakProperty.txt is
-# in the "auxiliary" subdirectory. Scripts.txt, ScriptExtensions.txt, and
-# CaseFolding.txt are directly in the UCD directory.
-#
-# The emoji-data.txt file is found in the "emoji" subdirectory even though it
-# is technically part of a different (but coordinated) standard as shown
-# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"),
-# for example:
-#
-# http://unicode.org/Public/emoji/13.0/ReadMe.txt
-#
-# -----------------------------------------------------------------------------
-# Minor modifications made to this script:
-#  Added #! line at start
-#  Removed tabs
-#  Made it work with Python 2.4 by rewriting two statements that needed 2.5
-#  Consequent code tidy
-#  Adjusted data file names to take from the Unicode.tables directory
-#  Adjusted global table names by prefixing _pcre_.
-#  Commented out stuff relating to the casefolding table, which isn't used;
-#    removed completely in 2012.
-#  Corrected size calculation
-#  Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed.
-#  Update for PCRE2: name changes, and SUPPORT_UCP is abolished.
-#
-# Major modifications made to this script:
-#  Added code to add a grapheme break property field to records.
-#
-#  Added code to search for sets of more than two characters that must match
-#  each other caselessly. A new table is output containing these sets, and
-#  offsets into the table are added to the main output records. This new
-#  code scans CaseFolding.txt instead of UnicodeData.txt, which is no longer
-#  used.
-#
-#  Update for Python3:
-#    . Processed with 2to3, but that didn't fix everything
-#    . Changed string.strip to str.strip
-#    . Added encoding='utf-8' to the open() call
-#    . Inserted 'int' before blocksize/ELEMS_PER_LINE because an int is
-#        required and the result of the division is a float
-#
-#  Added code to scan the emoji-data.txt file to find the Extended Pictographic
-#  property, which is used by PCRE2 as a grapheme breaking property. This was
-#  done when updating to Unicode 11.0.0 (July 2018).
-#
-#  Added code to add a Script Extensions field to records. This has increased
-#  their size from 8 to 12 bytes, only 10 of which are currently used.
-#
-# 01-March-2010:     Updated list of scripts for Unicode 5.2.0
-# 30-April-2011:     Updated list of scripts for Unicode 6.0.0
-#     July-2012:     Updated list of scripts for Unicode 6.1.0
-# 20-August-2012:    Added scan of GraphemeBreakProperty.txt and added a new
-#                      field in the record to hold the value. Luckily, the
-#                      structure had a hole in it, so the resulting table is
-#                      not much bigger than before.
-# 18-September-2012: Added code for multiple caseless sets. This uses the
-#                      final hole in the structure.
-# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0
-# 13-May-2014:       Updated for PCRE2
-# 03-June-2014:      Updated for Python 3
-# 20-June-2014:      Updated for Unicode 7.0.0
-# 12-August-2014:    Updated to put Unicode version into the file
-# 19-June-2015:      Updated for Unicode 8.0.0
-# 02-July-2017:      Updated for Unicode 10.0.0
-# 03-July-2018:      Updated for Unicode 11.0.0
-# 07-July-2018:      Added code to scan emoji-data.txt for the Extended
-#                      Pictographic property.
-# 01-October-2018:   Added the 'Unknown' script name
-# 03-October-2018:   Added new field for Script Extensions
-# 27-July-2019:      Updated for Unicode 12.1.0
-# 10-March-2020:     Updated for Unicode 13.0.0
-# ----------------------------------------------------------------------------
-#
-#
-# The main tables generated by this script are used by macros defined in
-# pcre2_internal.h. They look up Unicode character properties using short
-# sequences of code that contains no branches, which makes for greater speed.
-#
-# Conceptually, there is a table of records (of type ucd_record), containing a
-# script number, script extension value, character type, grapheme break type,
-# offset to caseless matching set, offset to the character's other case, for
-# every Unicode character. However, a real table covering all Unicode
-# characters would be far too big. It can be efficiently compressed by
-# observing that many characters have the same record, and many blocks of
-# characters (taking 128 characters in a block) have the same set of records as
-# other blocks. This leads to a 2-stage lookup process.
-#
-# This script constructs six tables. The ucd_caseless_sets table contains
-# lists of characters that all match each other caselessly. Each list is
-# in order, and is terminated by NOTACHAR (0xffffffff), which is larger than
-# any valid character. The first list is empty; this is used for characters
-# that are not part of any list.
-#
-# The ucd_digit_sets table contains the code points of the '9' characters in
-# each set of 10 decimal digits in Unicode. This is used to ensure that digits
-# in script runs all come from the same set. The first element in the vector
-# contains the number of subsequent elements, which are in ascending order.
-#
-# The ucd_script_sets vector contains lists of script numbers that are the
-# Script Extensions properties of certain characters. Each list is terminated
-# by zero (ucp_Unknown). A character with more than one script listed for its
-# Script Extension property has a negative value in its record. This is the
-# negated offset to the start of the relevant list in the ucd_script_sets
-# vector.
-#
-# The ucd_records table contains one instance of every unique record that is
-# required. The ucd_stage1 table is indexed by a character's block number,
-# which is the character's code point divided by 128, since 128 is the size
-# of each block. The result of a lookup in ucd_stage1 a "virtual" block number.
-#
-# The ucd_stage2 table is a table of "virtual" blocks; each block is indexed by
-# the offset of a character within its own block, and the result is the index
-# number of the required record in the ucd_records vector.
-#
-# The following examples are correct for the Unicode 11.0.0 database. Future
-# updates may make change the actual lookup values.
-#
-# Example: lowercase "a" (U+0061) is in block 0
-#          lookup 0 in stage1 table yields 0
-#          lookup 97 (0x61) in the first table in stage2 yields 17
-#          record 17 is { 34, 5, 12, 0, -32, 34, 0 }
-#            34 = ucp_Latin   => Latin script
-#             5 = ucp_Ll      => Lower case letter
-#            12 = ucp_gbOther => Grapheme break property "Other"
-#             0               => Not part of a caseless set
-#           -32 (-0x20)       => Other case is U+0041
-#            34 = ucp_Latin   => No special Script Extension property
-#             0               => Dummy value, unused at present
-#
-# Almost all lowercase latin characters resolve to the same record. One or two
-# are different because they are part of a multi-character caseless set (for
-# example, k, K and the Kelvin symbol are such a set).
-#
-# Example: hiragana letter A (U+3042) is in block 96 (0x60)
-#          lookup 96 in stage1 table yields 90
-#          lookup 66 (0x42) in table 90 in stage2 yields 564
-#          record 564 is { 27, 7, 12, 0, 0, 27, 0 }
-#            27 = ucp_Hiragana => Hiragana script
-#             7 = ucp_Lo       => Other letter
-#            12 = ucp_gbOther  => Grapheme break property "Other"
-#             0                => Not part of a caseless set
-#             0                => No other case
-#            27 = ucp_Hiragana => No special Script Extension property
-#             0                => Dummy value, unused at present
-#
-# Example: vedic tone karshana (U+1CD0) is in block 57 (0x39)
-#          lookup 57 in stage1 table yields 55
-#          lookup 80 (0x50) in table 55 in stage2 yields 458
-#          record 458 is { 28, 12, 3, 0, 0, -101, 0 }
-#            28 = ucp_Inherited => Script inherited from predecessor
-#            12 = ucp_Mn        => Non-spacing mark
-#             3 = ucp_gbExtend  => Grapheme break property "Extend"
-#             0                 => Not part of a caseless set
-#             0                 => No other case
-#          -101                 => Script Extension list offset = 101
-#             0                 => Dummy value, unused at present
-#
-# At offset 101 in the ucd_script_sets vector we find the list 3, 15, 107, 29,
-# and terminator 0. This means that this character is expected to be used with
-# any of those scripts, which are Bengali, Devanagari, Grantha, and Kannada.
-#
-#  Philip Hazel, 03 July 2008
-##############################################################################
-
-
-import re
-import string
-import sys
-
-MAX_UNICODE = 0x110000
-NOTACHAR = 0xffffffff
-
-
-# Parse a line of Scripts.txt, GraphemeBreakProperty.txt or DerivedGeneralCategory.txt
-def make_get_names(enum):
-        return lambda chardata: enum.index(chardata[1])
-
-# Parse a line of CaseFolding.txt
-def get_other_case(chardata):
-        if chardata[1] == 'C' or chardata[1] == 'S':
-          return int(chardata[2], 16) - int(chardata[0], 16)
-        return 0
-
-# Parse a line of ScriptExtensions.txt
-def get_script_extension(chardata):
-        this_script_list = list(chardata[1].split(' '))
-        if len(this_script_list) == 1:
-          return script_abbrevs.index(this_script_list[0])
-
-        script_numbers = []
-        for d in this_script_list:
-          script_numbers.append(script_abbrevs.index(d))
-        script_numbers.append(0)
-        script_numbers_length = len(script_numbers)
-
-        for i in range(1, len(script_lists) - script_numbers_length + 1):
-          for j in range(0, script_numbers_length):
-            found = True
-            if script_lists[i+j] != script_numbers[j]:
-              found = False
-              break
-          if found:
-            return -i
-
-        # Not found in existing lists
-
-        return_value = len(script_lists)
-        script_lists.extend(script_numbers)
-        return -return_value
-
-# Read the whole table in memory, setting/checking the Unicode version
-def read_table(file_name, get_value, default_value):
-        global unicode_version
-
-        f = re.match(r'^[^/]+/([^.]+)\.txt$', file_name)
-        file_base = f.group(1)
-        version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$"
-        file = open(file_name, 'r', encoding='utf-8')
-        f = re.match(version_pat, file.readline())
-        version = f.group(1)
-        if unicode_version == "":
-                unicode_version = version
-        elif unicode_version != version:
-                print("WARNING: Unicode version differs in %s", file_name, file=sys.stderr)
-
-        table = [default_value] * MAX_UNICODE
-        for line in file:
-                line = re.sub(r'#.*', '', line)
-                chardata = list(map(str.strip, line.split(';')))
-                if len(chardata) <= 1:
-                        continue
-                value = get_value(chardata)
-                m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
-                char = int(m.group(1), 16)
-                if m.group(3) is None:
-                        last = char
-                else:
-                        last = int(m.group(3), 16)
-                for i in range(char, last + 1):
-                        # It is important not to overwrite a previously set
-                        # value because in the CaseFolding file there are lines
-                        # to be ignored (returning the default value of 0)
-                        # which often come after a line which has already set
-                        # data.
-                        if table[i] == default_value:
-                          table[i] = value
-        file.close()
-        return table
-
-# Get the smallest possible C language type for the values
-def get_type_size(table):
-        type_size = [("uint8_t", 1), ("uint16_t", 2), ("uint32_t", 4),
-                                 ("signed char", 1), ("pcre_int16", 2), ("pcre_int32", 4)]
-        limits = [(0, 255), (0, 65535), (0, 4294967295),
-                          (-128, 127), (-32768, 32767), (-2147483648, 2147483647)]
-        minval = min(table)
-        maxval = max(table)
-        for num, (minlimit, maxlimit) in enumerate(limits):
-                if minlimit <= minval and maxval <= maxlimit:
-                        return type_size[num]
-        else:
-                raise OverflowError("Too large to fit into C types")
-
-def get_tables_size(*tables):
-        total_size = 0
-        for table in tables:
-                type, size = get_type_size(table)
-                total_size += size * len(table)
-        return total_size
-
-# Compress the table into the two stages
-def compress_table(table, block_size):
-        blocks = {} # Dictionary for finding identical blocks
-        stage1 = [] # Stage 1 table contains block numbers (indices into stage 2 table)
-        stage2 = [] # Stage 2 table contains the blocks with property values
-        table = tuple(table)
-        for i in range(0, len(table), block_size):
-                block = table[i:i+block_size]
-                start = blocks.get(block)
-                if start is None:
-                        # Allocate a new block
-                        start = len(stage2) / block_size
-                        stage2 += block
-                        blocks[block] = start
-                stage1.append(start)
-
-        return stage1, stage2
-
-# Print a table
-def print_table(table, table_name, block_size = None):
-        type, size = get_type_size(table)
-        ELEMS_PER_LINE = 16
-
-        s = "const %s %s[] = { /* %d bytes" % (type, table_name, size * len(table))
-        if block_size:
-                s += ", block = %d" % block_size
-        print(s + " */")
-        table = tuple(table)
-        if block_size is None:
-                fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */"
-                mult = MAX_UNICODE / len(table)
-                for i in range(0, len(table), ELEMS_PER_LINE):
-                        print(fmt % (table[i:i+ELEMS_PER_LINE] +
-                          (int(i * mult),)))
-        else:
-                if block_size > ELEMS_PER_LINE:
-                        el = ELEMS_PER_LINE
-                else:
-                        el = block_size
-                fmt = "%3d," * el + "\n"
-                if block_size > ELEMS_PER_LINE:
-                        fmt = fmt * int(block_size / ELEMS_PER_LINE)
-                for i in range(0, len(table), block_size):
-                        print(("/* block %d */\n" + fmt) % ((i / block_size,) + table[i:i+block_size]))
-        print("};\n")
-
-# Extract the unique combinations of properties into records
-def combine_tables(*tables):
-        records = {}
-        index = []
-        for t in zip(*tables):
-                i = records.get(t)
-                if i is None:
-                        i = records[t] = len(records)
-                index.append(i)
-        return index, records
-
-def get_record_size_struct(records):
-        size = 0
-        structure = '/* When recompiling tables with a new Unicode version, please check the\n' + \
-        'types in this structure definition from pcre2_internal.h (the actual\n' + \
-        'field names will be different):\n\ntypedef struct {\n'
-        for i in range(len(records[0])):
-                record_slice = [record[i] for record in records]
-                slice_type, slice_size = get_type_size(record_slice)
-                # add padding: round up to the nearest power of slice_size
-                size = (size + slice_size - 1) & -slice_size
-                size += slice_size
-                structure += '%s property_%d;\n' % (slice_type, i)
-
-        # round up to the first item of the next structure in array
-        record_slice = [record[0] for record in records]
-        slice_type, slice_size = get_type_size(record_slice)
-        size = (size + slice_size - 1) & -slice_size
-
-        structure += '} ucd_record;\n*/\n'
-        return size, structure
-
-def test_record_size():
-        tests = [ \
-          ( [(3,), (6,), (6,), (1,)], 1 ), \
-          ( [(300,), (600,), (600,), (100,)], 2 ), \
-          ( [(25, 3), (6, 6), (34, 6), (68, 1)], 2 ), \
-          ( [(300, 3), (6, 6), (340, 6), (690, 1)], 4 ), \
-          ( [(3, 300), (6, 6), (6, 340), (1, 690)], 4 ), \
-          ( [(300, 300), (6, 6), (6, 340), (1, 690)], 4 ), \
-          ( [(3, 100000), (6, 6), (6, 123456), (1, 690)], 8 ), \
-          ( [(100000, 300), (6, 6), (123456, 6), (1, 690)], 8 ), \
-        ]
-        for test in tests:
-            size, struct = get_record_size_struct(test[0])
-            assert(size == test[1])
-            #print struct
-
-def print_records(records, record_size):
-        print('const ucd_record PRIV(ucd_records)[] = { ' + \
-              '/* %d bytes, record size %d */' % (len(records) * record_size, record_size))
-
-        records = list(zip(list(records.keys()), list(records.values())))
-        records.sort(key = lambda x: x[1])
-        for i, record in enumerate(records):
-                print(('  {' + '%6d, ' * len(record[0]) + '}, /* %3d */') % (record[0] + (i,)))
-        print('};\n')
-
-script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal',
- 'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian',
- 'Glagolitic', 'Gothic', 'Greek', 'Gujarati', 'Gurmukhi', 'Han', 'Hangul', 'Hanunoo', 'Hebrew', 'Hiragana',
- 'Inherited', 'Kannada', 'Katakana', 'Kharoshthi', 'Khmer', 'Lao', 'Latin', 'Limbu', 'Linear_B', 'Malayalam',
- 'Mongolian', 'Myanmar', 'New_Tai_Lue', 'Ogham', 'Old_Italic', 'Old_Persian', 'Oriya', 'Osmanya', 'Runic',
- 'Shavian', 'Sinhala', 'Syloti_Nagri', 'Syriac', 'Tagalog', 'Tagbanwa', 'Tai_Le', 'Tamil', 'Telugu', 'Thaana',
- 'Thai', 'Tibetan', 'Tifinagh', 'Ugaritic', 'Yi',
-# New for Unicode 5.0
- 'Balinese', 'Cuneiform', 'Nko', 'Phags_Pa', 'Phoenician',
-# New for Unicode 5.1
- 'Carian', 'Cham', 'Kayah_Li', 'Lepcha', 'Lycian', 'Lydian', 'Ol_Chiki', 'Rejang', 'Saurashtra', 'Sundanese', 'Vai',
-# New for Unicode 5.2
- 'Avestan', 'Bamum', 'Egyptian_Hieroglyphs', 'Imperial_Aramaic',
- 'Inscriptional_Pahlavi', 'Inscriptional_Parthian',
- 'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek',
- 'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet',
-# New for Unicode 6.0.0
- 'Batak', 'Brahmi', 'Mandaic',
-# New for Unicode 6.1.0
- 'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri',
-# New for Unicode 7.0.0
- 'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi',
- 'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean',
- 'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi',
- 'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi',
-# New for Unicode 8.0.0
- 'Ahom', 'Anatolian_Hieroglyphs', 'Hatran', 'Multani', 'Old_Hungarian',
- 'SignWriting',
-# New for Unicode 10.0.0
- 'Adlam', 'Bhaiksuki', 'Marchen', 'Newa', 'Osage', 'Tangut', 'Masaram_Gondi',
- 'Nushu', 'Soyombo', 'Zanabazar_Square',
-# New for Unicode 11.0.0
-  'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin',
-  'Old_Sogdian', 'Sogdian',
-# New for Unicode 12.0.0
-  'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
-# New for Unicode 13.0.0
-  'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
- ]
-
-script_abbrevs = [
-  'Zzzz', 'Arab', 'Armn', 'Beng', 'Bopo', 'Brai', 'Bugi', 'Buhd', 'Cans',
-  'Cher', 'Zyyy', 'Copt', 'Cprt', 'Cyrl', 'Dsrt', 'Deva', 'Ethi', 'Geor',
-  'Glag', 'Goth', 'Grek', 'Gujr', 'Guru', 'Hani', 'Hang', 'Hano', 'Hebr',
-  'Hira', 'Zinh', 'Knda', 'Kana', 'Khar', 'Khmr', 'Laoo', 'Latn', 'Limb',
-  'Linb', 'Mlym', 'Mong', 'Mymr', 'Talu', 'Ogam', 'Ital', 'Xpeo', 'Orya',
-  'Osma', 'Runr', 'Shaw', 'Sinh', 'Sylo', 'Syrc', 'Tglg', 'Tagb', 'Tale',
-  'Taml', 'Telu', 'Thaa', 'Thai', 'Tibt', 'Tfng', 'Ugar', 'Yiii',
-#New for Unicode 5.0
-  'Bali', 'Xsux', 'Nkoo', 'Phag', 'Phnx',
-#New for Unicode 5.1
-  'Cari', 'Cham', 'Kali', 'Lepc', 'Lyci', 'Lydi', 'Olck', 'Rjng', 'Saur',
-  'Sund', 'Vaii',
-#New for Unicode 5.2
-  'Avst', 'Bamu', 'Egyp', 'Armi', 'Phli', 'Prti', 'Java', 'Kthi', 'Lisu',
-  'Mtei', 'Sarb', 'Orkh', 'Samr', 'Lana', 'Tavt',
-#New for Unicode 6.0.0
-  'Batk', 'Brah', 'Mand',
-#New for Unicode 6.1.0
-  'Cakm', 'Merc', 'Mero', 'Plrd', 'Shrd', 'Sora', 'Takr',
-#New for Unicode 7.0.0
-  'Bass', 'Aghb', 'Dupl', 'Elba', 'Gran', 'Khoj', 'Sind', 'Lina', 'Mahj',
-  'Mani', 'Mend', 'Modi', 'Mroo', 'Nbat', 'Narb', 'Perm', 'Hmng', 'Palm',
-  'Phlp', 'Pauc', 'Sidd', 'Tirh', 'Wara',
-#New for Unicode 8.0.0
-  'Ahom', 'Hluw', 'Hatr', 'Mult', 'Hung', 'Sgnw',
-#New for Unicode 10.0.0
-  'Adlm', 'Bhks', 'Marc', 'Newa', 'Osge', 'Tang', 'Gonm', 'Nshu', 'Soyo',
-  'Zanb',
-#New for Unicode 11.0.0
-  'Dogr', 'Gong', 'Rohg', 'Maka', 'Medf', 'Sogo', 'Sogd',
-#New for Unicode 12.0.0
-  'Elym', 'Nand', 'Hmnp', 'Wcho',
-#New for Unicode 13.0.0
-  'Chrs', 'Diak', 'Kits', 'Yezi'
-  ]
-
-category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
-  'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
-  'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]
-
-# The Extended_Pictographic property is not found in the file where all the
-# others are (GraphemeBreakProperty.txt). It comes from the emoji-data.txt
-# file, but we list it here so that the name has the correct index value.
-
-break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend',
-  'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other',
-  'ZWJ', 'Extended_Pictographic' ]
-
-test_record_size()
-unicode_version = ""
-
-script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Unknown'))
-category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
-break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_names(break_property_names), break_property_names.index('Other'))
-other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0)
-
-# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now
-# we need to find the Extended_Pictographic property for emoji characters. This
-# can be set as an additional grapheme break property, because the default for
-# all the emojis is "other". We scan the emoji-data.txt file and modify the
-# break-props table.
-
-file = open('Unicode.tables/emoji-data.txt', 'r', encoding='utf-8')
-for line in file:
-        line = re.sub(r'#.*', '', line)
-        chardata = list(map(str.strip, line.split(';')))
-        if len(chardata) <= 1:
-                continue
-
-        if chardata[1] != "Extended_Pictographic":
-                continue
-
-        m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
-        char = int(m.group(1), 16)
-        if m.group(3) is None:
-                last = char
-        else:
-                last = int(m.group(3), 16)
-        for i in range(char, last + 1):
-                if break_props[i] != break_property_names.index('Other'):
-                   print("WARNING: Emoji 0x%x has break property %s, not 'Other'",
-                     i, break_property_names[break_props[i]], file=sys.stderr)
-                break_props[i] = break_property_names.index('Extended_Pictographic')
-file.close()
-
-# The Script Extensions property default value is the Script value. Parse the
-# file, setting 'Unknown' as the default (this will never be a Script Extension
-# value), then scan it and fill in the default from Scripts. Code added by PH
-# in October 2018. Positive values are used for just a single script for a
-# code point. Negative values are negated offsets in a list of lists of
-# multiple scripts. Initialize this list with a single entry, as the zeroth
-# element is never used.
-
-script_lists = [0]
-script_abbrevs_default = script_abbrevs.index('Zzzz')
-scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, script_abbrevs_default)
-
-for i in range(0, MAX_UNICODE):
-  if scriptx[i] == script_abbrevs_default:
-    scriptx[i] = script[i]
-
-# With the addition of the new Script Extensions field, we need some padding
-# to get the Unicode records up to 12 bytes (multiple of 4). Set a value
-# greater than 255 to make the field 16 bits.
-
-padding_dummy = [0] * MAX_UNICODE
-padding_dummy[0] = 256
-
-# This block of code was added by PH in September 2012. I am not a Python
-# programmer, so the style is probably dreadful, but it does the job. It scans
-# the other_case table to find sets of more than two characters that must all
-# match each other caselessly. Later in this script a table of these sets is
-# written out. However, we have to do this work here in order to compute the
-# offsets in the table that are inserted into the main table.
-
-# The CaseFolding.txt file lists pairs, but the common logic for reading data
-# sets only one value, so first we go through the table and set "return"
-# offsets for those that are not already set.
-
-for c in range(MAX_UNICODE):
-  if other_case[c] != 0 and other_case[c + other_case[c]] == 0:
-    other_case[c + other_case[c]] = -other_case[c]
-
-# Now scan again and create equivalence sets.
-
-sets = []
-
-for c in range(MAX_UNICODE):
-  o = c + other_case[c]
-
-  # Trigger when this character's other case does not point back here. We
-  # now have three characters that are case-equivalent.
-
-  if other_case[o] != -other_case[c]:
-    t = o + other_case[o]
-
-    # Scan the existing sets to see if any of the three characters are already
-    # part of a set. If so, unite the existing set with the new set.
-
-    appended = 0
-    for s in sets:
-      found = 0
-      for x in s:
-        if x == c or x == o or x == t:
-          found = 1
-
-      # Add new characters to an existing set
-
-      if found:
-        found = 0
-        for y in [c, o, t]:
-          for x in s:
-            if x == y:
-              found = 1
-          if not found:
-            s.append(y)
-        appended = 1
-
-    # If we have not added to an existing set, create a new one.
-
-    if not appended:
-      sets.append([c, o, t])
-
-# End of loop looking for caseless sets.
-
-# Now scan the sets and set appropriate offsets for the characters.
-
-caseless_offsets = [0] * MAX_UNICODE
-
-offset = 1;
-for s in sets:
-  for x in s:
-    caseless_offsets[x] = offset
-  offset += len(s) + 1
-
-# End of block of code for creating offsets for caseless matching sets.
-
-
-# Combine the tables
-
-table, records = combine_tables(script, category, break_props,
-  caseless_offsets, other_case, scriptx, padding_dummy)
-
-record_size, record_struct = get_record_size_struct(list(records.keys()))
-
-# Find the optimum block size for the two-stage table
-min_size = sys.maxsize
-for block_size in [2 ** i for i in range(5,10)]:
-        size = len(records) * record_size
-        stage1, stage2 = compress_table(table, block_size)
-        size += get_tables_size(stage1, stage2)
-        #print "/* block size %5d  => %5d bytes */" % (block_size, size)
-        if size < min_size:
-                min_size = size
-                min_stage1, min_stage2 = stage1, stage2
-                min_block_size = block_size
-
-print("/* This module is generated by the maint/MultiStage2.py script.")
-print("Do not modify it by hand. Instead modify the script and run it")
-print("to regenerate this code.")
-print()
-print("As well as being part of the PCRE2 library, this module is #included")
-print("by the pcre2test program, which redefines the PRIV macro to change")
-print("table names from _pcre2_xxx to xxxx, thereby avoiding name clashes")
-print("with the library. At present, just one of these tables is actually")
-print("needed. */")
-print()
-print("#ifndef PCRE2_PCRE2TEST")
-print()
-print("#ifdef HAVE_CONFIG_H")
-print("#include \"config.h\"")
-print("#endif")
-print()
-print("#include \"pcre2_internal.h\"")
-print()
-print("#endif /* PCRE2_PCRE2TEST */")
-print()
-print("/* Unicode character database. */")
-print("/* This file was autogenerated by the MultiStage2.py script. */")
-print("/* Total size: %d bytes, block size: %d. */" % (min_size, min_block_size))
-print()
-print("/* The tables herein are needed only when UCP support is built,")
-print("and in PCRE2 that happens automatically with UTF support.")
-print("This module should not be referenced otherwise, so")
-print("it should not matter whether it is compiled or not. However")
-print("a comment was received about space saving - maybe the guy linked")
-print("all the modules rather than using a library - so we include a")
-print("condition to cut out the tables when not needed. But don't leave")
-print("a totally empty module because some compilers barf at that.")
-print("Instead, just supply some small dummy tables. */")
-print()
-print("#ifndef SUPPORT_UNICODE")
-print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0 }};")
-print("const uint16_t PRIV(ucd_stage1)[] = {0};")
-print("const uint16_t PRIV(ucd_stage2)[] = {0};")
-print("const uint32_t PRIV(ucd_caseless_sets)[] = {0};")
-print("#else")
-print()
-print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version))
-print()
-print("/* If the 32-bit library is run in non-32-bit mode, character values")
-print("greater than 0x10ffff may be encountered. For these we set up a")
-print("special record. */")
-print()
-print("#if PCRE2_CODE_UNIT_WIDTH == 32")
-print("const ucd_record PRIV(dummy_ucd_record)[] = {{")
-print("  ucp_Unknown,    /* script */")
-print("  ucp_Cn,         /* type unassigned */")
-print("  ucp_gbOther,    /* grapheme break property */")
-print("  0,              /* case set */")
-print("  0,              /* other case */")
-print("  ucp_Unknown,    /* script extension */")
-print("  0,              /* dummy filler */")
-print("  }};")
-print("#endif")
-print()
-print(record_struct)
-
-# --- Added by PH: output the table of caseless character sets ---
-
-print("/* This table contains lists of characters that are caseless sets of")
-print("more than one character. Each list is terminated by NOTACHAR. */\n")
-
-print("const uint32_t PRIV(ucd_caseless_sets)[] = {")
-print("  NOTACHAR,")
-for s in sets:
-  s = sorted(s)
-  for x in s:
-    print('  0x%04x,' % x, end=' ')
-  print('  NOTACHAR,')
-print('};')
-print()
-
-# ------
-
-print("/* When #included in pcre2test, we don't need the table of digit")
-print("sets, nor the the large main UCD tables. */")
-print()
-print("#ifndef PCRE2_PCRE2TEST")
-print()
-
-# --- Added by PH: read Scripts.txt again for the sets of 10 digits. ---
-
-digitsets = []
-file = open('Unicode.tables/Scripts.txt', 'r', encoding='utf-8')
-
-for line in file:
-  m = re.match(r'([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s+;\s+\S+\s+#\s+Nd\s+', line)
-  if m is None:
-    continue
-  first = int(m.group(1),16)
-  last  = int(m.group(2),16)
-  if ((last - first + 1) % 10) != 0:
-    print("ERROR: %04x..%04x does not contain a multiple of 10 characters" % (first, last),
-      file=sys.stderr)
-  while first < last:
-    digitsets.append(first + 9)
-    first += 10
-file.close()
-digitsets.sort()
-
-print("/* This table lists the code points for the '9' characters in each")
-print("set of decimal digits. It is used to ensure that all the digits in")
-print("a script run come from the same set. */\n")
-print("const uint32_t PRIV(ucd_digit_sets)[] = {")
-
-print("  %d,  /* Number of subsequent values */" % len(digitsets), end='')
-count = 8
-for d in digitsets:
-  if count == 8:
-    print("\n ", end='')
-    count = 0
-  print(" 0x%05x," % d, end='')
-  count += 1
-print("\n};\n")
-
-print("/* This vector is a list of lists of scripts for the Script Extension")
-print("property. Each sublist is zero-terminated. */\n")
-print("const uint8_t PRIV(ucd_script_sets)[] = {")
-
-count = 0
-print("  /*   0 */", end='')
-for d in script_lists:
-  print(" %3d," % d, end='')
-  count += 1
-  if d == 0:
-    print("\n  /* %3d */" % count, end='')
-print("\n};\n")
-
-# Output the main UCD tables.
-
-print("/* These are the main two-stage UCD tables. The fields in each record are:")
-print("script (8 bits), character type (8 bits), grapheme break property (8 bits),")
-print("offset to multichar other cases or zero (8 bits), offset to other case")
-print("or zero (32 bits, signed), script extension (16 bits, signed), and a dummy")
-print("16-bit field to make the whole thing a multiple of 4 bytes. */\n")
-
-print_records(records, record_size)
-print_table(min_stage1, 'PRIV(ucd_stage1)')
-print_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size)
-print("#if UCD_BLOCK_SIZE != %d" % min_block_size)
-print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
-print("#endif")
-print("#endif  /* SUPPORT_UNICODE */")
-print()
-print("#endif  /* PCRE2_PCRE2TEST */")
-
-
-# This code was part of the original contribution, but is commented out as it
-# was never used. A two-stage table has sufficed.
-
-"""
-
-# Three-stage tables:
-
-# Find the optimum block size for 3-stage table
-min_size = sys.maxint
-for stage3_block in [2 ** i for i in range(2,6)]:
-        stage_i, stage3 = compress_table(table, stage3_block)
-        for stage2_block in [2 ** i for i in range(5,10)]:
-                size = len(records) * 4
-                stage1, stage2 = compress_table(stage_i, stage2_block)
-                size += get_tables_size(stage1, stage2, stage3)
-                # print "/* %5d / %3d  => %5d bytes */" % (stage2_block, stage3_block, size)
-                if size < min_size:
-                        min_size = size
-                        min_stage1, min_stage2, min_stage3 = stage1, stage2, stage3
-                        min_stage2_block, min_stage3_block = stage2_block, stage3_block
-
-print "/* Total size: %d bytes" % min_size */
-print_records(records)
-print_table(min_stage1, 'ucd_stage1')
-print_table(min_stage2, 'ucd_stage2', min_stage2_block)
-print_table(min_stage3, 'ucd_stage3', min_stage3_block)
-
-"""
--- a/maint/README
+++ b/maint/README
@ -16,99 +16,122 @@ and also contains some notes for maintainers. Its contents are:
 Files in the maint directory
 ============================

-GenerateUtt.py   A Python script to generate part of the pcre2_tables.c file
-                 that contains Unicode script names in a long string with
-                 offsets, which is tedious to maintain by hand.
+GenerateCommon.py
+  A Python module containing data and functions that are used by the other
+  Generate scripts.
+  
+GenerateTest26.py
+  A Python script that generates input and expected output test data for test
+  26, which tests certain aspects of Unicode property support.  

-ManyConfigTests  A shell script that runs "configure, make, test" a number of
-                 times with different configuration settings.
+GenerateUcd.py
+  A Python script that generates the file pcre2_ucd.c from GenerateCommon.py
+  and Unicode data files, which are themselves downloaded from the Unicode web
+  site. The generated file contains the tables for a 2-stage lookup of Unicode
+  properties, along with some auxiliary tables. The script starts with a long
+  comment that gives details of the tables it constructs. 

-MultiStage2.py   A Python script that generates the file pcre2_ucd.c from six
-                 Unicode data files, which are themselves downloaded from the
-                 Unicode web site. Run this script in the "maint" directory.
-                 The generated file is written to stdout. It contains the
-                 tables for a 2-stage lookup of Unicode properties, along with
-                 some auxiliary tables.
+GenerateUcpHeader.py
+  A Python script that generates the file pcre2_ucp.h from GenerateCommon.py
+  and Unicode data files. The generated file defines constants for various
+  Unicode property values.
+
+GenerateUcpTables.py
+  A Python script that generates the file pcre2_ucptables.c from
+  GenerateCommon.py and Unicode data files. The generated file contains tables
+  for looking up Unicode property names.
+
+ManyConfigTests
+  A shell script that runs "configure, make, test" a number of times with
+  different configuration settings.

 pcre2_chartables.c.non-standard
-                 This is a set of character tables that came from a Windows
-                 system. It has characters greater than 128 that are set as
-                 spaces, amongst other things. I kept it so that it can be
-                 used for testing from time to time.
+  This is a set of character tables that came from a Windows system. It has
+  characters greater than 128 that are set as spaces, amongst other things. I
+  kept it so that it can be used for testing from time to time.

-README           This file.
+README
+  This file.

-Unicode.tables   The files in this directory were downloaded from the Unicode
-                 web site. They contain information about Unicode characters
-                 and scripts. The ones used by the MultiStage2.py script are
-                 CaseFolding.txt, DerivedGeneralCategory.txt, Scripts.txt,
-                 ScriptExtensions.txt, GraphemeBreakProperty.txt, and
-                 emoji-data.txt. I've kept UnicodeData.txt (which is no longer
-                 used by the script) because it is useful occasionally for
-                 manually looking up the details of certain characters.
-                 However, note that character names in this file such as
-                 "Arabic sign sanah" do NOT mean that the character is in a
-                 particular script (in this case, Arabic). Scripts.txt and
-                 ScriptExtensions.txt are where to look for script information.
+Unicode.tables
+  The files in this directory were downloaded from the Unicode web site. They
+  contain information about Unicode characters and scripts, and are used by the
+  Generate scripts. UnicodeData.txt is no longer used by any script, but it is
+  kept because it is useful occasionally for manually looking up the details
+  of certain characters. However, note that character names in this
+  file such as "Arabic sign sanah" do NOT mean that the character is in a
+  particular script (in this case, Arabic). Scripts.txt and
+  ScriptExtensions.txt are where to look for script information.

-ucptest.c        A short C program for testing the Unicode property macros
-                 that do lookups in the pcre2_ucd.c data, mainly useful after
-                 rebuilding the Unicode property table. Compile and run this in
-                 the "maint" directory (see comments at its head). This program
-                 can also be used to find characters with specific properties.
+ucptest.c
+  A program for testing the Unicode property macros that do lookups in the
+  pcre2_ucd.c data, mainly useful after rebuilding the Unicode property tables.
+  Compile and run this in the "maint" directory (see comments at its head).
+  This program can also be used to find characters with specific properties and 
+  to list which properties are supported. 

-ucptestdata      A directory containing four files, testinput{1,2} and
-                 testoutput{1,2}, for use in conjunction with the ucptest
-                 program.
+ucptestdata
+  A directory containing four files, testinput{1,2} and testoutput{1,2}, for
+  use in conjunction with the ucptest program.

-utf8.c           A short, freestanding C program for converting a Unicode code
-                 point into a sequence of bytes in the UTF-8 encoding, and vice
-                 versa. If its argument is a hex number such as 0x1234, it
-                 outputs a list of the equivalent UTF-8 bytes. If its argument
-                 is a sequence of concatenated UTF-8 bytes (e.g. e188b4) it
-                 treats them as a UTF-8 character and outputs the equivalent
-                 code point in hex. See comments at its head for details.
+utf8.c
+  A short, freestanding C program for converting a Unicode code point into a
+  sequence of bytes in the UTF-8 encoding, and vice versa. If its argument is a
+  hex number such as 0x1234, it outputs a list of the equivalent UTF-8 bytes.
+  If its argument is a sequence of concatenated UTF-8 bytes (e.g. 12e188b4) it
+  treats them as a UTF-8 string and outputs the equivalent code points in hex.
+  See comments at its head for details.


 Updating to a new Unicode release
 =================================

 When there is a new release of Unicode, the files in Unicode.tables must be
-refreshed from the web site. If the new version of Unicode adds new character
-scripts, the source file pcre2_ucp.h and both the MultiStage2.py and the
-GenerateUtt.py scripts must be edited to add the new names. I have been adding
-each new group at the end of the relevant list, with a comment. Note also that
-both the pcre2syntax.3 and pcre2pattern.3 man pages contain lists of Unicode
-script names.
+refreshed from the web site. Once that is done, the four Python scripts that 
+generate files from the Unicode data can be run from within the "maint" 
+directory.

-MultiStage2.py has two lists: the full names and the abbreviations that are
-found in the ScriptExtensions.txt file. A list of script names and their
-abbreviations can be found in the PropertyValueAliases.txt file on the
-Unicode web site. There is also a Wikipedia page that lists them, and notes the
-Unicode version in which they were introduced:
+Note: Previously, it was necessary to update lists of scripts and their 
+abbreviations by hand before running the Python scripts. This is no longer
+necessary because the scripts have been upgraded to extract this information
+themselves. Also, there used to be explicit lists of scripts in two of the man
+pages. This is no longer the case; the pcre2test program can now output a list 
+of supported scripts.

-https://en.wikipedia.org/wiki/Unicode_scripts#Table_of_Unicode_scripts
+You can give an output file name as an argument to the following scripts, but
+by default:

-Once the script name lists have been updated, MultiStage2.py can be run to
-generate a new version of pcre2_ucd.c, and GenerateUtt.py can be run to
-generate the tricky tables for inclusion in pcre2_tables.c (which must be
-hand-edited). If MultiStage2.py gives the error "ValueError: list.index(x): x
-not in list", the cause is usually a missing (or misspelt) name in one of the
-lists of scripts.
+GenerateUcd.py        creates pcre2_ucd.c        )
+GenerateUcpHeader.py  creates pcre2_ucp.h        ) in the current directory
+GenerateUcpTables.py  creates pcre2_ucptables.c  )

-The ucptest program can be compiled and used to check that the new tables in
-pcre2_ucd.c work properly, using the data files in ucptestdata to check a
-number of test characters. It used to be necessary to update the source
-ucptest.c whenever new Unicode scripts were added, but this is no longer
-required because that program now uses the lists in the PCRE2 source. However,
-adding a few tests for new scripts to the files in ucptestdata is a good idea.
+These files can be compared against the existing versions in the src directory
+to check on any changes before replacing the old files, but you can also
+generate directly into the final location by running:
+
+./GenerateUcd.py       ../src/pcre2_ucd.c
+./GenerateUcpHeader.py ../src/pcre2_ucp.h
+./GenerateUcpTables.py ../src/pcre2_ucptables.c
+
+Once the .c and .h files are in the ../src directory, the ucptest program can
+be compiled and used to check that the new tables work properly. The data files
+in ucptestdata are set up to check a number of test characters. See the
+comments at the start of ucptest.c. If there are new scripts, adding a few
+tests to the files in ucptestdata is a good idea.
+
+Finally, you should run the GenerateTest26.py script to regenerate the input
+and expected output for a series of Unicode property tests that are
+automatically generated from the Unicode data files. By default, the files
+are written to testinput26 and testoutput26 in the current directory, but you
+can give an alternative directory name as an argument to the script. These
+files should eventually be installed in the main testdata directory.


 Preparing for a PCRE2 release
 =============================

-This section contains a checklist of things that I consult before building a
-distribution for a new release.
+This section contains a checklist of things that I do before building a new
+release.

 . Ensure that the version number and version date are correct in configure.ac.

@ -117,17 +140,16 @@ distribution for a new release.

 . If new build options or new source files have been added, ensure that they
  are added to the CMake files as well as to the autoconf files. The relevant
-  files are CMakeLists.txt and config-cmake.h.in. After making a release
-  tarball, test it out with CMake if there have been changes here.
+  files are CMakeLists.txt and config-cmake.h.in. After making a release, test
+  it out with CMake if there have been changes here.

 . Run ./autogen.sh to ensure everything is up-to-date.

 . Compile and test with many different config options, and combinations of
  options. Also, test with valgrind by running "RunTest valgrind" and
-  "RunGrepTest valgrind" (which takes quite a long time). The script
-  maint/ManyConfigTests now encapsulates this testing. It runs tests with
-  different configurations, and it also runs some of them with valgrind, all of
-  which can take quite some time.
+  "RunGrepTest valgrind". The script maint/ManyConfigTests now encapsulates
+  this testing. It runs tests with different configurations, and it also runs
+  some of them with valgrind, all of which can take quite some time.

 . Run tests in both 32-bit and 64-bit environments if possible. I can no longer
  run 32-bit tests.
@ -142,7 +164,8 @@ distribution for a new release.
  -fsanitize=signed-integer-overflow

 . Do a test build using CMake. Remove src/config.h first, lest it override the
-  version that CMake creates. Do NOT use parallel make.
+  version that CMake creates. Also do a CMake unity build to check that it 
+  still works: [c]cmake -DCMAKE_UNITY_BUILD=ON sets up a unity build.

 . Run perltest.sh on the test data for tests 1 and 4. The output should match
  the PCRE2 test output, apart from the version identification at the start of
@ -161,11 +184,12 @@ distribution for a new release.
  systems. For example, on Solaris it is helpful to test using Sun's cc
  compiler as a change from gcc. Adding -xarch=v9 to the cc options does a
  64-bit test, but it also needs -S 64 for pcre2test to increase the stack size
-  for test 2. Since I retired I can no longer do much of this, but instead I
-  rely on putting out release candidates for testing by the community.
+  for test 2. Since I retired I can no longer do much of this. There are 
+  automated tests under Ubuntu, Alpine, and Windows that are now set up as 
+  GitHub actions. Check that they are running clean.

 . The buildbots at http://buildfarm.opencsw.org/ do some automated testing
-  of PCRE2 and should be checked before putting out a release.
+  of PCRE2 and should also be checked before putting out a release.


 Updating version info for libtool
@ -221,10 +245,11 @@ it reports them and then aborts. Otherwise it removes trailing spaces from
 sources and refreshes the HTML documentation. Update the GitHub repository with
 "git push".

-Once PrepareRelease has run clean, run "make distcheck" to create the tarball
+Once PrepareRelease has run clean, run "make distcheck" to create the tarballs
 and the zipball. I then sign these files. Double-check with "git status" that
-the repository is fully up-to-date, then create a new tag on GitHub. Upload the
-tarball, zipball, and the signatures as "assets" of the GitHub release.
+the repository is fully up-to-date, then create a new tag and a release on
+GitHub. Upload the tarballs, zipball, and the signatures as "assets" of the
+GitHub release.

 When the new release is out, don't forget to tell webmaster@pcre.org and the
 mailing list.
@ -343,8 +368,6 @@ years.

  See Unicode TR 29. The last two are very much aimed at natural language.

-. (?[...]) extended classes: big project.
-
 . Allow a callout to specify a number of characters to skip. This can be done
  compatibly via an extra callout field.

@ -414,13 +437,8 @@ years.
  with lookarounds for \b and \B. Ideally the setting should last till the end
  of the group, which means remembering all previous settings; maybe a fixed
  amount of stack would do - how deep would anyone want to nest these things?
-  See GitHub issue #13 for a compendium of character class issues.
-
-. Recognize the short script names. They are already listed in maint/
-  Multistage2.py because they are needed for scanning the script extensions
-  file.
-
-. Use script extensions for \p?
+  See GitHub issue #13 for a compendium of character class issues, including
+  (?[...]) extended classes.

 . A user suggested something like --with-build-info to set a build information
  string that could be retrieved by pcre2_config(). However, there's no
@ -439,4 +457,4 @@ years.
 Philip Hazel
 Email local part: Philip.Hazel
 Email domain: gmail.com
-Last updated: 26 August 2021
+Last updated: 25 April 2022
--- a/maint/Unicode.tables/BidiMirroring.txt
+++ b/maint/Unicode.tables/BidiMirroring.txt
@ -0,0 +1,633 @@
+# BidiMirroring-14.0.0.txt
+# Date: 2021-08-08, 22:55:00 GMT [KW, RP]
+# © 2021 Unicode®, Inc.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
+#
+# Bidi_Mirroring_Glyph Property
+#
+# This file is an informative contributory data file in the
+# Unicode Character Database.
+#
+# This data file lists characters that have the Bidi_Mirrored=Yes property
+# value, for which there is another Unicode character that typically has a glyph
+# that is the mirror image of the original character's glyph.
+#
+# The repertoire covered by the file is Unicode 14.0.0.
+#
+# The file contains a list of lines with mappings from one code point
+# to another one for character-based mirroring.
+# Note that for "real" mirroring, a rendering engine needs to select
+# appropriate alternative glyphs, and that many Unicode characters do not
+# have a mirror-image Unicode character.
+#
+# Each mapping line contains two fields, separated by a semicolon (';').
+# Each of the two fields contains a code point represented as a
+# variable-length hexadecimal value with 4 to 6 digits.
+# A comment indicates where the characters are "BEST FIT" mirroring.
+#
+# Code points for which Bidi_Mirrored=Yes, but for which no appropriate
+# characters exist with mirrored glyphs, are
+# listed as comments at the end of the file.
+#
+# Formally, the default value of the Bidi_Mirroring_Glyph property
+# for each code point is <none>, unless a mapping to
+# some other character is specified in this data file. When a code
+# point has the default value for the Bidi_Mirroring_Glyph property,
+# that means that no other character exists whose glyph is suitable
+# for character-based mirroring.
+#
+# For information on bidi mirroring, see UAX #9: Unicode Bidirectional Algorithm,
+# at https://www.unicode.org/reports/tr9/
+#
+# This file was originally created by Markus Scherer.
+# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler,
+# and for subsequent versions by Ken Whistler, Laurentiu Iancu, and Roozbeh Pournader.
+#
+# Historical and Compatibility Information:
+#
+# The OpenType Mirroring Pairs List (OMPL) is frozen to match the
+# Unicode 5.1 version of the Bidi_Mirroring_Glyph property (2008).
+# See https://www.microsoft.com/typography/otspec/ompl.txt
+#
+# The Unicode 6.1 version of the Bidi_Mirroring_Glyph property (2011)
+# added one mirroring pair: 27CB <--> 27CD.
+#
+# The Unicode 11.0 version of the Bidi_Mirroring_Glyph property (2018)
+# underwent a substantial revision, to formally recognize all of the
+# exact mirroring pairs and "BEST FIT" mirroring pairs that had been
+# added after the freezing of the OMPL list. As a result, starting
+# with Unicode 11.0, the bmg mapping values more accurately reflect
+# the current status of glyphs for Bidi_Mirrored characters in
+# the Unicode Standard, but this listing now extends significantly
+# beyond the frozen OMPL list. Implementers should be aware of this
+# intentional distinction.
+#
+# ############################################################
+#
+# Property:	Bidi_Mirroring_Glyph
+#
+# @missing: 0000..10FFFF; <none>
+
+0028; 0029 # LEFT PARENTHESIS
+0029; 0028 # RIGHT PARENTHESIS
+003C; 003E # LESS-THAN SIGN
+003E; 003C # GREATER-THAN SIGN
+005B; 005D # LEFT SQUARE BRACKET
+005D; 005B # RIGHT SQUARE BRACKET
+007B; 007D # LEFT CURLY BRACKET
+007D; 007B # RIGHT CURLY BRACKET
+00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0F3A; 0F3B # TIBETAN MARK GUG RTAGS GYON
+0F3B; 0F3A # TIBETAN MARK GUG RTAGS GYAS
+0F3C; 0F3D # TIBETAN MARK ANG KHANG GYON
+0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS
+169B; 169C # OGHAM FEATHER MARK
+169C; 169B # OGHAM REVERSED FEATHER MARK
+2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
+2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
+207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
+207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
+208D; 208E # SUBSCRIPT LEFT PARENTHESIS
+208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
+2208; 220B # ELEMENT OF
+2209; 220C # [BEST FIT] NOT AN ELEMENT OF
+220A; 220D # SMALL ELEMENT OF
+220B; 2208 # CONTAINS AS MEMBER
+220C; 2209 # [BEST FIT] DOES NOT CONTAIN AS MEMBER
+220D; 220A # SMALL CONTAINS AS MEMBER
+2215; 29F5 # DIVISION SLASH
+221F; 2BFE # RIGHT ANGLE
+2220; 29A3 # ANGLE
+2221; 299B # MEASURED ANGLE
+2222; 29A0 # SPHERICAL ANGLE
+2224; 2AEE # DOES NOT DIVIDE
+223C; 223D # TILDE OPERATOR
+223D; 223C # REVERSED TILDE
+2243; 22CD # ASYMPTOTICALLY EQUAL TO
+2245; 224C # APPROXIMATELY EQUAL TO
+224C; 2245 # ALL EQUAL TO
+2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
+2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
+2254; 2255 # COLON EQUALS
+2255; 2254 # EQUALS COLON
+2264; 2265 # LESS-THAN OR EQUAL TO
+2265; 2264 # GREATER-THAN OR EQUAL TO
+2266; 2267 # LESS-THAN OVER EQUAL TO
+2267; 2266 # GREATER-THAN OVER EQUAL TO
+2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
+2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
+226A; 226B # MUCH LESS-THAN
+226B; 226A # MUCH GREATER-THAN
+226E; 226F # [BEST FIT] NOT LESS-THAN
+226F; 226E # [BEST FIT] NOT GREATER-THAN
+2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
+2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
+2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
+2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
+2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
+2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
+2276; 2277 # LESS-THAN OR GREATER-THAN
+2277; 2276 # GREATER-THAN OR LESS-THAN
+2278; 2279 # [BEST FIT] NEITHER LESS-THAN NOR GREATER-THAN
+2279; 2278 # [BEST FIT] NEITHER GREATER-THAN NOR LESS-THAN
+227A; 227B # PRECEDES
+227B; 227A # SUCCEEDS
+227C; 227D # PRECEDES OR EQUAL TO
+227D; 227C # SUCCEEDS OR EQUAL TO
+227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO
+227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
+2280; 2281 # [BEST FIT] DOES NOT PRECEDE
+2281; 2280 # [BEST FIT] DOES NOT SUCCEED
+2282; 2283 # SUBSET OF
+2283; 2282 # SUPERSET OF
+2284; 2285 # [BEST FIT] NOT A SUBSET OF
+2285; 2284 # [BEST FIT] NOT A SUPERSET OF
+2286; 2287 # SUBSET OF OR EQUAL TO
+2287; 2286 # SUPERSET OF OR EQUAL TO
+2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
+2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
+228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
+228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
+228F; 2290 # SQUARE IMAGE OF
+2290; 228F # SQUARE ORIGINAL OF
+2291; 2292 # SQUARE IMAGE OF OR EQUAL TO
+2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO
+2298; 29B8 # CIRCLED DIVISION SLASH
+22A2; 22A3 # RIGHT TACK
+22A3; 22A2 # LEFT TACK
+22A6; 2ADE # ASSERTION
+22A8; 2AE4 # TRUE
+22A9; 2AE3 # FORCES
+22AB; 2AE5 # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+22B0; 22B1 # PRECEDES UNDER RELATION
+22B1; 22B0 # SUCCEEDS UNDER RELATION
+22B2; 22B3 # NORMAL SUBGROUP OF
+22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP
+22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO
+22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+22B6; 22B7 # ORIGINAL OF
+22B7; 22B6 # IMAGE OF
+22B8; 27DC # MULTIMAP
+22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CB; 22CC # LEFT SEMIDIRECT PRODUCT
+22CC; 22CB # RIGHT SEMIDIRECT PRODUCT
+22CD; 2243 # REVERSED TILDE EQUALS
+22D0; 22D1 # DOUBLE SUBSET
+22D1; 22D0 # DOUBLE SUPERSET
+22D6; 22D7 # LESS-THAN WITH DOT
+22D7; 22D6 # GREATER-THAN WITH DOT
+22D8; 22D9 # VERY MUCH LESS-THAN
+22D9; 22D8 # VERY MUCH GREATER-THAN
+22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN
+22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN
+22DC; 22DD # EQUAL TO OR LESS-THAN
+22DD; 22DC # EQUAL TO OR GREATER-THAN
+22DE; 22DF # EQUAL TO OR PRECEDES
+22DF; 22DE # EQUAL TO OR SUCCEEDS
+22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL
+22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL
+22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
+22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
+22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
+22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
+22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
+22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
+22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
+22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
+22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF
+22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
+22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
+22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS
+22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS
+22F2; 22FA # ELEMENT OF WITH LONG HORIZONTAL STROKE
+22F3; 22FB # ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22F4; 22FC # SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22F6; 22FD # ELEMENT OF WITH OVERBAR
+22F7; 22FE # SMALL ELEMENT OF WITH OVERBAR
+22FA; 22F2 # CONTAINS WITH LONG HORIZONTAL STROKE
+22FB; 22F3 # CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22FC; 22F4 # SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
+22FD; 22F6 # CONTAINS WITH OVERBAR
+22FE; 22F7 # SMALL CONTAINS WITH OVERBAR
+2308; 2309 # LEFT CEILING
+2309; 2308 # RIGHT CEILING
+230A; 230B # LEFT FLOOR
+230B; 230A # RIGHT FLOOR
+2329; 232A # LEFT-POINTING ANGLE BRACKET
+232A; 2329 # RIGHT-POINTING ANGLE BRACKET
+2768; 2769 # MEDIUM LEFT PARENTHESIS ORNAMENT
+2769; 2768 # MEDIUM RIGHT PARENTHESIS ORNAMENT
+276A; 276B # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+276B; 276A # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+276C; 276D # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+276D; 276C # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+276E; 276F # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+276F; 276E # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2770; 2771 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+2771; 2770 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+2774; 2775 # MEDIUM LEFT CURLY BRACKET ORNAMENT
+2775; 2774 # MEDIUM RIGHT CURLY BRACKET ORNAMENT
+27C3; 27C4 # OPEN SUBSET
+27C4; 27C3 # OPEN SUPERSET
+27C5; 27C6 # LEFT S-SHAPED BAG DELIMITER
+27C6; 27C5 # RIGHT S-SHAPED BAG DELIMITER
+27C8; 27C9 # REVERSE SOLIDUS PRECEDING SUBSET
+27C9; 27C8 # SUPERSET PRECEDING SOLIDUS
+27CB; 27CD # MATHEMATICAL RISING DIAGONAL
+27CD; 27CB # MATHEMATICAL FALLING DIAGONAL
+27D5; 27D6 # LEFT OUTER JOIN
+27D6; 27D5 # RIGHT OUTER JOIN
+27DC; 22B8 # LEFT MULTIMAP
+27DD; 27DE # LONG RIGHT TACK
+27DE; 27DD # LONG LEFT TACK
+27E2; 27E3 # WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK
+27E3; 27E2 # WHITE CONCAVE-SIDED DIAMOND WITH RIGHTWARDS TICK
+27E4; 27E5 # WHITE SQUARE WITH LEFTWARDS TICK
+27E5; 27E4 # WHITE SQUARE WITH RIGHTWARDS TICK
+27E6; 27E7 # MATHEMATICAL LEFT WHITE SQUARE BRACKET
+27E7; 27E6 # MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+27E8; 27E9 # MATHEMATICAL LEFT ANGLE BRACKET
+27E9; 27E8 # MATHEMATICAL RIGHT ANGLE BRACKET
+27EA; 27EB # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+27EB; 27EA # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+27EC; 27ED # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+27ED; 27EC # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+27EE; 27EF # MATHEMATICAL LEFT FLATTENED PARENTHESIS
+27EF; 27EE # MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+2983; 2984 # LEFT WHITE CURLY BRACKET
+2984; 2983 # RIGHT WHITE CURLY BRACKET
+2985; 2986 # LEFT WHITE PARENTHESIS
+2986; 2985 # RIGHT WHITE PARENTHESIS
+2987; 2988 # Z NOTATION LEFT IMAGE BRACKET
+2988; 2987 # Z NOTATION RIGHT IMAGE BRACKET
+2989; 298A # Z NOTATION LEFT BINDING BRACKET
+298A; 2989 # Z NOTATION RIGHT BINDING BRACKET
+298B; 298C # LEFT SQUARE BRACKET WITH UNDERBAR
+298C; 298B # RIGHT SQUARE BRACKET WITH UNDERBAR
+298D; 2990 # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+298E; 298F # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+298F; 298E # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2990; 298D # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+2991; 2992 # LEFT ANGLE BRACKET WITH DOT
+2992; 2991 # RIGHT ANGLE BRACKET WITH DOT
+2993; 2994 # LEFT ARC LESS-THAN BRACKET
+2994; 2993 # RIGHT ARC GREATER-THAN BRACKET
+2995; 2996 # DOUBLE LEFT ARC GREATER-THAN BRACKET
+2996; 2995 # DOUBLE RIGHT ARC LESS-THAN BRACKET
+2997; 2998 # LEFT BLACK TORTOISE SHELL BRACKET
+2998; 2997 # RIGHT BLACK TORTOISE SHELL BRACKET
+299B; 2221 # MEASURED ANGLE OPENING LEFT
+29A0; 2222 # SPHERICAL ANGLE OPENING LEFT
+29A3; 2220 # REVERSED ANGLE
+29A4; 29A5 # ANGLE WITH UNDERBAR
+29A5; 29A4 # REVERSED ANGLE WITH UNDERBAR
+29A8; 29A9 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND RIGHT
+29A9; 29A8 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND LEFT
+29AA; 29AB # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND RIGHT
+29AB; 29AA # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND LEFT
+29AC; 29AD # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND UP
+29AD; 29AC # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND UP
+29AE; 29AF # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND DOWN
+29AF; 29AE # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND DOWN
+29B8; 2298 # CIRCLED REVERSE SOLIDUS
+29C0; 29C1 # CIRCLED LESS-THAN
+29C1; 29C0 # CIRCLED GREATER-THAN
+29C4; 29C5 # SQUARED RISING DIAGONAL SLASH
+29C5; 29C4 # SQUARED FALLING DIAGONAL SLASH
+29CF; 29D0 # LEFT TRIANGLE BESIDE VERTICAL BAR
+29D0; 29CF # VERTICAL BAR BESIDE RIGHT TRIANGLE
+29D1; 29D2 # BOWTIE WITH LEFT HALF BLACK
+29D2; 29D1 # BOWTIE WITH RIGHT HALF BLACK
+29D4; 29D5 # TIMES WITH LEFT HALF BLACK
+29D5; 29D4 # TIMES WITH RIGHT HALF BLACK
+29D8; 29D9 # LEFT WIGGLY FENCE
+29D9; 29D8 # RIGHT WIGGLY FENCE
+29DA; 29DB # LEFT DOUBLE WIGGLY FENCE
+29DB; 29DA # RIGHT DOUBLE WIGGLY FENCE
+29E8; 29E9 # DOWN-POINTING TRIANGLE WITH LEFT HALF BLACK
+29E9; 29E8 # DOWN-POINTING TRIANGLE WITH RIGHT HALF BLACK
+29F5; 2215 # REVERSE SOLIDUS OPERATOR
+29F8; 29F9 # BIG SOLIDUS
+29F9; 29F8 # BIG REVERSE SOLIDUS
+29FC; 29FD # LEFT-POINTING CURVED ANGLE BRACKET
+29FD; 29FC # RIGHT-POINTING CURVED ANGLE BRACKET
+2A2B; 2A2C # MINUS SIGN WITH FALLING DOTS
+2A2C; 2A2B # MINUS SIGN WITH RISING DOTS
+2A2D; 2A2E # PLUS SIGN IN LEFT HALF CIRCLE
+2A2E; 2A2D # PLUS SIGN IN RIGHT HALF CIRCLE
+2A34; 2A35 # MULTIPLICATION SIGN IN LEFT HALF CIRCLE
+2A35; 2A34 # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
+2A3C; 2A3D # INTERIOR PRODUCT
+2A3D; 2A3C # RIGHTHAND INTERIOR PRODUCT
+2A64; 2A65 # Z NOTATION DOMAIN ANTIRESTRICTION
+2A65; 2A64 # Z NOTATION RANGE ANTIRESTRICTION
+2A79; 2A7A # LESS-THAN WITH CIRCLE INSIDE
+2A7A; 2A79 # GREATER-THAN WITH CIRCLE INSIDE
+2A7B; 2A7C # [BEST FIT] LESS-THAN WITH QUESTION MARK ABOVE
+2A7C; 2A7B # [BEST FIT] GREATER-THAN WITH QUESTION MARK ABOVE
+2A7D; 2A7E # LESS-THAN OR SLANTED EQUAL TO
+2A7E; 2A7D # GREATER-THAN OR SLANTED EQUAL TO
+2A7F; 2A80 # LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
+2A80; 2A7F # GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
+2A81; 2A82 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
+2A82; 2A81 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
+2A83; 2A84 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT
+2A84; 2A83 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT
+2A85; 2A86 # [BEST FIT] LESS-THAN OR APPROXIMATE
+2A86; 2A85 # [BEST FIT] GREATER-THAN OR APPROXIMATE
+2A87; 2A88 # [BEST FIT] LESS-THAN AND SINGLE-LINE NOT EQUAL TO
+2A88; 2A87 # [BEST FIT] GREATER-THAN AND SINGLE-LINE NOT EQUAL TO
+2A89; 2A8A # [BEST FIT] LESS-THAN AND NOT APPROXIMATE
+2A8A; 2A89 # [BEST FIT] GREATER-THAN AND NOT APPROXIMATE
+2A8B; 2A8C # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN
+2A8C; 2A8B # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN
+2A8D; 2A8E # [BEST FIT] LESS-THAN ABOVE SIMILAR OR EQUAL
+2A8E; 2A8D # [BEST FIT] GREATER-THAN ABOVE SIMILAR OR EQUAL
+2A8F; 2A90 # [BEST FIT] LESS-THAN ABOVE SIMILAR ABOVE GREATER-THAN
+2A90; 2A8F # [BEST FIT] GREATER-THAN ABOVE SIMILAR ABOVE LESS-THAN
+2A91; 2A92 # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL
+2A92; 2A91 # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL
+2A93; 2A94 # LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL
+2A94; 2A93 # GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL
+2A95; 2A96 # SLANTED EQUAL TO OR LESS-THAN
+2A96; 2A95 # SLANTED EQUAL TO OR GREATER-THAN
+2A97; 2A98 # SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE
+2A98; 2A97 # SLANTED EQUAL TO OR GREATER-THAN WITH DOT INSIDE
+2A99; 2A9A # DOUBLE-LINE EQUAL TO OR LESS-THAN
+2A9A; 2A99 # DOUBLE-LINE EQUAL TO OR GREATER-THAN
+2A9B; 2A9C # DOUBLE-LINE SLANTED EQUAL TO OR LESS-THAN
+2A9C; 2A9B # DOUBLE-LINE SLANTED EQUAL TO OR GREATER-THAN
+2A9D; 2A9E # [BEST FIT] SIMILAR OR LESS-THAN
+2A9E; 2A9D # [BEST FIT] SIMILAR OR GREATER-THAN
+2A9F; 2AA0 # [BEST FIT] SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN
+2AA0; 2A9F # [BEST FIT] SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN
+2AA1; 2AA2 # DOUBLE NESTED LESS-THAN
+2AA2; 2AA1 # DOUBLE NESTED GREATER-THAN
+2AA6; 2AA7 # LESS-THAN CLOSED BY CURVE
+2AA7; 2AA6 # GREATER-THAN CLOSED BY CURVE
+2AA8; 2AA9 # LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
+2AA9; 2AA8 # GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
+2AAA; 2AAB # SMALLER THAN
+2AAB; 2AAA # LARGER THAN
+2AAC; 2AAD # SMALLER THAN OR EQUAL TO
+2AAD; 2AAC # LARGER THAN OR EQUAL TO
+2AAF; 2AB0 # PRECEDES ABOVE SINGLE-LINE EQUALS SIGN
+2AB0; 2AAF # SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN
+2AB1; 2AB2 # [BEST FIT] PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO
+2AB2; 2AB1 # [BEST FIT] SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO
+2AB3; 2AB4 # PRECEDES ABOVE EQUALS SIGN
+2AB4; 2AB3 # SUCCEEDS ABOVE EQUALS SIGN
+2AB5; 2AB6 # [BEST FIT] PRECEDES ABOVE NOT EQUAL TO
+2AB6; 2AB5 # [BEST FIT] SUCCEEDS ABOVE NOT EQUAL TO
+2AB7; 2AB8 # [BEST FIT] PRECEDES ABOVE ALMOST EQUAL TO
+2AB8; 2AB7 # [BEST FIT] SUCCEEDS ABOVE ALMOST EQUAL TO
+2AB9; 2ABA # [BEST FIT] PRECEDES ABOVE NOT ALMOST EQUAL TO
+2ABA; 2AB9 # [BEST FIT] SUCCEEDS ABOVE NOT ALMOST EQUAL TO
+2ABB; 2ABC # DOUBLE PRECEDES
+2ABC; 2ABB # DOUBLE SUCCEEDS
+2ABD; 2ABE # SUBSET WITH DOT
+2ABE; 2ABD # SUPERSET WITH DOT
+2ABF; 2AC0 # SUBSET WITH PLUS SIGN BELOW
+2AC0; 2ABF # SUPERSET WITH PLUS SIGN BELOW
+2AC1; 2AC2 # SUBSET WITH MULTIPLICATION SIGN BELOW
+2AC2; 2AC1 # SUPERSET WITH MULTIPLICATION SIGN BELOW
+2AC3; 2AC4 # SUBSET OF OR EQUAL TO WITH DOT ABOVE
+2AC4; 2AC3 # SUPERSET OF OR EQUAL TO WITH DOT ABOVE
+2AC5; 2AC6 # SUBSET OF ABOVE EQUALS SIGN
+2AC6; 2AC5 # SUPERSET OF ABOVE EQUALS SIGN
+2AC7; 2AC8 # [BEST FIT] SUBSET OF ABOVE TILDE OPERATOR
+2AC8; 2AC7 # [BEST FIT] SUPERSET OF ABOVE TILDE OPERATOR
+2AC9; 2ACA # [BEST FIT] SUBSET OF ABOVE ALMOST EQUAL TO
+2ACA; 2AC9 # [BEST FIT] SUPERSET OF ABOVE ALMOST EQUAL TO
+2ACB; 2ACC # [BEST FIT] SUBSET OF ABOVE NOT EQUAL TO
+2ACC; 2ACB # [BEST FIT] SUPERSET OF ABOVE NOT EQUAL TO
+2ACD; 2ACE # SQUARE LEFT OPEN BOX OPERATOR
+2ACE; 2ACD # SQUARE RIGHT OPEN BOX OPERATOR
+2ACF; 2AD0 # CLOSED SUBSET
+2AD0; 2ACF # CLOSED SUPERSET
+2AD1; 2AD2 # CLOSED SUBSET OR EQUAL TO
+2AD2; 2AD1 # CLOSED SUPERSET OR EQUAL TO
+2AD3; 2AD4 # SUBSET ABOVE SUPERSET
+2AD4; 2AD3 # SUPERSET ABOVE SUBSET
+2AD5; 2AD6 # SUBSET ABOVE SUBSET
+2AD6; 2AD5 # SUPERSET ABOVE SUPERSET
+2ADE; 22A6 # SHORT LEFT TACK
+2AE3; 22A9 # DOUBLE VERTICAL BAR LEFT TURNSTILE
+2AE4; 22A8 # VERTICAL BAR DOUBLE LEFT TURNSTILE
+2AE5; 22AB # DOUBLE VERTICAL BAR DOUBLE LEFT TURNSTILE
+2AEC; 2AED # DOUBLE STROKE NOT SIGN
+2AED; 2AEC # REVERSED DOUBLE STROKE NOT SIGN
+2AEE; 2224 # DOES NOT DIVIDE WITH REVERSED NEGATION SLASH
+2AF7; 2AF8 # TRIPLE NESTED LESS-THAN
+2AF8; 2AF7 # TRIPLE NESTED GREATER-THAN
+2AF9; 2AFA # DOUBLE-LINE SLANTED LESS-THAN OR EQUAL TO
+2AFA; 2AF9 # DOUBLE-LINE SLANTED GREATER-THAN OR EQUAL TO
+2BFE; 221F # REVERSED RIGHT ANGLE
+2E02; 2E03 # LEFT SUBSTITUTION BRACKET
+2E03; 2E02 # RIGHT SUBSTITUTION BRACKET
+2E04; 2E05 # LEFT DOTTED SUBSTITUTION BRACKET
+2E05; 2E04 # RIGHT DOTTED SUBSTITUTION BRACKET
+2E09; 2E0A # LEFT TRANSPOSITION BRACKET
+2E0A; 2E09 # RIGHT TRANSPOSITION BRACKET
+2E0C; 2E0D # LEFT RAISED OMISSION BRACKET
+2E0D; 2E0C # RIGHT RAISED OMISSION BRACKET
+2E1C; 2E1D # LEFT LOW PARAPHRASE BRACKET
+2E1D; 2E1C # RIGHT LOW PARAPHRASE BRACKET
+2E20; 2E21 # LEFT VERTICAL BAR WITH QUILL
+2E21; 2E20 # RIGHT VERTICAL BAR WITH QUILL
+2E22; 2E23 # TOP LEFT HALF BRACKET
+2E23; 2E22 # TOP RIGHT HALF BRACKET
+2E24; 2E25 # BOTTOM LEFT HALF BRACKET
+2E25; 2E24 # BOTTOM RIGHT HALF BRACKET
+2E26; 2E27 # LEFT SIDEWAYS U BRACKET
+2E27; 2E26 # RIGHT SIDEWAYS U BRACKET
+2E28; 2E29 # LEFT DOUBLE PARENTHESIS
+2E29; 2E28 # RIGHT DOUBLE PARENTHESIS
+2E55; 2E56 # LEFT SQUARE BRACKET WITH STROKE
+2E56; 2E55 # RIGHT SQUARE BRACKET WITH STROKE
+2E57; 2E58 # LEFT SQUARE BRACKET WITH DOUBLE STROKE
+2E58; 2E57 # RIGHT SQUARE BRACKET WITH DOUBLE STROKE
+2E59; 2E5A # TOP HALF LEFT PARENTHESIS
+2E5A; 2E59 # TOP HALF RIGHT PARENTHESIS
+2E5B; 2E5C # BOTTOM HALF LEFT PARENTHESIS
+2E5C; 2E5B # BOTTOM HALF RIGHT PARENTHESIS
+3008; 3009 # LEFT ANGLE BRACKET
+3009; 3008 # RIGHT ANGLE BRACKET
+300A; 300B # LEFT DOUBLE ANGLE BRACKET
+300B; 300A # RIGHT DOUBLE ANGLE BRACKET
+300C; 300D # [BEST FIT] LEFT CORNER BRACKET
+300D; 300C # [BEST FIT] RIGHT CORNER BRACKET
+300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET
+300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET
+3010; 3011 # LEFT BLACK LENTICULAR BRACKET
+3011; 3010 # RIGHT BLACK LENTICULAR BRACKET
+3014; 3015 # LEFT TORTOISE SHELL BRACKET
+3015; 3014 # RIGHT TORTOISE SHELL BRACKET
+3016; 3017 # LEFT WHITE LENTICULAR BRACKET
+3017; 3016 # RIGHT WHITE LENTICULAR BRACKET
+3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET
+3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
+301A; 301B # LEFT WHITE SQUARE BRACKET
+301B; 301A # RIGHT WHITE SQUARE BRACKET
+FE59; FE5A # SMALL LEFT PARENTHESIS
+FE5A; FE59 # SMALL RIGHT PARENTHESIS
+FE5B; FE5C # SMALL LEFT CURLY BRACKET
+FE5C; FE5B # SMALL RIGHT CURLY BRACKET
+FE5D; FE5E # SMALL LEFT TORTOISE SHELL BRACKET
+FE5E; FE5D # SMALL RIGHT TORTOISE SHELL BRACKET
+FE64; FE65 # SMALL LESS-THAN SIGN
+FE65; FE64 # SMALL GREATER-THAN SIGN
+FF08; FF09 # FULLWIDTH LEFT PARENTHESIS
+FF09; FF08 # FULLWIDTH RIGHT PARENTHESIS
+FF1C; FF1E # FULLWIDTH LESS-THAN SIGN
+FF1E; FF1C # FULLWIDTH GREATER-THAN SIGN
+FF3B; FF3D # FULLWIDTH LEFT SQUARE BRACKET
+FF3D; FF3B # FULLWIDTH RIGHT SQUARE BRACKET
+FF5B; FF5D # FULLWIDTH LEFT CURLY BRACKET
+FF5D; FF5B # FULLWIDTH RIGHT CURLY BRACKET
+FF5F; FF60 # FULLWIDTH LEFT WHITE PARENTHESIS
+FF60; FF5F # FULLWIDTH RIGHT WHITE PARENTHESIS
+FF62; FF63 # [BEST FIT] HALFWIDTH LEFT CORNER BRACKET
+FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET
+
+# The following characters have no appropriate mirroring character.
+# For these characters it is up to the rendering system
+#   to provide mirrored glyphs.
+
+# 2140; DOUBLE-STRUCK N-ARY SUMMATION
+# 2201; COMPLEMENT
+# 2202; PARTIAL DIFFERENTIAL
+# 2203; THERE EXISTS
+# 2204; THERE DOES NOT EXIST
+# 2211; N-ARY SUMMATION
+# 2216; SET MINUS
+# 221A; SQUARE ROOT
+# 221B; CUBE ROOT
+# 221C; FOURTH ROOT
+# 221D; PROPORTIONAL TO
+# 2226; NOT PARALLEL TO
+# 222B; INTEGRAL
+# 222C; DOUBLE INTEGRAL
+# 222D; TRIPLE INTEGRAL
+# 222E; CONTOUR INTEGRAL
+# 222F; SURFACE INTEGRAL
+# 2230; VOLUME INTEGRAL
+# 2231; CLOCKWISE INTEGRAL
+# 2232; CLOCKWISE CONTOUR INTEGRAL
+# 2233; ANTICLOCKWISE CONTOUR INTEGRAL
+# 2239; EXCESS
+# 223B; HOMOTHETIC
+# 223E; INVERTED LAZY S
+# 223F; SINE WAVE
+# 2240; WREATH PRODUCT
+# 2241; NOT TILDE
+# 2242; MINUS TILDE
+# 2244; NOT ASYMPTOTICALLY EQUAL TO
+# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
+# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+# 2248; ALMOST EQUAL TO
+# 2249; NOT ALMOST EQUAL TO
+# 224A; ALMOST EQUAL OR EQUAL TO
+# 224B; TRIPLE TILDE
+# 225F; QUESTIONED EQUAL TO
+# 2260; NOT EQUAL TO
+# 2262; NOT IDENTICAL TO
+# 228C; MULTISET
+# 22A7; MODELS
+# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE
+# 22AC; DOES NOT PROVE
+# 22AD; NOT TRUE
+# 22AE; DOES NOT FORCE
+# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+# 22BE; RIGHT ANGLE WITH ARC
+# 22BF; RIGHT TRIANGLE
+# 22F5; ELEMENT OF WITH DOT ABOVE
+# 22F8; ELEMENT OF WITH UNDERBAR
+# 22F9; ELEMENT OF WITH TWO HORIZONTAL STROKES
+# 22FF; Z NOTATION BAG MEMBERSHIP
+# 2320; TOP HALF INTEGRAL
+# 2321; BOTTOM HALF INTEGRAL
+# 27C0; THREE DIMENSIONAL ANGLE
+# 27CC; LONG DIVISION
+# 27D3; LOWER RIGHT CORNER WITH DOT
+# 27D4; UPPER LEFT CORNER WITH DOT
+# 299C; RIGHT ANGLE VARIANT WITH SQUARE
+# 299D; MEASURED RIGHT ANGLE WITH DOT
+# 299E; ANGLE WITH S INSIDE
+# 299F; ACUTE ANGLE
+# 29A2; TURNED ANGLE
+# 29A6; OBLIQUE ANGLE OPENING UP
+# 29A7; OBLIQUE ANGLE OPENING DOWN
+# 29C2; CIRCLE WITH SMALL CIRCLE TO THE RIGHT
+# 29C3; CIRCLE WITH TWO HORIZONTAL STROKES TO THE RIGHT
+# 29C9; TWO JOINED SQUARES
+# 29CE; RIGHT TRIANGLE ABOVE LEFT TRIANGLE
+# 29DC; INCOMPLETE INFINITY
+# 29E1; INCREASES AS
+# 29E3; EQUALS SIGN AND SLANTED PARALLEL
+# 29E4; EQUALS SIGN AND SLANTED PARALLEL WITH TILDE ABOVE
+# 29E5; IDENTICAL TO AND SLANTED PARALLEL
+# 29F4; RULE-DELAYED
+# 29F6; SOLIDUS WITH OVERBAR
+# 29F7; REVERSE SOLIDUS WITH HORIZONTAL STROKE
+# 2A0A; MODULO TWO SUM
+# 2A0B; SUMMATION WITH INTEGRAL
+# 2A0C; QUADRUPLE INTEGRAL OPERATOR
+# 2A0D; FINITE PART INTEGRAL
+# 2A0E; INTEGRAL WITH DOUBLE STROKE
+# 2A0F; INTEGRAL AVERAGE WITH SLASH
+# 2A10; CIRCULATION FUNCTION
+# 2A11; ANTICLOCKWISE INTEGRATION
+# 2A12; LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE
+# 2A13; LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE
+# 2A14; LINE INTEGRATION NOT INCLUDING THE POLE
+# 2A15; INTEGRAL AROUND A POINT OPERATOR
+# 2A16; QUATERNION INTEGRAL OPERATOR
+# 2A17; INTEGRAL WITH LEFTWARDS ARROW WITH HOOK
+# 2A18; INTEGRAL WITH TIMES SIGN
+# 2A19; INTEGRAL WITH INTERSECTION
+# 2A1A; INTEGRAL WITH UNION
+# 2A1B; INTEGRAL WITH OVERBAR
+# 2A1C; INTEGRAL WITH UNDERBAR
+# 2A1E; LARGE LEFT TRIANGLE OPERATOR
+# 2A1F; Z NOTATION SCHEMA COMPOSITION
+# 2A20; Z NOTATION SCHEMA PIPING
+# 2A21; Z NOTATION SCHEMA PROJECTION
+# 2A24; PLUS SIGN WITH TILDE ABOVE
+# 2A26; PLUS SIGN WITH TILDE BELOW
+# 2A29; MINUS SIGN WITH COMMA ABOVE
+# 2A3E; Z NOTATION RELATIONAL COMPOSITION
+# 2A57; SLOPING LARGE OR
+# 2A58; SLOPING LARGE AND
+# 2A6A; TILDE OPERATOR WITH DOT ABOVE
+# 2A6B; TILDE OPERATOR WITH RISING DOTS
+# 2A6C; SIMILAR MINUS SIMILAR
+# 2A6D; CONGRUENT WITH DOT ABOVE
+# 2A6F; ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT
+# 2A70; APPROXIMATELY EQUAL OR EQUAL TO
+# 2A73; EQUALS SIGN ABOVE TILDE OPERATOR
+# 2A74; DOUBLE COLON EQUAL
+# 2AA3; DOUBLE NESTED LESS-THAN WITH UNDERBAR
+# 2ADC; FORKING
+# 2AE2; VERTICAL BAR TRIPLE RIGHT TURNSTILE
+# 2AE6; LONG DASH FROM LEFT MEMBER OF DOUBLE VERTICAL
+# 2AF3; PARALLEL WITH TILDE OPERATOR
+# 2AFB; TRIPLE SOLIDUS BINARY RELATION
+# 2AFD; DOUBLE SOLIDUS OPERATOR
+# 1D6DB; MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
+# 1D715; MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
+# 1D74F; MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
+# 1D789; MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
+# 1D7C3; MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
+
+# EOF
--- a/maint/Unicode.tables/CaseFolding.txt
+++ b/maint/Unicode.tables/CaseFolding.txt
@ -1,6 +1,6 @@
-# CaseFolding-13.0.0.txt
-# Date: 2019-09-08, 23:30:59 GMT
-# © 2019 Unicode®, Inc.
+# CaseFolding-14.0.0.txt
+# Date: 2021-03-08, 19:35:41 GMT
+# © 2021 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
@ -1050,6 +1050,7 @@
 2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
 2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
 2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
 2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
@ -1230,12 +1231,16 @@ A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE
 A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A
 A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I
 A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U
+A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O
 A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
 A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
 A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
 A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
 A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
 A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
+A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
+A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
 A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
 AB70; C; 13A0; # CHEROKEE SMALL LETTER A
 AB71; C; 13A1; # CHEROKEE SMALL LETTER E
@ -1431,6 +1436,41 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
 104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
 104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
 104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
+10570; C; 10597; # VITHKUQI CAPITAL LETTER A
+10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE
+10572; C; 10599; # VITHKUQI CAPITAL LETTER BE
+10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE
+10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE
+10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE
+10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE
+10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI
+10578; C; 1059F; # VITHKUQI CAPITAL LETTER E
+10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE
+1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA
+1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA
+1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA
+1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I
+1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE
+10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE
+10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA
+10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA
+10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA
+10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME
+10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE
+10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE
+10587; C; 105AE; # VITHKUQI CAPITAL LETTER O
+10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE
+10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA
+1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE
+1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE
+1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE
+1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE
+1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE
+10590; C; 105B7; # VITHKUQI CAPITAL LETTER U
+10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE
+10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE
+10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y
+10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE
 10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
 10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
 10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
--- a/maint/Unicode.tables/DerivedBidiClass.txt
+++ b/maint/Unicode.tables/DerivedBidiClass.txt
--- a/maint/Unicode.tables/DerivedCoreProperties.txt
+++ b/maint/Unicode.tables/DerivedCoreProperties.txt
--- a/maint/Unicode.tables/DerivedGeneralCategory.txt
+++ b/maint/Unicode.tables/DerivedGeneralCategory.txt
@ -1,6 +1,6 @@
-# DerivedGeneralCategory-13.0.0.txt
-# Date: 2019-10-21, 14:30:32 GMT
-# © 2019 Unicode®, Inc.
+# DerivedGeneralCategory-14.0.0.txt
+# Date: 2021-07-10, 00:35:08 GMT
+# © 2021 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
@ -27,7 +27,6 @@
 05C8..05CF    ; Cn #   [8] <reserved-05C8>..<reserved-05CF>
 05EB..05EE    ; Cn #   [4] <reserved-05EB>..<reserved-05EE>
 05F5..05FF    ; Cn #  [11] <reserved-05F5>..<reserved-05FF>
-061D          ; Cn #       <reserved-061D>
 070E          ; Cn #       <reserved-070E>
 074B..074C    ; Cn #   [2] <reserved-074B>..<reserved-074C>
 07B2..07BF    ; Cn #  [14] <reserved-07B2>..<reserved-07BF>
@ -36,9 +35,9 @@
 083F          ; Cn #       <reserved-083F>
 085C..085D    ; Cn #   [2] <reserved-085C>..<reserved-085D>
 085F          ; Cn #       <reserved-085F>
-086B..089F    ; Cn #  [53] <reserved-086B>..<reserved-089F>
-08B5          ; Cn #       <reserved-08B5>
-08C8..08D2    ; Cn #  [11] <reserved-08C8>..<reserved-08D2>
+086B..086F    ; Cn #   [5] <reserved-086B>..<reserved-086F>
+088F          ; Cn #       <reserved-088F>
+0892..0897    ; Cn #   [6] <reserved-0892>..<reserved-0897>
 0984          ; Cn #       <reserved-0984>
 098D..098E    ; Cn #   [2] <reserved-098D>..<reserved-098E>
 0991..0992    ; Cn #   [2] <reserved-0991>..<reserved-0992>
@ -116,12 +115,13 @@
 0C0D          ; Cn #       <reserved-0C0D>
 0C11          ; Cn #       <reserved-0C11>
 0C29          ; Cn #       <reserved-0C29>
-0C3A..0C3C    ; Cn #   [3] <reserved-0C3A>..<reserved-0C3C>
+0C3A..0C3B    ; Cn #   [2] <reserved-0C3A>..<reserved-0C3B>
 0C45          ; Cn #       <reserved-0C45>
 0C49          ; Cn #       <reserved-0C49>
 0C4E..0C54    ; Cn #   [7] <reserved-0C4E>..<reserved-0C54>
 0C57          ; Cn #       <reserved-0C57>
-0C5B..0C5F    ; Cn #   [5] <reserved-0C5B>..<reserved-0C5F>
+0C5B..0C5C    ; Cn #   [2] <reserved-0C5B>..<reserved-0C5C>
+0C5E..0C5F    ; Cn #   [2] <reserved-0C5E>..<reserved-0C5F>
 0C64..0C65    ; Cn #   [2] <reserved-0C64>..<reserved-0C65>
 0C70..0C76    ; Cn #   [7] <reserved-0C70>..<reserved-0C76>
 0C8D          ; Cn #       <reserved-0C8D>
@ -132,7 +132,7 @@
 0CC5          ; Cn #       <reserved-0CC5>
 0CC9          ; Cn #       <reserved-0CC9>
 0CCE..0CD4    ; Cn #   [7] <reserved-0CCE>..<reserved-0CD4>
-0CD7..0CDD    ; Cn #   [7] <reserved-0CD7>..<reserved-0CDD>
+0CD7..0CDC    ; Cn #   [6] <reserved-0CD7>..<reserved-0CDC>
 0CDF          ; Cn #       <reserved-0CDF>
 0CE4..0CE5    ; Cn #   [2] <reserved-0CE4>..<reserved-0CE5>
 0CF0          ; Cn #       <reserved-0CF0>
@ -200,8 +200,7 @@
 13FE..13FF    ; Cn #   [2] <reserved-13FE>..<reserved-13FF>
 169D..169F    ; Cn #   [3] <reserved-169D>..<reserved-169F>
 16F9..16FF    ; Cn #   [7] <reserved-16F9>..<reserved-16FF>
-170D          ; Cn #       <reserved-170D>
-1715..171F    ; Cn #  [11] <reserved-1715>..<reserved-171F>
+1716..171E    ; Cn #   [9] <reserved-1716>..<reserved-171E>
 1737..173F    ; Cn #   [9] <reserved-1737>..<reserved-173F>
 1754..175F    ; Cn #  [12] <reserved-1754>..<reserved-175F>
 176D          ; Cn #       <reserved-176D>
@ -210,7 +209,6 @@
 17DE..17DF    ; Cn #   [2] <reserved-17DE>..<reserved-17DF>
 17EA..17EF    ; Cn #   [6] <reserved-17EA>..<reserved-17EF>
 17FA..17FF    ; Cn #   [6] <reserved-17FA>..<reserved-17FF>
-180F          ; Cn #       <reserved-180F>
 181A..181F    ; Cn #   [6] <reserved-181A>..<reserved-181F>
 1879..187F    ; Cn #   [7] <reserved-1879>..<reserved-187F>
 18AB..18AF    ; Cn #   [5] <reserved-18AB>..<reserved-18AF>
@ -230,9 +228,9 @@
 1A8A..1A8F    ; Cn #   [6] <reserved-1A8A>..<reserved-1A8F>
 1A9A..1A9F    ; Cn #   [6] <reserved-1A9A>..<reserved-1A9F>
 1AAE..1AAF    ; Cn #   [2] <reserved-1AAE>..<reserved-1AAF>
-1AC1..1AFF    ; Cn #  [63] <reserved-1AC1>..<reserved-1AFF>
-1B4C..1B4F    ; Cn #   [4] <reserved-1B4C>..<reserved-1B4F>
-1B7D..1B7F    ; Cn #   [3] <reserved-1B7D>..<reserved-1B7F>
+1ACF..1AFF    ; Cn #  [49] <reserved-1ACF>..<reserved-1AFF>
+1B4D..1B4F    ; Cn #   [3] <reserved-1B4D>..<reserved-1B4F>
+1B7F          ; Cn #       <reserved-1B7F>
 1BF4..1BFB    ; Cn #   [8] <reserved-1BF4>..<reserved-1BFB>
 1C38..1C3A    ; Cn #   [3] <reserved-1C38>..<reserved-1C3A>
 1C4A..1C4C    ; Cn #   [3] <reserved-1C4A>..<reserved-1C4C>
@ -240,7 +238,6 @@
 1CBB..1CBC    ; Cn #   [2] <reserved-1CBB>..<reserved-1CBC>
 1CC8..1CCF    ; Cn #   [8] <reserved-1CC8>..<reserved-1CCF>
 1CFB..1CFF    ; Cn #   [5] <reserved-1CFB>..<reserved-1CFF>
-1DFA          ; Cn #       <reserved-1DFA>
 1F16..1F17    ; Cn #   [2] <reserved-1F16>..<reserved-1F17>
 1F1E..1F1F    ; Cn #   [2] <reserved-1F1E>..<reserved-1F1F>
 1F46..1F47    ; Cn #   [2] <reserved-1F46>..<reserved-1F47>
@ -261,15 +258,13 @@
 2072..2073    ; Cn #   [2] <reserved-2072>..<reserved-2073>
 208F          ; Cn #       <reserved-208F>
 209D..209F    ; Cn #   [3] <reserved-209D>..<reserved-209F>
-20C0..20CF    ; Cn #  [16] <reserved-20C0>..<reserved-20CF>
+20C1..20CF    ; Cn #  [15] <reserved-20C1>..<reserved-20CF>
 20F1..20FF    ; Cn #  [15] <reserved-20F1>..<reserved-20FF>
 218C..218F    ; Cn #   [4] <reserved-218C>..<reserved-218F>
 2427..243F    ; Cn #  [25] <reserved-2427>..<reserved-243F>
 244B..245F    ; Cn #  [21] <reserved-244B>..<reserved-245F>
 2B74..2B75    ; Cn #   [2] <reserved-2B74>..<reserved-2B75>
 2B96          ; Cn #       <reserved-2B96>
-2C2F          ; Cn #       <reserved-2C2F>
-2C5F          ; Cn #       <reserved-2C5F>
 2CF4..2CF8    ; Cn #   [5] <reserved-2CF4>..<reserved-2CF8>
 2D26          ; Cn #       <reserved-2D26>
 2D28..2D2C    ; Cn #   [5] <reserved-2D28>..<reserved-2D2C>
@ -285,7 +280,7 @@
 2DCF          ; Cn #       <reserved-2DCF>
 2DD7          ; Cn #       <reserved-2DD7>
 2DDF          ; Cn #       <reserved-2DDF>
-2E53..2E7F    ; Cn #  [45] <reserved-2E53>..<reserved-2E7F>
+2E5E..2E7F    ; Cn #  [34] <reserved-2E5E>..<reserved-2E7F>
 2E9A          ; Cn #       <reserved-2E9A>
 2EF4..2EFF    ; Cn #  [12] <reserved-2EF4>..<reserved-2EFF>
 2FD6..2FEF    ; Cn #  [26] <reserved-2FD6>..<reserved-2FEF>
@ -297,13 +292,14 @@
 318F          ; Cn #       <reserved-318F>
 31E4..31EF    ; Cn #  [12] <reserved-31E4>..<reserved-31EF>
 321F          ; Cn #       <reserved-321F>
-9FFD..9FFF    ; Cn #   [3] <reserved-9FFD>..<reserved-9FFF>
 A48D..A48F    ; Cn #   [3] <reserved-A48D>..<reserved-A48F>
 A4C7..A4CF    ; Cn #   [9] <reserved-A4C7>..<reserved-A4CF>
 A62C..A63F    ; Cn #  [20] <reserved-A62C>..<reserved-A63F>
 A6F8..A6FF    ; Cn #   [8] <reserved-A6F8>..<reserved-A6FF>
-A7C0..A7C1    ; Cn #   [2] <reserved-A7C0>..<reserved-A7C1>
-A7CB..A7F4    ; Cn #  [42] <reserved-A7CB>..<reserved-A7F4>
+A7CB..A7CF    ; Cn #   [5] <reserved-A7CB>..<reserved-A7CF>
+A7D2          ; Cn #       <reserved-A7D2>
+A7D4          ; Cn #       <reserved-A7D4>
+A7DA..A7F1    ; Cn #  [24] <reserved-A7DA>..<reserved-A7F1>
 A82D..A82F    ; Cn #   [3] <reserved-A82D>..<reserved-A82F>
 A83A..A83F    ; Cn #   [6] <reserved-A83A>..<reserved-A83F>
 A878..A87F    ; Cn #   [8] <reserved-A878>..<reserved-A87F>
@ -339,11 +335,10 @@ FB3D          ; Cn #       <reserved-FB3D>
 FB3F          ; Cn #       <reserved-FB3F>
 FB42          ; Cn #       <reserved-FB42>
 FB45          ; Cn #       <reserved-FB45>
-FBC2..FBD2    ; Cn #  [17] <reserved-FBC2>..<reserved-FBD2>
-FD40..FD4F    ; Cn #  [16] <reserved-FD40>..<reserved-FD4F>
+FBC3..FBD2    ; Cn #  [16] <reserved-FBC3>..<reserved-FBD2>
 FD90..FD91    ; Cn #   [2] <reserved-FD90>..<reserved-FD91>
-FDC8..FDEF    ; Cn #  [40] <reserved-FDC8>..<noncharacter-FDEF>
-FDFE..FDFF    ; Cn #   [2] <reserved-FDFE>..<reserved-FDFF>
+FDC8..FDCE    ; Cn #   [7] <reserved-FDC8>..<reserved-FDCE>
+FDD0..FDEF    ; Cn #  [32] <noncharacter-FDD0>..<noncharacter-FDEF>
 FE1A..FE1F    ; Cn #   [6] <reserved-FE1A>..<reserved-FE1F>
 FE53          ; Cn #       <reserved-FE53>
 FE67          ; Cn #       <reserved-FE67>
@ -387,10 +382,20 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 104FC..104FF  ; Cn #   [4] <reserved-104FC>..<reserved-104FF>
 10528..1052F  ; Cn #   [8] <reserved-10528>..<reserved-1052F>
 10564..1056E  ; Cn #  [11] <reserved-10564>..<reserved-1056E>
-10570..105FF  ; Cn # [144] <reserved-10570>..<reserved-105FF>
+1057B         ; Cn #       <reserved-1057B>
+1058B         ; Cn #       <reserved-1058B>
+10593         ; Cn #       <reserved-10593>
+10596         ; Cn #       <reserved-10596>
+105A2         ; Cn #       <reserved-105A2>
+105B2         ; Cn #       <reserved-105B2>
+105BA         ; Cn #       <reserved-105BA>
+105BD..105FF  ; Cn #  [67] <reserved-105BD>..<reserved-105FF>
 10737..1073F  ; Cn #   [9] <reserved-10737>..<reserved-1073F>
 10756..1075F  ; Cn #  [10] <reserved-10756>..<reserved-1075F>
-10768..107FF  ; Cn # [152] <reserved-10768>..<reserved-107FF>
+10768..1077F  ; Cn #  [24] <reserved-10768>..<reserved-1077F>
+10786         ; Cn #       <reserved-10786>
+107B1         ; Cn #       <reserved-107B1>
+107BB..107FF  ; Cn #  [69] <reserved-107BB>..<reserved-107FF>
 10806..10807  ; Cn #   [2] <reserved-10806>..<reserved-10807>
 10809         ; Cn #       <reserved-10809>
 10836         ; Cn #       <reserved-10836>
@ -433,12 +438,13 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 10EAE..10EAF  ; Cn #   [2] <reserved-10EAE>..<reserved-10EAF>
 10EB2..10EFF  ; Cn #  [78] <reserved-10EB2>..<reserved-10EFF>
 10F28..10F2F  ; Cn #   [8] <reserved-10F28>..<reserved-10F2F>
-10F5A..10FAF  ; Cn #  [86] <reserved-10F5A>..<reserved-10FAF>
+10F5A..10F6F  ; Cn #  [22] <reserved-10F5A>..<reserved-10F6F>
+10F8A..10FAF  ; Cn #  [38] <reserved-10F8A>..<reserved-10FAF>
 10FCC..10FDF  ; Cn #  [20] <reserved-10FCC>..<reserved-10FDF>
 10FF7..10FFF  ; Cn #   [9] <reserved-10FF7>..<reserved-10FFF>
 1104E..11051  ; Cn #   [4] <reserved-1104E>..<reserved-11051>
-11070..1107E  ; Cn #  [15] <reserved-11070>..<reserved-1107E>
-110C2..110CC  ; Cn #  [11] <reserved-110C2>..<reserved-110CC>
+11076..1107E  ; Cn #   [9] <reserved-11076>..<reserved-1107E>
+110C3..110CC  ; Cn #  [10] <reserved-110C3>..<reserved-110CC>
 110CE..110CF  ; Cn #   [2] <reserved-110CE>..<reserved-110CF>
 110E9..110EF  ; Cn #   [7] <reserved-110E9>..<reserved-110EF>
 110FA..110FF  ; Cn #   [6] <reserved-110FA>..<reserved-110FF>
@ -480,11 +486,11 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 11645..1164F  ; Cn #  [11] <reserved-11645>..<reserved-1164F>
 1165A..1165F  ; Cn #   [6] <reserved-1165A>..<reserved-1165F>
 1166D..1167F  ; Cn #  [19] <reserved-1166D>..<reserved-1167F>
-116B9..116BF  ; Cn #   [7] <reserved-116B9>..<reserved-116BF>
+116BA..116BF  ; Cn #   [6] <reserved-116BA>..<reserved-116BF>
 116CA..116FF  ; Cn #  [54] <reserved-116CA>..<reserved-116FF>
 1171B..1171C  ; Cn #   [2] <reserved-1171B>..<reserved-1171C>
 1172C..1172F  ; Cn #   [4] <reserved-1172C>..<reserved-1172F>
-11740..117FF  ; Cn # [192] <reserved-11740>..<reserved-117FF>
+11747..117FF  ; Cn # [185] <reserved-11747>..<reserved-117FF>
 1183C..1189F  ; Cn # [100] <reserved-1183C>..<reserved-1189F>
 118F3..118FE  ; Cn #  [12] <reserved-118F3>..<reserved-118FE>
 11907..11908  ; Cn #   [2] <reserved-11907>..<reserved-11908>
@ -499,7 +505,7 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 119D8..119D9  ; Cn #   [2] <reserved-119D8>..<reserved-119D9>
 119E5..119FF  ; Cn #  [27] <reserved-119E5>..<reserved-119FF>
 11A48..11A4F  ; Cn #   [8] <reserved-11A48>..<reserved-11A4F>
-11AA3..11ABF  ; Cn #  [29] <reserved-11AA3>..<reserved-11ABF>
+11AA3..11AAF  ; Cn #  [13] <reserved-11AA3>..<reserved-11AAF>
 11AF9..11BFF  ; Cn # [263] <reserved-11AF9>..<reserved-11BFF>
 11C09         ; Cn #       <reserved-11C09>
 11C37         ; Cn #       <reserved-11C37>
@ -527,14 +533,16 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1239A..123FF  ; Cn # [102] <reserved-1239A>..<reserved-123FF>
 1246F         ; Cn #       <reserved-1246F>
 12475..1247F  ; Cn #  [11] <reserved-12475>..<reserved-1247F>
-12544..12FFF  ; Cn # [2748] <reserved-12544>..<reserved-12FFF>
+12544..12F8F  ; Cn # [2636] <reserved-12544>..<reserved-12F8F>
+12FF3..12FFF  ; Cn #  [13] <reserved-12FF3>..<reserved-12FFF>
 1342F         ; Cn #       <reserved-1342F>
 13439..143FF  ; Cn # [4039] <reserved-13439>..<reserved-143FF>
 14647..167FF  ; Cn # [8633] <reserved-14647>..<reserved-167FF>
 16A39..16A3F  ; Cn #   [7] <reserved-16A39>..<reserved-16A3F>
 16A5F         ; Cn #       <reserved-16A5F>
 16A6A..16A6D  ; Cn #   [4] <reserved-16A6A>..<reserved-16A6D>
-16A70..16ACF  ; Cn #  [96] <reserved-16A70>..<reserved-16ACF>
+16ABF         ; Cn #       <reserved-16ABF>
+16ACA..16ACF  ; Cn #   [6] <reserved-16ACA>..<reserved-16ACF>
 16AEE..16AEF  ; Cn #   [2] <reserved-16AEE>..<reserved-16AEF>
 16AF6..16AFF  ; Cn #  [10] <reserved-16AF6>..<reserved-16AFF>
 16B46..16B4F  ; Cn #  [10] <reserved-16B46>..<reserved-16B4F>
@ -550,8 +558,11 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 16FF2..16FFF  ; Cn #  [14] <reserved-16FF2>..<reserved-16FFF>
 187F8..187FF  ; Cn #   [8] <reserved-187F8>..<reserved-187FF>
 18CD6..18CFF  ; Cn #  [42] <reserved-18CD6>..<reserved-18CFF>
-18D09..1AFFF  ; Cn # [8951] <reserved-18D09>..<reserved-1AFFF>
-1B11F..1B14F  ; Cn #  [49] <reserved-1B11F>..<reserved-1B14F>
+18D09..1AFEF  ; Cn # [8935] <reserved-18D09>..<reserved-1AFEF>
+1AFF4         ; Cn #       <reserved-1AFF4>
+1AFFC         ; Cn #       <reserved-1AFFC>
+1AFFF         ; Cn #       <reserved-1AFFF>
+1B123..1B14F  ; Cn #  [45] <reserved-1B123>..<reserved-1B14F>
 1B153..1B163  ; Cn #  [17] <reserved-1B153>..<reserved-1B163>
 1B168..1B16F  ; Cn #   [8] <reserved-1B168>..<reserved-1B16F>
 1B2FC..1BBFF  ; Cn # [2308] <reserved-1B2FC>..<reserved-1BBFF>
@ -559,10 +570,13 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1BC7D..1BC7F  ; Cn #   [3] <reserved-1BC7D>..<reserved-1BC7F>
 1BC89..1BC8F  ; Cn #   [7] <reserved-1BC89>..<reserved-1BC8F>
 1BC9A..1BC9B  ; Cn #   [2] <reserved-1BC9A>..<reserved-1BC9B>
-1BCA4..1CFFF  ; Cn # [4956] <reserved-1BCA4>..<reserved-1CFFF>
+1BCA4..1CEFF  ; Cn # [4700] <reserved-1BCA4>..<reserved-1CEFF>
+1CF2E..1CF2F  ; Cn #   [2] <reserved-1CF2E>..<reserved-1CF2F>
+1CF47..1CF4F  ; Cn #   [9] <reserved-1CF47>..<reserved-1CF4F>
+1CFC4..1CFFF  ; Cn #  [60] <reserved-1CFC4>..<reserved-1CFFF>
 1D0F6..1D0FF  ; Cn #  [10] <reserved-1D0F6>..<reserved-1D0FF>
 1D127..1D128  ; Cn #   [2] <reserved-1D127>..<reserved-1D128>
-1D1E9..1D1FF  ; Cn #  [23] <reserved-1D1E9>..<reserved-1D1FF>
+1D1EB..1D1FF  ; Cn #  [21] <reserved-1D1EB>..<reserved-1D1FF>
 1D246..1D2DF  ; Cn # [154] <reserved-1D246>..<reserved-1D2DF>
 1D2F4..1D2FF  ; Cn #  [12] <reserved-1D2F4>..<reserved-1D2FF>
 1D357..1D35F  ; Cn #   [9] <reserved-1D357>..<reserved-1D35F>
@ -589,7 +603,8 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1D7CC..1D7CD  ; Cn #   [2] <reserved-1D7CC>..<reserved-1D7CD>
 1DA8C..1DA9A  ; Cn #  [15] <reserved-1DA8C>..<reserved-1DA9A>
 1DAA0         ; Cn #       <reserved-1DAA0>
-1DAB0..1DFFF  ; Cn # [1360] <reserved-1DAB0>..<reserved-1DFFF>
+1DAB0..1DEFF  ; Cn # [1104] <reserved-1DAB0>..<reserved-1DEFF>
+1DF1F..1DFFF  ; Cn # [225] <reserved-1DF1F>..<reserved-1DFFF>
 1E007         ; Cn #       <reserved-1E007>
 1E019..1E01A  ; Cn #   [2] <reserved-1E019>..<reserved-1E01A>
 1E022         ; Cn #       <reserved-1E022>
@ -598,9 +613,14 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1E12D..1E12F  ; Cn #   [3] <reserved-1E12D>..<reserved-1E12F>
 1E13E..1E13F  ; Cn #   [2] <reserved-1E13E>..<reserved-1E13F>
 1E14A..1E14D  ; Cn #   [4] <reserved-1E14A>..<reserved-1E14D>
-1E150..1E2BF  ; Cn # [368] <reserved-1E150>..<reserved-1E2BF>
+1E150..1E28F  ; Cn # [320] <reserved-1E150>..<reserved-1E28F>
+1E2AF..1E2BF  ; Cn #  [17] <reserved-1E2AF>..<reserved-1E2BF>
 1E2FA..1E2FE  ; Cn #   [5] <reserved-1E2FA>..<reserved-1E2FE>
-1E300..1E7FF  ; Cn # [1280] <reserved-1E300>..<reserved-1E7FF>
+1E300..1E7DF  ; Cn # [1248] <reserved-1E300>..<reserved-1E7DF>
+1E7E7         ; Cn #       <reserved-1E7E7>
+1E7EC         ; Cn #       <reserved-1E7EC>
+1E7EF         ; Cn #       <reserved-1E7EF>
+1E7FF         ; Cn #       <reserved-1E7FF>
 1E8C5..1E8C6  ; Cn #   [2] <reserved-1E8C5>..<reserved-1E8C6>
 1E8D7..1E8FF  ; Cn #  [41] <reserved-1E8D7>..<reserved-1E8FF>
 1E94C..1E94F  ; Cn #   [4] <reserved-1E94C>..<reserved-1E94F>
@ -654,34 +674,35 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1F249..1F24F  ; Cn #   [7] <reserved-1F249>..<reserved-1F24F>
 1F252..1F25F  ; Cn #  [14] <reserved-1F252>..<reserved-1F25F>
 1F266..1F2FF  ; Cn # [154] <reserved-1F266>..<reserved-1F2FF>
-1F6D8..1F6DF  ; Cn #   [8] <reserved-1F6D8>..<reserved-1F6DF>
+1F6D8..1F6DC  ; Cn #   [5] <reserved-1F6D8>..<reserved-1F6DC>
 1F6ED..1F6EF  ; Cn #   [3] <reserved-1F6ED>..<reserved-1F6EF>
 1F6FD..1F6FF  ; Cn #   [3] <reserved-1F6FD>..<reserved-1F6FF>
 1F774..1F77F  ; Cn #  [12] <reserved-1F774>..<reserved-1F77F>
 1F7D9..1F7DF  ; Cn #   [7] <reserved-1F7D9>..<reserved-1F7DF>
-1F7EC..1F7FF  ; Cn #  [20] <reserved-1F7EC>..<reserved-1F7FF>
+1F7EC..1F7EF  ; Cn #   [4] <reserved-1F7EC>..<reserved-1F7EF>
+1F7F1..1F7FF  ; Cn #  [15] <reserved-1F7F1>..<reserved-1F7FF>
 1F80C..1F80F  ; Cn #   [4] <reserved-1F80C>..<reserved-1F80F>
 1F848..1F84F  ; Cn #   [8] <reserved-1F848>..<reserved-1F84F>
 1F85A..1F85F  ; Cn #   [6] <reserved-1F85A>..<reserved-1F85F>
 1F888..1F88F  ; Cn #   [8] <reserved-1F888>..<reserved-1F88F>
 1F8AE..1F8AF  ; Cn #   [2] <reserved-1F8AE>..<reserved-1F8AF>
 1F8B2..1F8FF  ; Cn #  [78] <reserved-1F8B2>..<reserved-1F8FF>
-1F979         ; Cn #       <reserved-1F979>
-1F9CC         ; Cn #       <reserved-1F9CC>
 1FA54..1FA5F  ; Cn #  [12] <reserved-1FA54>..<reserved-1FA5F>
 1FA6E..1FA6F  ; Cn #   [2] <reserved-1FA6E>..<reserved-1FA6F>
 1FA75..1FA77  ; Cn #   [3] <reserved-1FA75>..<reserved-1FA77>
-1FA7B..1FA7F  ; Cn #   [5] <reserved-1FA7B>..<reserved-1FA7F>
+1FA7D..1FA7F  ; Cn #   [3] <reserved-1FA7D>..<reserved-1FA7F>
 1FA87..1FA8F  ; Cn #   [9] <reserved-1FA87>..<reserved-1FA8F>
-1FAA9..1FAAF  ; Cn #   [7] <reserved-1FAA9>..<reserved-1FAAF>
-1FAB7..1FABF  ; Cn #   [9] <reserved-1FAB7>..<reserved-1FABF>
-1FAC3..1FACF  ; Cn #  [13] <reserved-1FAC3>..<reserved-1FACF>
-1FAD7..1FAFF  ; Cn #  [41] <reserved-1FAD7>..<reserved-1FAFF>
+1FAAD..1FAAF  ; Cn #   [3] <reserved-1FAAD>..<reserved-1FAAF>
+1FABB..1FABF  ; Cn #   [5] <reserved-1FABB>..<reserved-1FABF>
+1FAC6..1FACF  ; Cn #  [10] <reserved-1FAC6>..<reserved-1FACF>
+1FADA..1FADF  ; Cn #   [6] <reserved-1FADA>..<reserved-1FADF>
+1FAE8..1FAEF  ; Cn #   [8] <reserved-1FAE8>..<reserved-1FAEF>
+1FAF7..1FAFF  ; Cn #   [9] <reserved-1FAF7>..<reserved-1FAFF>
 1FB93         ; Cn #       <reserved-1FB93>
 1FBCB..1FBEF  ; Cn #  [37] <reserved-1FBCB>..<reserved-1FBEF>
 1FBFA..1FFFF  ; Cn # [1030] <reserved-1FBFA>..<noncharacter-1FFFF>
-2A6DE..2A6FF  ; Cn #  [34] <reserved-2A6DE>..<reserved-2A6FF>
-2B735..2B73F  ; Cn #  [11] <reserved-2B735>..<reserved-2B73F>
+2A6E0..2A6FF  ; Cn #  [32] <reserved-2A6E0>..<reserved-2A6FF>
+2B739..2B73F  ; Cn #   [7] <reserved-2B739>..<reserved-2B73F>
 2B81E..2B81F  ; Cn #   [2] <reserved-2B81E>..<reserved-2B81F>
 2CEA2..2CEAF  ; Cn #  [14] <reserved-2CEA2>..<reserved-2CEAF>
 2EBE1..2F7FF  ; Cn # [3103] <reserved-2EBE1>..<reserved-2F7FF>
@ -693,7 +714,7 @@ E01F0..EFFFF  ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
 FFFFE..FFFFF  ; Cn #   [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
 10FFFE..10FFFF; Cn #   [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>

-# Total code points: 830672
+# Total code points: 829834

 # ================================================

@ -1130,7 +1151,7 @@ FFFFE..FFFFF  ; Cn #   [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
 213E..213F    ; Lu #   [2] DOUBLE-STRUCK CAPITAL GAMMA..DOUBLE-STRUCK CAPITAL PI
 2145          ; Lu #       DOUBLE-STRUCK ITALIC CAPITAL D
 2183          ; Lu #       ROMAN NUMERAL REVERSED ONE HUNDRED
-2C00..2C2E    ; Lu #  [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C00..2C2F    ; Lu #  [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
 2C60          ; Lu #       LATIN CAPITAL LETTER L WITH DOUBLE BAR
 2C62..2C64    ; Lu #   [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL
 2C67          ; Lu #       LATIN CAPITAL LETTER H WITH DESCENDER
@ -1295,13 +1316,21 @@ A7B8          ; Lu #       LATIN CAPITAL LETTER U WITH STROKE
 A7BA          ; Lu #       LATIN CAPITAL LETTER GLOTTAL A
 A7BC          ; Lu #       LATIN CAPITAL LETTER GLOTTAL I
 A7BE          ; Lu #       LATIN CAPITAL LETTER GLOTTAL U
+A7C0          ; Lu #       LATIN CAPITAL LETTER OLD POLISH O
 A7C2          ; Lu #       LATIN CAPITAL LETTER ANGLICANA W
 A7C4..A7C7    ; Lu #   [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
 A7C9          ; Lu #       LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7D0          ; Lu #       LATIN CAPITAL LETTER CLOSED INSULAR G
+A7D6          ; Lu #       LATIN CAPITAL LETTER MIDDLE SCOTS S
+A7D8          ; Lu #       LATIN CAPITAL LETTER SIGMOID S
 A7F5          ; Lu #       LATIN CAPITAL LETTER REVERSED HALF H
 FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
 10400..10427  ; Lu #  [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
 104B0..104D3  ; Lu #  [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
+10570..1057A  ; Lu #  [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
+1057C..1058A  ; Lu #  [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
+1058C..10592  ; Lu #   [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
+10594..10595  ; Lu #   [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
 10C80..10CB2  ; Lu #  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
 118A0..118BF  ; Lu #  [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
 16E40..16E5F  ; Lu #  [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
@ -1338,7 +1367,7 @@ FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
 1D7CA         ; Lu #       MATHEMATICAL BOLD CAPITAL DIGAMMA
 1E900..1E921  ; Lu #  [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA

-# Total code points: 1791
+# Total code points: 1831

 # ================================================

@ -1775,7 +1804,7 @@ FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
 2146..2149    ; Ll #   [4] DOUBLE-STRUCK ITALIC SMALL D..DOUBLE-STRUCK ITALIC SMALL J
 214E          ; Ll #       TURNED SMALL F
 2184          ; Ll #       LATIN SMALL LETTER REVERSED C
-2C30..2C5E    ; Ll #  [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
+2C30..2C5F    ; Ll #  [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI
 2C61          ; Ll #       LATIN SMALL LETTER L WITH DOUBLE BAR
 2C65..2C66    ; Ll #   [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE
 2C68          ; Ll #       LATIN SMALL LETTER H WITH DESCENDER
@ -1944,9 +1973,15 @@ A7B9          ; Ll #       LATIN SMALL LETTER U WITH STROKE
 A7BB          ; Ll #       LATIN SMALL LETTER GLOTTAL A
 A7BD          ; Ll #       LATIN SMALL LETTER GLOTTAL I
 A7BF          ; Ll #       LATIN SMALL LETTER GLOTTAL U
+A7C1          ; Ll #       LATIN SMALL LETTER OLD POLISH O
 A7C3          ; Ll #       LATIN SMALL LETTER ANGLICANA W
 A7C8          ; Ll #       LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
 A7CA          ; Ll #       LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7D1          ; Ll #       LATIN SMALL LETTER CLOSED INSULAR G
+A7D3          ; Ll #       LATIN SMALL LETTER DOUBLE THORN
+A7D5          ; Ll #       LATIN SMALL LETTER DOUBLE WYNN
+A7D7          ; Ll #       LATIN SMALL LETTER MIDDLE SCOTS S
+A7D9          ; Ll #       LATIN SMALL LETTER SIGMOID S
 A7F6          ; Ll #       LATIN SMALL LETTER REVERSED HALF H
 A7FA          ; Ll #       LATIN LETTER SMALL CAPITAL TURNED M
 AB30..AB5A    ; Ll #  [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
@ -1957,6 +1992,10 @@ FB13..FB17    ; Ll #   [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGAT
 FF41..FF5A    ; Ll #  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
 10428..1044F  ; Ll #  [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW
 104D8..104FB  ; Ll #  [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
+10597..105A1  ; Ll #  [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
+105A3..105B1  ; Ll #  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
+105B3..105B9  ; Ll #   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
+105BB..105BC  ; Ll #   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
 10CC0..10CF2  ; Ll #  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
 118C0..118DF  ; Ll #  [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
 16E60..16E7F  ; Ll #  [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
@ -1988,9 +2027,11 @@ FF41..FF5A    ; Ll #  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL
 1D7AA..1D7C2  ; Ll #  [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
 1D7C4..1D7C9  ; Ll #   [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL
 1D7CB         ; Ll #       MATHEMATICAL BOLD SMALL DIGAMMA
+1DF00..1DF09  ; Ll #  [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
+1DF0B..1DF1E  ; Ll #  [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
 1E922..1E943  ; Ll #  [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA

-# Total code points: 2155
+# Total code points: 2227

 # ================================================

@ -2028,6 +2069,7 @@ FF41..FF5A    ; Ll #  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL
 081A          ; Lm #       SAMARITAN MODIFIER LETTER EPENTHETIC YUT
 0824          ; Lm #       SAMARITAN MODIFIER LETTER SHORT A
 0828          ; Lm #       SAMARITAN MODIFIER LETTER I
+08C9          ; Lm #       ARABIC SMALL FARSI YEH
 0971          ; Lm #       DEVANAGARI SIGN HIGH SPACING DOT
 0E46          ; Lm #       THAI CHARACTER MAIYAMOK
 0EC6          ; Lm #       LAO KO LA
@ -2058,6 +2100,7 @@ A69C..A69D    ; Lm #   [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER C
 A717..A71F    ; Lm #   [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
 A770          ; Lm #       MODIFIER LETTER US
 A788          ; Lm #       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+A7F2..A7F4    ; Lm #   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F8..A7F9    ; Lm #   [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
 A9CF          ; Lm #       JAVANESE PANGRANGKEP
 A9E6          ; Lm #       MYANMAR MODIFIER LETTER SHAN REDUPLICATION
@ -2068,14 +2111,20 @@ AB5C..AB5F    ; Lm #   [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U W
 AB69          ; Lm #       MODIFIER LETTER SMALL TURNED W
 FF70          ; Lm #       HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
 FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+10780..10785  ; Lm #   [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
+10787..107B0  ; Lm #  [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
+107B2..107BA  ; Lm #   [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
 16B40..16B43  ; Lm #   [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
 16F93..16F9F  ; Lm #  [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
 16FE0..16FE1  ; Lm #   [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
 16FE3         ; Lm #       OLD CHINESE ITERATION MARK
+1AFF0..1AFF3  ; Lm #   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
+1AFF5..1AFFB  ; Lm #   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
+1AFFD..1AFFE  ; Lm #   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
 1E137..1E13D  ; Lm #   [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
 1E94B         ; Lm #       ADLAM NASALIZATION MARK

-# Total code points: 260
+# Total code points: 334

 # ================================================

@ -2104,8 +2153,9 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 0800..0815    ; Lo #  [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
 0840..0858    ; Lo #  [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
 0860..086A    ; Lo #  [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
-08A0..08B4    ; Lo #  [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
-08B6..08C7    ; Lo #  [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
+0870..0887    ; Lo #  [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
+0889..088E    ; Lo #   [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
+08A0..08C8    ; Lo #  [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
 0904..0939    ; Lo #  [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
 093D          ; Lo #       DEVANAGARI SIGN AVAGRAHA
 0950          ; Lo #       DEVANAGARI OM
@ -2170,6 +2220,7 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 0C2A..0C39    ; Lo #  [16] TELUGU LETTER PA..TELUGU LETTER HA
 0C3D          ; Lo #       TELUGU SIGN AVAGRAHA
 0C58..0C5A    ; Lo #   [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
+0C5D          ; Lo #       TELUGU LETTER NAKAARA POLLU
 0C60..0C61    ; Lo #   [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
 0C80          ; Lo #       KANNADA SIGN SPACING CANDRABINDU
 0C85..0C8C    ; Lo #   [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@ -2178,7 +2229,7 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 0CAA..0CB3    ; Lo #  [10] KANNADA LETTER PA..KANNADA LETTER LLA
 0CB5..0CB9    ; Lo #   [5] KANNADA LETTER VA..KANNADA LETTER HA
 0CBD          ; Lo #       KANNADA SIGN AVAGRAHA
-0CDE          ; Lo #       KANNADA LETTER FA
+0CDD..0CDE    ; Lo #   [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
 0CE0..0CE1    ; Lo #   [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
 0CF1..0CF2    ; Lo #   [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
 0D04..0D0C    ; Lo #   [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@ -2242,9 +2293,8 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 1681..169A    ; Lo #  [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
 16A0..16EA    ; Lo #  [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
 16F1..16F8    ; Lo #   [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC
-1700..170C    ; Lo #  [13] TAGALOG LETTER A..TAGALOG LETTER YA
-170E..1711    ; Lo #   [4] TAGALOG LETTER LA..TAGALOG LETTER HA
-1720..1731    ; Lo #  [18] HANUNOO LETTER A..HANUNOO LETTER HA
+1700..1711    ; Lo #  [18] TAGALOG LETTER A..TAGALOG LETTER HA
+171F..1731    ; Lo #  [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA
 1740..1751    ; Lo #  [18] BUHID LETTER A..BUHID LETTER HA
 1760..176C    ; Lo #  [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
 176E..1770    ; Lo #   [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
@ -2264,7 +2314,7 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 1A00..1A16    ; Lo #  [23] BUGINESE LETTER KA..BUGINESE LETTER HA
 1A20..1A54    ; Lo #  [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
 1B05..1B33    ; Lo #  [47] BALINESE LETTER AKARA..BALINESE LETTER HA
-1B45..1B4B    ; Lo #   [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
+1B45..1B4C    ; Lo #   [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
 1B83..1BA0    ; Lo #  [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
 1BAE..1BAF    ; Lo #   [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
 1BBA..1BE5    ; Lo #  [44] SUNDANESE AVAGRAHA..BATAK LETTER U
@ -2297,8 +2347,7 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 31A0..31BF    ; Lo #  [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
 31F0..31FF    ; Lo #  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
 3400..4DBF    ; Lo # [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
-4E00..9FFC    ; Lo # [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
-A000..A014    ; Lo #  [21] YI SYLLABLE IT..YI SYLLABLE E
+4E00..A014    ; Lo # [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E
 A016..A48C    ; Lo # [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
 A4D0..A4F7    ; Lo #  [40] LISU LETTER BA..LISU LETTER OE
 A500..A60B    ; Lo # [268] VAI SYLLABLE EE..VAI SYLLABLE NG
@ -2426,9 +2475,12 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 10F00..10F1C  ; Lo #  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F27         ; Lo #       OLD SOGDIAN LIGATURE AYIN-DALETH
 10F30..10F45  ; Lo #  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
+10F70..10F81  ; Lo #  [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
 10FB0..10FC4  ; Lo #  [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
 10FE0..10FF6  ; Lo #  [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
 11003..11037  ; Lo #  [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
+11071..11072  ; Lo #   [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O
+11075         ; Lo #       BRAHMI LETTER OLD TAMIL LLA
 11083..110AF  ; Lo #  [45] KAITHI LETTER A..KAITHI LETTER HA
 110D0..110E8  ; Lo #  [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE
 11103..11126  ; Lo #  [36] CHAKMA LETTER AA..CHAKMA LETTER HAA
@ -2470,6 +2522,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 11680..116AA  ; Lo #  [43] TAKRI LETTER A..TAKRI LETTER RRA
 116B8         ; Lo #       TAKRI LETTER ARCHAIC KHA
 11700..1171A  ; Lo #  [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
+11740..11746  ; Lo #   [7] AHOM LETTER CA..AHOM LETTER LLA
 11800..1182B  ; Lo #  [44] DOGRA LETTER A..DOGRA LETTER RRA
 118FF..11906  ; Lo #   [8] WARANG CITI OM..DIVES AKURU LETTER E
 11909         ; Lo #       DIVES AKURU LETTER O
@ -2488,7 +2541,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 11A50         ; Lo #       SOYOMBO LETTER A
 11A5C..11A89  ; Lo #  [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
 11A9D         ; Lo #       SOYOMBO MARK PLUTA
-11AC0..11AF8  ; Lo #  [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
+11AB0..11AF8  ; Lo #  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
 11C00..11C08  ; Lo #   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; Lo #  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C40         ; Lo #       BHAIKSUKI SIGN AVAGRAHA
@ -2505,10 +2558,12 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 11FB0         ; Lo #       LISU LETTER YHA
 12000..12399  ; Lo # [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
 12480..12543  ; Lo # [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
+12F90..12FF0  ; Lo #  [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
 13000..1342E  ; Lo # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
 14400..14646  ; Lo # [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
 16800..16A38  ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; Lo #  [31] MRO LETTER TA..MRO LETTER TEK
+16A70..16ABE  ; Lo #  [79] TANGSA LETTER OZ..TANGSA LETTER ZA
 16AD0..16AED  ; Lo #  [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I
 16B00..16B2F  ; Lo #  [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
 16B63..16B77  ; Lo #  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
@ -2518,7 +2573,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 17000..187F7  ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
 18D00..18D08  ; Lo #   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
-1B000..1B11E  ; Lo # [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
+1B000..1B122  ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU
 1B150..1B152  ; Lo #   [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
 1B164..1B167  ; Lo #   [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
 1B170..1B2FB  ; Lo # [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
@ -2526,9 +2581,15 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 1BC70..1BC7C  ; Lo #  [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
 1BC80..1BC88  ; Lo #   [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
 1BC90..1BC99  ; Lo #  [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
+1DF0A         ; Lo #       LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
 1E100..1E12C  ; Lo #  [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
 1E14E         ; Lo #       NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
+1E290..1E2AD  ; Lo #  [30] TOTO LETTER PA..TOTO LETTER A
 1E2C0..1E2EB  ; Lo #  [44] WANCHO LETTER AA..WANCHO LETTER YIH
+1E7E0..1E7E6  ; Lo #   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
+1E7E8..1E7EB  ; Lo #   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
+1E7ED..1E7EE  ; Lo #   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
+1E7F0..1E7FE  ; Lo #  [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE
 1E800..1E8C4  ; Lo # [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
 1EE00..1EE03  ; Lo #   [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
 1EE05..1EE1F  ; Lo #  [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
@ -2563,15 +2624,15 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 1EEA1..1EEA3  ; Lo #   [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
 1EEA5..1EEA9  ; Lo #   [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
 1EEAB..1EEBB  ; Lo #  [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
-20000..2A6DD  ; Lo # [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
-2A700..2B734  ; Lo # [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
+20000..2A6DF  ; Lo # [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
+2A700..2B738  ; Lo # [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
 2B740..2B81D  ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
 2B820..2CEA1  ; Lo # [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
 2CEB0..2EBE0  ; Lo # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
 2F800..2FA1D  ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
 30000..3134A  ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A

-# Total code points: 127004
+# Total code points: 127333

 # ================================================

@ -2601,7 +2662,8 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 0825..0827    ; Mn #   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
 0829..082D    ; Mn #   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
 0859..085B    ; Mn #   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-08D3..08E1    ; Mn #  [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
+0898..089F    ; Mn #   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+08CA..08E1    ; Mn #  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
 08E3..0902    ; Mn #  [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
 093A          ; Mn #       DEVANAGARI VOWEL SIGN OE
 093C          ; Mn #       DEVANAGARI SIGN NUKTA
@ -2642,6 +2704,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 0BCD          ; Mn #       TAMIL SIGN VIRAMA
 0C00          ; Mn #       TELUGU SIGN COMBINING CANDRABINDU ABOVE
 0C04          ; Mn #       TELUGU SIGN COMBINING ANUSVARA ABOVE
+0C3C          ; Mn #       TELUGU SIGN NUKTA
 0C3E..0C40    ; Mn #   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
 0C46..0C48    ; Mn #   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
 0C4A..0C4D    ; Mn #   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
@ -2691,7 +2754,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 109D          ; Mn #       MYANMAR VOWEL SIGN AITON AI
 135D..135F    ; Mn #   [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
 1712..1714    ; Mn #   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
-1732..1734    ; Mn #   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+1732..1733    ; Mn #   [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
 1752..1753    ; Mn #   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
 1772..1773    ; Mn #   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
 17B4..17B5    ; Mn #   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
@ -2700,6 +2763,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 17C9..17D3    ; Mn #  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
 17DD          ; Mn #       KHMER SIGN ATTHACAN
 180B..180D    ; Mn #   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180F          ; Mn #       MONGOLIAN FREE VARIATION SELECTOR FOUR
 1885..1886    ; Mn #   [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
 18A9          ; Mn #       MONGOLIAN LETTER ALI GALI DAGALGA
 1920..1922    ; Mn #   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
@ -2716,7 +2780,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 1A73..1A7C    ; Mn #  [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
 1A7F          ; Mn #       TAI THAM COMBINING CRYPTOGRAMMIC DOT
 1AB0..1ABD    ; Mn #  [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
-1ABF..1AC0    ; Mn #   [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
+1ABF..1ACE    ; Mn #  [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
 1B00..1B03    ; Mn #   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
 1B34          ; Mn #       BALINESE SIGN REREKAN
 1B36..1B3A    ; Mn #   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
@ -2739,8 +2803,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 1CED          ; Mn #       VEDIC SIGN TIRYAK
 1CF4          ; Mn #       VEDIC TONE CANDRA ABOVE
 1CF8..1CF9    ; Mn #   [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1DC0..1DF9    ; Mn #  [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
-1DFB..1DFF    ; Mn #   [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1DC0..1DFF    ; Mn #  [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
 20D0..20DC    ; Mn #  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
 20E1          ; Mn #       COMBINING LEFT RIGHT ARROW ABOVE
 20E5..20F0    ; Mn #  [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
@ -2799,11 +2862,15 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 10D24..10D27  ; Mn #   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
 10EAB..10EAC  ; Mn #   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
 10F46..10F50  ; Mn #  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
+10F82..10F85  ; Mn #   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
 11001         ; Mn #       BRAHMI SIGN ANUSVARA
 11038..11046  ; Mn #  [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
+11070         ; Mn #       BRAHMI SIGN OLD TAMIL VIRAMA
+11073..11074  ; Mn #   [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
 1107F..11081  ; Mn #   [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
 110B3..110B6  ; Mn #   [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
 110B9..110BA  ; Mn #   [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+110C2         ; Mn #       KAITHI VOWEL SIGN VOCALIC R
 11100..11102  ; Mn #   [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
 11127..1112B  ; Mn #   [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
 1112D..11134  ; Mn #   [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
@ -2883,6 +2950,8 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 16F8F..16F92  ; Mn #   [4] MIAO TONE RIGHT..MIAO TONE BELOW
 16FE4         ; Mn #       KHITAN SMALL SCRIPT FILLER
 1BC9D..1BC9E  ; Mn #   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
+1CF00..1CF2D  ; Mn #  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
+1CF30..1CF46  ; Mn #  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
 1D167..1D169  ; Mn #   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
 1D17B..1D182  ; Mn #   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
 1D185..1D18B  ; Mn #   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
@ -2900,12 +2969,13 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 1E023..1E024  ; Mn #   [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
 1E026..1E02A  ; Mn #   [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
 1E130..1E136  ; Mn #   [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
+1E2AE         ; Mn #       TOTO SIGN RISING TONE
 1E2EC..1E2EF  ; Mn #   [4] WANCHO TONE TUP..WANCHO TONE KOINI
 1E8D0..1E8D6  ; Mn #   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
 1E944..1E94A  ; Mn #   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
 E0100..E01EF  ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

-# Total code points: 1839
+# Total code points: 1950

 # ================================================

@ -2980,6 +3050,8 @@ A670..A672    ; Me #   [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRIL
 1087..108C    ; Mc #   [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
 108F          ; Mc #       MYANMAR SIGN RUMAI PALAUNG TONE-5
 109A..109C    ; Mc #   [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
+1715          ; Mc #       TAGALOG SIGN PAMUDPOD
+1734          ; Mc #       HANUNOO SIGN PAMUDPOD
 17B6          ; Mc #       KHMER VOWEL SIGN AA
 17BE..17C5    ; Mc #   [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
 17C7..17C8    ; Mc #   [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
@ -3099,7 +3171,7 @@ ABEC          ; Mc #       MEETEI MAYEK LUM IYEK
 1D165..1D166  ; Mc #   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
 1D16D..1D172  ; Mc #   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5

-# Total code points: 443
+# Total code points: 445

 # ================================================

@ -3160,6 +3232,7 @@ FF10..FF19    ; Nd #  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
 11D50..11D59  ; Nd #  [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
 11DA0..11DA9  ; Nd #  [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
 16A60..16A69  ; Nd #  [10] MRO DIGIT ZERO..MRO DIGIT NINE
+16AC0..16AC9  ; Nd #  [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
 16B50..16B59  ; Nd #  [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
 1D7CE..1D7FF  ; Nd #  [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
 1E140..1E149  ; Nd #  [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
@ -3167,7 +3240,7 @@ FF10..FF19    ; Nd #  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
 1E950..1E959  ; Nd #  [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
 1FBF0..1FBF9  ; Nd #  [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE

-# Total code points: 650
+# Total code points: 660

 # ================================================

@ -3314,6 +3387,7 @@ A830..A835    ; No #   [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO
 061C          ; Cf #       ARABIC LETTER MARK
 06DD          ; Cf #       ARABIC END OF AYAH
 070F          ; Cf #       SYRIAC ABBREVIATION MARK
+0890..0891    ; Cf #   [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
 08E2          ; Cf #       ARABIC DISPUTED END OF AYAH
 180E          ; Cf #       MONGOLIAN VOWEL SEPARATOR
 200B..200F    ; Cf #   [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
@ -3330,7 +3404,7 @@ FFF9..FFFB    ; Cf #   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION
 E0001         ; Cf #       LANGUAGE TAG
 E0020..E007F  ; Cf #  [96] TAG SPACE..CANCEL TAG

-# Total code points: 161
+# Total code points: 163

 # ================================================

@ -3364,6 +3438,7 @@ D800..DFFF    ; Cs # [2048] <surrogate-D800>..<surrogate-DFFF>
 2E1A          ; Pd #       HYPHEN WITH DIAERESIS
 2E3A..2E3B    ; Pd #   [2] TWO-EM DASH..THREE-EM DASH
 2E40          ; Pd #       DOUBLE HYPHEN
+2E5D          ; Pd #       OBLIQUE HYPHEN
 301C          ; Pd #       WAVE DASH
 3030          ; Pd #       WAVY DASH
 30A0          ; Pd #       KATAKANA-HIRAGANA DOUBLE HYPHEN
@ -3373,7 +3448,7 @@ FE63          ; Pd #       SMALL HYPHEN-MINUS
 FF0D          ; Pd #       FULLWIDTH HYPHEN-MINUS
 10EAD         ; Pd #       YEZIDI HYPHENATION MARK

-# Total code points: 25
+# Total code points: 26

 # ================================================

@ -3425,6 +3500,10 @@ FF0D          ; Pd #       FULLWIDTH HYPHEN-MINUS
 2E26          ; Ps #       LEFT SIDEWAYS U BRACKET
 2E28          ; Ps #       LEFT DOUBLE PARENTHESIS
 2E42          ; Ps #       DOUBLE LOW-REVERSED-9 QUOTATION MARK
+2E55          ; Ps #       LEFT SQUARE BRACKET WITH STROKE
+2E57          ; Ps #       LEFT SQUARE BRACKET WITH DOUBLE STROKE
+2E59          ; Ps #       TOP HALF LEFT PARENTHESIS
+2E5B          ; Ps #       BOTTOM HALF LEFT PARENTHESIS
 3008          ; Ps #       LEFT ANGLE BRACKET
 300A          ; Ps #       LEFT DOUBLE ANGLE BRACKET
 300C          ; Ps #       LEFT CORNER BRACKET
@ -3455,7 +3534,7 @@ FF5B          ; Ps #       FULLWIDTH LEFT CURLY BRACKET
 FF5F          ; Ps #       FULLWIDTH LEFT WHITE PARENTHESIS
 FF62          ; Ps #       HALFWIDTH LEFT CORNER BRACKET

-# Total code points: 75
+# Total code points: 79

 # ================================================

@ -3504,6 +3583,10 @@ FF62          ; Ps #       HALFWIDTH LEFT CORNER BRACKET
 2E25          ; Pe #       BOTTOM RIGHT HALF BRACKET
 2E27          ; Pe #       RIGHT SIDEWAYS U BRACKET
 2E29          ; Pe #       RIGHT DOUBLE PARENTHESIS
+2E56          ; Pe #       RIGHT SQUARE BRACKET WITH STROKE
+2E58          ; Pe #       RIGHT SQUARE BRACKET WITH DOUBLE STROKE
+2E5A          ; Pe #       TOP HALF RIGHT PARENTHESIS
+2E5C          ; Pe #       BOTTOM HALF RIGHT PARENTHESIS
 3009          ; Pe #       RIGHT ANGLE BRACKET
 300B          ; Pe #       RIGHT DOUBLE ANGLE BRACKET
 300D          ; Pe #       RIGHT CORNER BRACKET
@ -3534,7 +3617,7 @@ FF5D          ; Pe #       FULLWIDTH RIGHT CURLY BRACKET
 FF60          ; Pe #       FULLWIDTH RIGHT WHITE PARENTHESIS
 FF63          ; Pe #       HALFWIDTH RIGHT CORNER BRACKET

-# Total code points: 73
+# Total code points: 77

 # ================================================

@ -3576,7 +3659,7 @@ FF3F          ; Pc #       FULLWIDTH LOW LINE
 0609..060A    ; Po #   [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
 060C..060D    ; Po #   [2] ARABIC COMMA..ARABIC DATE SEPARATOR
 061B          ; Po #       ARABIC SEMICOLON
-061E..061F    ; Po #   [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
+061D..061F    ; Po #   [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
 066A..066D    ; Po #   [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
 06D4          ; Po #       ARABIC FULL STOP
 0700..070D    ; Po #  [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
@ -3613,6 +3696,7 @@ FF3F          ; Pc #       FULLWIDTH LOW LINE
 1AA0..1AA6    ; Po #   [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
 1AA8..1AAD    ; Po #   [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
 1B5A..1B60    ; Po #   [7] BALINESE PANTI..BALINESE PAMENENG
+1B7D..1B7E    ; Po #   [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
 1BFC..1BFF    ; Po #   [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
 1C3B..1C3F    ; Po #   [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
 1C7E..1C7F    ; Po #   [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
@ -3641,7 +3725,7 @@ FF3F          ; Pc #       FULLWIDTH LOW LINE
 2E3C..2E3F    ; Po #   [4] STENOGRAPHIC FULL STOP..CAPITULUM
 2E41          ; Po #       REVERSED COMMA
 2E43..2E4F    ; Po #  [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
-2E52          ; Po #       TIRONIAN SIGN CAPITAL ET
+2E52..2E54    ; Po #   [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
 3001..3003    ; Po #   [3] IDEOGRAPHIC COMMA..DITTO MARK
 303D          ; Po #       PART ALTERNATION MARK
 30FB          ; Po #       KATAKANA MIDDLE DOT
@ -3695,6 +3779,7 @@ FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
 10B39..10B3F  ; Po #   [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
 10B99..10B9C  ; Po #   [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
 10F55..10F59  ; Po #   [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
+10F86..10F89  ; Po #   [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
 11047..1104D  ; Po #   [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
 110BB..110BC  ; Po #   [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
 110BE..110C1  ; Po #   [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
@ -3713,6 +3798,7 @@ FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
 115C1..115D7  ; Po #  [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
 11641..11643  ; Po #   [3] MODI DANDA..MODI ABBREVIATION SIGN
 11660..1166C  ; Po #  [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
+116B9         ; Po #       TAKRI ABBREVIATION SIGN
 1173C..1173E  ; Po #   [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
 1183B         ; Po #       DOGRA ABBREVIATION SIGN
 11944..11946  ; Po #   [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
@ -3725,6 +3811,7 @@ FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
 11EF7..11EF8  ; Po #   [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
 11FFF         ; Po #       TAMIL PUNCTUATION END OF TEXT
 12470..12474  ; Po #   [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
+12FF1..12FF2  ; Po #   [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
 16A6E..16A6F  ; Po #   [2] MRO DANDA..MRO DOUBLE DANDA
 16AF5         ; Po #       BASSA VAH FULL STOP
 16B37..16B3B  ; Po #   [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM
@ -3735,7 +3822,7 @@ FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
 1DA87..1DA8B  ; Po #   [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS
 1E95E..1E95F  ; Po #   [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK

-# Total code points: 593
+# Total code points: 605

 # ================================================

@ -3823,7 +3910,7 @@ FFE9..FFEC    ; Sm #   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
 0BF9          ; Sc #       TAMIL RUPEE SIGN
 0E3F          ; Sc #       THAI CURRENCY SYMBOL BAHT
 17DB          ; Sc #       KHMER CURRENCY SYMBOL RIEL
-20A0..20BF    ; Sc #  [32] EURO-CURRENCY SIGN..BITCOIN SIGN
+20A0..20C0    ; Sc #  [33] EURO-CURRENCY SIGN..SOM SIGN
 A838          ; Sc #       NORTH INDIC RUPEE MARK
 FDFC          ; Sc #       RIAL SIGN
 FE69          ; Sc #       SMALL DOLLAR SIGN
@ -3834,7 +3921,7 @@ FFE5..FFE6    ; Sc #   [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
 1E2FF         ; Sc #       WANCHO NGUN SIGN
 1ECB0         ; Sc #       INDIC SIYAQ RUPEE MARK

-# Total code points: 62
+# Total code points: 63

 # ================================================

@ -3853,6 +3940,7 @@ FFE5..FFE6    ; Sc #   [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
 02EF..02FF    ; Sk #  [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
 0375          ; Sk #       GREEK LOWER NUMERAL SIGN
 0384..0385    ; Sk #   [2] GREEK TONOS..GREEK DIALYTIKA TONOS
+0888          ; Sk #       ARABIC RAISED ROUND DOT
 1FBD          ; Sk #       GREEK KORONIS
 1FBF..1FC1    ; Sk #   [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
 1FCD..1FCF    ; Sk #   [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
@ -3865,13 +3953,13 @@ A720..A721    ; Sk #   [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER
 A789..A78A    ; Sk #   [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
 AB5B          ; Sk #       MODIFIER BREVE WITH INVERTED BREVE
 AB6A..AB6B    ; Sk #   [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
-FBB2..FBC1    ; Sk #  [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
+FBB2..FBC2    ; Sk #  [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE
 FF3E          ; Sk #       FULLWIDTH CIRCUMFLEX ACCENT
 FF40          ; Sk #       FULLWIDTH GRAVE ACCENT
 FFE3          ; Sk #       FULLWIDTH MACRON
 1F3FB..1F3FF  ; Sk #   [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6

-# Total code points: 123
+# Total code points: 125

 # ================================================

@ -3984,7 +4072,9 @@ A828..A82B    ; So #   [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-
 A836..A837    ; So #   [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
 A839          ; So #       NORTH INDIC QUANTITY MARK
 AA77..AA79    ; So #   [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
-FDFD          ; So #       ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+FD40..FD4F    ; So #  [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH
+FDCF          ; So #       ARABIC LIGATURE SALAAMUHU ALAYNAA
+FDFD..FDFF    ; So #   [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL
 FFE4          ; So #       FULLWIDTH BROKEN BAR
 FFE8          ; So #       HALFWIDTH FORMS LIGHT VERTICAL
 FFED..FFEE    ; So #   [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
@ -4003,13 +4093,14 @@ FFFC..FFFD    ; So #   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
 16B3C..16B3F  ; So #   [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB
 16B45         ; So #       PAHAWH HMONG SIGN CIM TSOV ROG
 1BC9C         ; So #       DUPLOYAN SIGN O WITH CROSS
+1CF50..1CFC3  ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
 1D000..1D0F5  ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
 1D100..1D126  ; So #  [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
 1D129..1D164  ; So #  [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
 1D16A..1D16C  ; So #   [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
 1D183..1D184  ; So #   [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
 1D18C..1D1A9  ; So #  [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
-1D1AE..1D1E8  ; So #  [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN
+1D1AE..1D1EA  ; So #  [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON
 1D200..1D241  ; So #  [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
 1D245         ; So #       GREEK MUSICAL LEIMMA
 1D300..1D356  ; So #  [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
@ -4035,32 +4126,33 @@ FFFC..FFFD    ; So #   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
 1F260..1F265  ; So #   [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
 1F300..1F3FA  ; So # [251] CYCLONE..AMPHORA
 1F400..1F6D7  ; So # [728] RAT..ELEVATOR
-1F6E0..1F6EC  ; So #  [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
+1F6DD..1F6EC  ; So #  [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING
 1F6F0..1F6FC  ; So #  [13] SATELLITE..ROLLER SKATE
 1F700..1F773  ; So # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
 1F780..1F7D8  ; So #  [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
 1F7E0..1F7EB  ; So #  [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
+1F7F0         ; So #       HEAVY EQUALS SIGN
 1F800..1F80B  ; So #  [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
 1F810..1F847  ; So #  [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
 1F850..1F859  ; So #  [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
 1F860..1F887  ; So #  [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
 1F890..1F8AD  ; So #  [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
 1F8B0..1F8B1  ; So #   [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
-1F900..1F978  ; So # [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
-1F97A..1F9CB  ; So #  [82] FACE WITH PLEADING EYES..BUBBLE TEA
-1F9CD..1FA53  ; So # [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
+1F900..1FA53  ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
 1FA60..1FA6D  ; So #  [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
 1FA70..1FA74  ; So #   [5] BALLET SHOES..THONG SANDAL
-1FA78..1FA7A  ; So #   [3] DROP OF BLOOD..STETHOSCOPE
+1FA78..1FA7C  ; So #   [5] DROP OF BLOOD..CRUTCH
 1FA80..1FA86  ; So #   [7] YO-YO..NESTING DOLLS
-1FA90..1FAA8  ; So #  [25] RINGED PLANET..ROCK
-1FAB0..1FAB6  ; So #   [7] FLY..FEATHER
-1FAC0..1FAC2  ; So #   [3] ANATOMICAL HEART..PEOPLE HUGGING
-1FAD0..1FAD6  ; So #   [7] BLUEBERRIES..TEAPOT
+1FA90..1FAAC  ; So #  [29] RINGED PLANET..HAMSA
+1FAB0..1FABA  ; So #  [11] FLY..NEST WITH EGGS
+1FAC0..1FAC5  ; So #   [6] ANATOMICAL HEART..PERSON WITH CROWN
+1FAD0..1FAD9  ; So #  [10] BLUEBERRIES..JAR
+1FAE0..1FAE7  ; So #   [8] MELTING FACE..BUBBLES
+1FAF0..1FAF6  ; So #   [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
 1FB00..1FB92  ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
 1FB94..1FBCA  ; So #  [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON

-# Total code points: 6431
+# Total code points: 6605

 # ================================================

--- a/maint/Unicode.tables/GraphemeBreakProperty.txt
+++ b/maint/Unicode.tables/GraphemeBreakProperty.txt
@ -1,6 +1,6 @@
-# GraphemeBreakProperty-13.0.0.txt
-# Date: 2019-10-21, 14:30:35 GMT
-# © 2019 Unicode®, Inc.
+# GraphemeBreakProperty-14.0.0.txt
+# Date: 2021-08-12, 23:13:02 GMT
+# © 2021 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
@ -21,6 +21,7 @@
 0600..0605    ; Prepend # Cf   [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
 06DD          ; Prepend # Cf       ARABIC END OF AYAH
 070F          ; Prepend # Cf       SYRIAC ABBREVIATION MARK
+0890..0891    ; Prepend # Cf   [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
 08E2          ; Prepend # Cf       ARABIC DISPUTED END OF AYAH
 0D4E          ; Prepend # Lo       MALAYALAM LETTER DOT REPH
 110BD         ; Prepend # Cf       KAITHI NUMBER SIGN
@ -32,7 +33,7 @@
 11A84..11A89  ; Prepend # Lo   [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA
 11D46         ; Prepend # Lo       MASARAM GONDI REPHA

-# Total code points: 24
+# Total code points: 26

 # ================================================

@ -104,7 +105,8 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 0825..0827    ; Extend # Mn   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
 0829..082D    ; Extend # Mn   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
 0859..085B    ; Extend # Mn   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-08D3..08E1    ; Extend # Mn  [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
+0898..089F    ; Extend # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+08CA..08E1    ; Extend # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
 08E3..0902    ; Extend # Mn  [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
 093A          ; Extend # Mn       DEVANAGARI VOWEL SIGN OE
 093C          ; Extend # Mn       DEVANAGARI SIGN NUKTA
@ -151,6 +153,7 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 0BD7          ; Extend # Mc       TAMIL AU LENGTH MARK
 0C00          ; Extend # Mn       TELUGU SIGN COMBINING CANDRABINDU ABOVE
 0C04          ; Extend # Mn       TELUGU SIGN COMBINING ANUSVARA ABOVE
+0C3C          ; Extend # Mn       TELUGU SIGN NUKTA
 0C3E..0C40    ; Extend # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
 0C46..0C48    ; Extend # Mn   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
 0C4A..0C4D    ; Extend # Mn   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
@ -206,7 +209,7 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 109D          ; Extend # Mn       MYANMAR VOWEL SIGN AITON AI
 135D..135F    ; Extend # Mn   [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
 1712..1714    ; Extend # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
-1732..1734    ; Extend # Mn   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+1732..1733    ; Extend # Mn   [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
 1752..1753    ; Extend # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
 1772..1773    ; Extend # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
 17B4..17B5    ; Extend # Mn   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
@ -215,6 +218,7 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 17C9..17D3    ; Extend # Mn  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
 17DD          ; Extend # Mn       KHMER SIGN ATTHACAN
 180B..180D    ; Extend # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180F          ; Extend # Mn       MONGOLIAN FREE VARIATION SELECTOR FOUR
 1885..1886    ; Extend # Mn   [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
 18A9          ; Extend # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
 1920..1922    ; Extend # Mn   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
@ -232,7 +236,7 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 1A7F          ; Extend # Mn       TAI THAM COMBINING CRYPTOGRAMMIC DOT
 1AB0..1ABD    ; Extend # Mn  [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
 1ABE          ; Extend # Me       COMBINING PARENTHESES OVERLAY
-1ABF..1AC0    ; Extend # Mn   [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
+1ABF..1ACE    ; Extend # Mn  [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
 1B00..1B03    ; Extend # Mn   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
 1B34          ; Extend # Mn       BALINESE SIGN REREKAN
 1B35          ; Extend # Mc       BALINESE VOWEL SIGN TEDUNG
@ -256,8 +260,7 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 1CED          ; Extend # Mn       VEDIC SIGN TIRYAK
 1CF4          ; Extend # Mn       VEDIC TONE CANDRA ABOVE
 1CF8..1CF9    ; Extend # Mn   [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1DC0..1DF9    ; Extend # Mn  [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
-1DFB..1DFF    ; Extend # Mn   [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1DC0..1DFF    ; Extend # Mn  [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
 200C          ; Extend # Cf       ZERO WIDTH NON-JOINER
 20D0..20DC    ; Extend # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
 20DD..20E0    ; Extend # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
@ -322,11 +325,15 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 10D24..10D27  ; Extend # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
 10EAB..10EAC  ; Extend # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
 10F46..10F50  ; Extend # Mn  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
+10F82..10F85  ; Extend # Mn   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
 11001         ; Extend # Mn       BRAHMI SIGN ANUSVARA
 11038..11046  ; Extend # Mn  [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
+11070         ; Extend # Mn       BRAHMI SIGN OLD TAMIL VIRAMA
+11073..11074  ; Extend # Mn   [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
 1107F..11081  ; Extend # Mn   [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
 110B3..110B6  ; Extend # Mn   [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
 110B9..110BA  ; Extend # Mn   [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+110C2         ; Extend # Mn       KAITHI VOWEL SIGN VOCALIC R
 11100..11102  ; Extend # Mn   [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
 11127..1112B  ; Extend # Mn   [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
 1112D..11134  ; Extend # Mn   [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
@ -412,6 +419,8 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 16F8F..16F92  ; Extend # Mn   [4] MIAO TONE RIGHT..MIAO TONE BELOW
 16FE4         ; Extend # Mn       KHITAN SMALL SCRIPT FILLER
 1BC9D..1BC9E  ; Extend # Mn   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
+1CF00..1CF2D  ; Extend # Mn  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
+1CF30..1CF46  ; Extend # Mn  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
 1D165         ; Extend # Mc       MUSICAL SYMBOL COMBINING STEM
 1D167..1D169  ; Extend # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
 1D16E..1D172  ; Extend # Mc   [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
@ -431,6 +440,7 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 1E023..1E024  ; Extend # Mn   [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
 1E026..1E02A  ; Extend # Mn   [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
 1E130..1E136  ; Extend # Mn   [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
+1E2AE         ; Extend # Mn       TOTO SIGN RISING TONE
 1E2EC..1E2EF  ; Extend # Mn   [4] WANCHO TONE TUP..WANCHO TONE KOINI
 1E8D0..1E8D6  ; Extend # Mn   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
 1E944..1E94A  ; Extend # Mn   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
@ -438,7 +448,7 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 E0020..E007F  ; Extend # Cf  [96] TAG SPACE..CANCEL TAG
 E0100..E01EF  ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

-# Total code points: 1984
+# Total code points: 2095

 # ================================================

@ -495,6 +505,8 @@ E0100..E01EF  ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 103B..103C    ; SpacingMark # Mc   [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
 1056..1057    ; SpacingMark # Mc   [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
 1084          ; SpacingMark # Mc       MYANMAR VOWEL SIGN SHAN E
+1715          ; SpacingMark # Mc       TAGALOG SIGN PAMUDPOD
+1734          ; SpacingMark # Mc       HANUNOO SIGN PAMUDPOD
 17B6          ; SpacingMark # Mc       KHMER VOWEL SIGN AA
 17BE..17C5    ; SpacingMark # Mc   [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
 17C7..17C8    ; SpacingMark # Mc   [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
@ -579,7 +591,6 @@ ABEC          ; SpacingMark # Mc       MEETEI MAYEK LUM IYEK
 116AC         ; SpacingMark # Mc       TAKRI SIGN VISARGA
 116AE..116AF  ; SpacingMark # Mc   [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
 116B6         ; SpacingMark # Mc       TAKRI SIGN VIRAMA
-11720..11721  ; SpacingMark # Mc   [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
 11726         ; SpacingMark # Mc       AHOM VOWEL SIGN E
 1182C..1182E  ; SpacingMark # Mc   [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
 11838         ; SpacingMark # Mc       DOGRA SIGN VISARGA
--- a/maint/Unicode.tables/PropList.txt
+++ b/maint/Unicode.tables/PropList.txt
--- a/maint/Unicode.tables/PropertyAliases.txt
+++ b/maint/Unicode.tables/PropertyAliases.txt
@ -0,0 +1,212 @@
+# PropertyAliases-14.0.0.txt
+# Date: 2021-03-08, 19:35:48 GMT
+# © 2021 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+#   For documentation, see http://www.unicode.org/reports/tr44/
+#
+# This file contains aliases for properties used in the UCD.
+# These names can be used for XML formats of UCD data, for regular-expression
+# property tests, and other programmatic textual descriptions of Unicode data.
+#
+# The names may be translated in appropriate environments, and additional
+# aliases may be useful.
+#
+# FORMAT
+#
+# Each line has two or more fields, separated by semicolons.
+#
+# First Field: The first field is the short name for the property.
+# It is typically an abbreviation, but in a number of cases it is simply
+# a duplicate of the "long name" in the second field.
+# For Unihan database tags, the short name is actually a longer string than
+# the tag specified in the second field.
+#
+# Second Field: The second field is the long name for the property,
+# typically the formal name used in documentation about the property.
+#
+# The above are the preferred aliases. Other aliases may be listed in additional fields.
+#
+# Loose matching should be applied to all property names and property values, with
+# the exception of String Property values. With loose matching of property names and
+# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
+# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
+#
+# NOTE: Property value names are NOT unique across properties. For example:
+#
+#   AL means Arabic Letter for the Bidi_Class property, and
+#   AL means Above_Left for the Combining_Class property, and
+#   AL means Alphabetic for the Line_Break property.
+#
+# In addition, some property names may be the same as some property value names.
+# For example:
+#
+#   sc means the Script property, and
+#   Sc means the General_Category property value Currency_Symbol (Sc)
+#
+# The combination of property value and property name is, however, unique.
+#
+# For more information, see UAX #44, Unicode Character Database, and
+# UTS #18, Unicode Regular Expressions.
+# ================================================
+
+
+# ================================================
+# Numeric Properties
+# ================================================
+cjkAccountingNumeric     ; kAccountingNumeric
+cjkOtherNumeric          ; kOtherNumeric
+cjkPrimaryNumeric        ; kPrimaryNumeric
+nv                       ; Numeric_Value
+
+# ================================================
+# String Properties
+# ================================================
+cf                       ; Case_Folding
+cjkCompatibilityVariant  ; kCompatibilityVariant
+dm                       ; Decomposition_Mapping
+FC_NFKC                  ; FC_NFKC_Closure
+lc                       ; Lowercase_Mapping
+NFKC_CF                  ; NFKC_Casefold
+scf                      ; Simple_Case_Folding         ; sfc
+slc                      ; Simple_Lowercase_Mapping
+stc                      ; Simple_Titlecase_Mapping
+suc                      ; Simple_Uppercase_Mapping
+tc                       ; Titlecase_Mapping
+uc                       ; Uppercase_Mapping
+
+# ================================================
+# Miscellaneous Properties
+# ================================================
+bmg                      ; Bidi_Mirroring_Glyph
+bpb                      ; Bidi_Paired_Bracket
+cjkIICore                ; kIICore
+cjkIRG_GSource           ; kIRG_GSource
+cjkIRG_HSource           ; kIRG_HSource
+cjkIRG_JSource           ; kIRG_JSource
+cjkIRG_KPSource          ; kIRG_KPSource
+cjkIRG_KSource           ; kIRG_KSource
+cjkIRG_MSource           ; kIRG_MSource
+cjkIRG_SSource           ; kIRG_SSource
+cjkIRG_TSource           ; kIRG_TSource
+cjkIRG_UKSource          ; kIRG_UKSource
+cjkIRG_USource           ; kIRG_USource
+cjkIRG_VSource           ; kIRG_VSource
+cjkRSUnicode             ; kRSUnicode                  ; Unicode_Radical_Stroke; URS
+EqUIdeo                  ; Equivalent_Unified_Ideograph
+isc                      ; ISO_Comment
+JSN                      ; Jamo_Short_Name
+na                       ; Name
+na1                      ; Unicode_1_Name
+Name_Alias               ; Name_Alias
+scx                      ; Script_Extensions
+
+# ================================================
+# Catalog Properties
+# ================================================
+age                      ; Age
+blk                      ; Block
+sc                       ; Script
+
+# ================================================
+# Enumerated Properties
+# ================================================
+bc                       ; Bidi_Class
+bpt                      ; Bidi_Paired_Bracket_Type
+ccc                      ; Canonical_Combining_Class
+dt                       ; Decomposition_Type
+ea                       ; East_Asian_Width
+gc                       ; General_Category
+GCB                      ; Grapheme_Cluster_Break
+hst                      ; Hangul_Syllable_Type
+InPC                     ; Indic_Positional_Category
+InSC                     ; Indic_Syllabic_Category
+jg                       ; Joining_Group
+jt                       ; Joining_Type
+lb                       ; Line_Break
+NFC_QC                   ; NFC_Quick_Check
+NFD_QC                   ; NFD_Quick_Check
+NFKC_QC                  ; NFKC_Quick_Check
+NFKD_QC                  ; NFKD_Quick_Check
+nt                       ; Numeric_Type
+SB                       ; Sentence_Break
+vo                       ; Vertical_Orientation
+WB                       ; Word_Break
+
+# ================================================
+# Binary Properties
+# ================================================
+AHex                     ; ASCII_Hex_Digit
+Alpha                    ; Alphabetic
+Bidi_C                   ; Bidi_Control
+Bidi_M                   ; Bidi_Mirrored
+Cased                    ; Cased
+CE                       ; Composition_Exclusion
+CI                       ; Case_Ignorable
+Comp_Ex                  ; Full_Composition_Exclusion
+CWCF                     ; Changes_When_Casefolded
+CWCM                     ; Changes_When_Casemapped
+CWKCF                    ; Changes_When_NFKC_Casefolded
+CWL                      ; Changes_When_Lowercased
+CWT                      ; Changes_When_Titlecased
+CWU                      ; Changes_When_Uppercased
+Dash                     ; Dash
+Dep                      ; Deprecated
+DI                       ; Default_Ignorable_Code_Point
+Dia                      ; Diacritic
+EBase                    ; Emoji_Modifier_Base
+EComp                    ; Emoji_Component
+EMod                     ; Emoji_Modifier
+Emoji                    ; Emoji
+EPres                    ; Emoji_Presentation
+Ext                      ; Extender
+ExtPict                  ; Extended_Pictographic
+Gr_Base                  ; Grapheme_Base
+Gr_Ext                   ; Grapheme_Extend
+Gr_Link                  ; Grapheme_Link
+Hex                      ; Hex_Digit
+Hyphen                   ; Hyphen
+IDC                      ; ID_Continue
+Ideo                     ; Ideographic
+IDS                      ; ID_Start
+IDSB                     ; IDS_Binary_Operator
+IDST                     ; IDS_Trinary_Operator
+Join_C                   ; Join_Control
+LOE                      ; Logical_Order_Exception
+Lower                    ; Lowercase
+Math                     ; Math
+NChar                    ; Noncharacter_Code_Point
+OAlpha                   ; Other_Alphabetic
+ODI                      ; Other_Default_Ignorable_Code_Point
+OGr_Ext                  ; Other_Grapheme_Extend
+OIDC                     ; Other_ID_Continue
+OIDS                     ; Other_ID_Start
+OLower                   ; Other_Lowercase
+OMath                    ; Other_Math
+OUpper                   ; Other_Uppercase
+Pat_Syn                  ; Pattern_Syntax
+Pat_WS                   ; Pattern_White_Space
+PCM                      ; Prepended_Concatenation_Mark
+QMark                    ; Quotation_Mark
+Radical                  ; Radical
+RI                       ; Regional_Indicator
+SD                       ; Soft_Dotted
+STerm                    ; Sentence_Terminal
+Term                     ; Terminal_Punctuation
+UIdeo                    ; Unified_Ideograph
+Upper                    ; Uppercase
+VS                       ; Variation_Selector
+WSpace                   ; White_Space                 ; space
+XIDC                     ; XID_Continue
+XIDS                     ; XID_Start
+XO_NFC                   ; Expands_On_NFC
+XO_NFD                   ; Expands_On_NFD
+XO_NFKC                  ; Expands_On_NFKC
+XO_NFKD                  ; Expands_On_NFKD
+
+# ================================================
+# Total:    129
+
+# EOF
--- a/maint/Unicode.tables/PropertyValueAliases.txt
+++ b/maint/Unicode.tables/PropertyValueAliases.txt
--- a/maint/Unicode.tables/ScriptExtensions.txt
+++ b/maint/Unicode.tables/ScriptExtensions.txt
@ -1,6 +1,6 @@
-# ScriptExtensions-13.0.0.txt
-# Date: 2020-01-22, 00:07:43 GMT
-# © 2020 Unicode®, Inc.
+# ScriptExtensions-14.0.0.txt
+# Date: 2021-06-04, 02:19:38 GMT
+# © 2021 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
@ -11,10 +11,10 @@
 # with more than one script, but with a limited number of scripts.
 # For each code point, there is one or more property values.  Each such value is a Script property value.
 # For more information, see:
-#   UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
+#   UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
 #     Especially the sections:
-#       http://www.unicode.org/reports/tr24/#Assignment_Script_Values
-#       http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
+#       https://www.unicode.org/reports/tr24/#Assignment_Script_Values
+#       https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
 #
 # Each Script_Extensions value in this file consists of a set
 # of one or more abbreviated Script property values. The ordering of the
@ -119,6 +119,14 @@

 # ================================================

+# Script_Extensions=Syrc
+
+1DFA          ; Syrc # Mn       COMBINING DOT BELOW LEFT
+
+# Total code points: 1
+
+# ================================================
+
 # Script_Extensions=Arab Copt

 102E0         ; Arab Copt # Mn       COPTIC EPACT THOUSANDS MARK
@ -136,6 +144,15 @@

 # ================================================

+# Script_Extensions=Arab Nkoo
+
+FD3E          ; Arab Nkoo # Pe       ORNATE LEFT PARENTHESIS
+FD3F          ; Arab Nkoo # Ps       ORNATE RIGHT PARENTHESIS
+
+# Total code points: 2
+
+# ================================================
+
 # Script_Extensions=Arab Syrc

 064B..0655    ; Arab Syrc # Mn  [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
@ -186,10 +203,10 @@ A9CF          ; Bugi Java # Lm       JAVANESE PANGRANGKEP

 # Script_Extensions=Cprt Linb

-10100..10102  ; Cprt Linb # Po   [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
+10102         ; Cprt Linb # Po       AEGEAN CHECK MARK
 10137..1013F  ; Cprt Linb # So   [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT

-# Total code points: 12
+# Total code points: 10

 # ================================================

@ -342,6 +359,14 @@ FF9E..FF9F    ; Hira Kana # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFW

 # ================================================

+# Script_Extensions=Mani Ougr
+
+10AF2         ; Mani Ougr # Po       MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT
+
+# Total code points: 1
+
+# ================================================
+
 # Script_Extensions=Mong Phag

 1802..1803    ; Mong Phag # Po   [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
@ -383,6 +408,14 @@ FF9E..FF9F    ; Hira Kana # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFW

 # ================================================

+# Script_Extensions=Cpmn Cprt Linb
+
+10100..10101  ; Cpmn Cprt Linb # Po   [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
+
+# Total code points: 2
+
+# ================================================
+
 # Script_Extensions=Cprt Lina Linb

 10107..10133  ; Cprt Lina Linb # No  [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
@ -449,16 +482,6 @@ A92E          ; Kali Latn Mymr # Po       KAYAH LI SIGN CWI

 # ================================================

-# Script_Extensions=Arab Rohg Syrc Thaa Yezi
-
-060C          ; Arab Rohg Syrc Thaa Yezi # Po       ARABIC COMMA
-061B          ; Arab Rohg Syrc Thaa Yezi # Po       ARABIC SEMICOLON
-061F          ; Arab Rohg Syrc Thaa Yezi # Po       ARABIC QUESTION MARK
-
-# Total code points: 3
-
-# ================================================
-
 # Script_Extensions=Bopo Hang Hani Hira Kana

 3003          ; Bopo Hang Hani Hira Kana # Po       DITTO MARK
@ -474,6 +497,15 @@ FE45..FE46    ; Bopo Hang Hani Hira Kana # Po   [2] SESAME DOT..WHITE SESAME DOT

 # ================================================

+# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi
+
+060C          ; Arab Nkoo Rohg Syrc Thaa Yezi # Po       ARABIC COMMA
+061B          ; Arab Nkoo Rohg Syrc Thaa Yezi # Po       ARABIC SEMICOLON
+
+# Total code points: 2
+
+# ================================================
+
 # Script_Extensions=Bopo Hang Hani Hira Kana Yiii

 3001..3002    ; Bopo Hang Hani Hira Kana Yiii # Po   [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
@ -513,9 +545,9 @@ FF64..FF65    ; Bopo Hang Hani Hira Kana Yiii # Po   [2] HALFWIDTH IDEOGRAPHIC C

 # ================================================

-# Script_Extensions=Adlm Arab Mand Mani Phlp Rohg Sogd Syrc
+# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi

-0640          ; Adlm Arab Mand Mani Phlp Rohg Sogd Syrc # Lm       ARABIC TATWEEL
+061F          ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po       ARABIC QUESTION MARK

 # Total code points: 1

@ -529,6 +561,14 @@ FF64..FF65    ; Bopo Hang Hani Hira Kana Yiii # Po   [2] HALFWIDTH IDEOGRAPHIC C

 # ================================================

+# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc
+
+0640          ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm       ARABIC TATWEEL
+
+# Total code points: 1
+
+# ================================================
+
 # Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh

 A836..A837    ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So   [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
--- a/maint/Unicode.tables/Scripts.txt
+++ b/maint/Unicode.tables/Scripts.txt
@ -1,16 +1,16 @@
-# Scripts-13.0.0.txt
-# Date: 2020-01-22, 00:07:43 GMT
-# © 2020 Unicode®, Inc.
+# Scripts-14.0.0.txt
+# Date: 2021-07-10, 00:35:31 GMT
+# © 2021 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
 #   For documentation, see http://www.unicode.org/reports/tr44/
 # For more information, see:
-#   UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
+#   UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
 #     Especially the sections:
-#       http://www.unicode.org/reports/tr24/#Assignment_Script_Values
-#       http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
+#       https://www.unicode.org/reports/tr24/#Assignment_Script_Values
+#       https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
 #

 # ================================================
@ -154,7 +154,7 @@
 208A..208C    ; Common # Sm   [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
 208D          ; Common # Ps       SUBSCRIPT LEFT PARENTHESIS
 208E          ; Common # Pe       SUBSCRIPT RIGHT PARENTHESIS
-20A0..20BF    ; Common # Sc  [32] EURO-CURRENCY SIGN..BITCOIN SIGN
+20A0..20C0    ; Common # Sc  [33] EURO-CURRENCY SIGN..SOM SIGN
 2100..2101    ; Common # So   [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
 2102          ; Common # L&       DOUBLE-STRUCK CAPITAL C
 2103..2106    ; Common # So   [4] DEGREE CELSIUS..CADA UNA
@ -347,7 +347,16 @@
 2E42          ; Common # Ps       DOUBLE LOW-REVERSED-9 QUOTATION MARK
 2E43..2E4F    ; Common # Po  [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
 2E50..2E51    ; Common # So   [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
-2E52          ; Common # Po       TIRONIAN SIGN CAPITAL ET
+2E52..2E54    ; Common # Po   [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
+2E55          ; Common # Ps       LEFT SQUARE BRACKET WITH STROKE
+2E56          ; Common # Pe       RIGHT SQUARE BRACKET WITH STROKE
+2E57          ; Common # Ps       LEFT SQUARE BRACKET WITH DOUBLE STROKE
+2E58          ; Common # Pe       RIGHT SQUARE BRACKET WITH DOUBLE STROKE
+2E59          ; Common # Ps       TOP HALF LEFT PARENTHESIS
+2E5A          ; Common # Pe       TOP HALF RIGHT PARENTHESIS
+2E5B          ; Common # Ps       BOTTOM HALF LEFT PARENTHESIS
+2E5C          ; Common # Pe       BOTTOM HALF RIGHT PARENTHESIS
+2E5D          ; Common # Pd       OBLIQUE HYPHEN
 2FF0..2FFB    ; Common # So  [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
 3000          ; Common # Zs       IDEOGRAPHIC SPACE
 3001..3003    ; Common # Po   [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -511,9 +520,8 @@ FFFC..FFFD    ; Common # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
 10190..1019C  ; Common # So  [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
 101D0..101FC  ; Common # So  [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
 102E1..102FB  ; Common # No  [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
-16FE2         ; Common # Po       OLD CHINESE HOOK MARK
-16FE3         ; Common # Lm       OLD CHINESE ITERATION MARK
 1BCA0..1BCA3  ; Common # Cf   [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
+1CF50..1CFC3  ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
 1D000..1D0F5  ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
 1D100..1D126  ; Common # So  [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
 1D129..1D164  ; Common # So  [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
@ -523,7 +531,7 @@ FFFC..FFFD    ; Common # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
 1D173..1D17A  ; Common # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
 1D183..1D184  ; Common # So   [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
 1D18C..1D1A9  ; Common # So  [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
-1D1AE..1D1E8  ; Common # So  [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN
+1D1AE..1D1EA  ; Common # So  [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON
 1D2E0..1D2F3  ; Common # No  [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
 1D300..1D356  ; Common # So  [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
 1D360..1D378  ; Common # No  [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
@ -593,35 +601,36 @@ FFFC..FFFD    ; Common # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
 1F300..1F3FA  ; Common # So [251] CYCLONE..AMPHORA
 1F3FB..1F3FF  ; Common # Sk   [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
 1F400..1F6D7  ; Common # So [728] RAT..ELEVATOR
-1F6E0..1F6EC  ; Common # So  [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
+1F6DD..1F6EC  ; Common # So  [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING
 1F6F0..1F6FC  ; Common # So  [13] SATELLITE..ROLLER SKATE
 1F700..1F773  ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
 1F780..1F7D8  ; Common # So  [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
 1F7E0..1F7EB  ; Common # So  [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
+1F7F0         ; Common # So       HEAVY EQUALS SIGN
 1F800..1F80B  ; Common # So  [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
 1F810..1F847  ; Common # So  [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
 1F850..1F859  ; Common # So  [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
 1F860..1F887  ; Common # So  [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
 1F890..1F8AD  ; Common # So  [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
 1F8B0..1F8B1  ; Common # So   [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
-1F900..1F978  ; Common # So [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
-1F97A..1F9CB  ; Common # So  [82] FACE WITH PLEADING EYES..BUBBLE TEA
-1F9CD..1FA53  ; Common # So [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
+1F900..1FA53  ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
 1FA60..1FA6D  ; Common # So  [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
 1FA70..1FA74  ; Common # So   [5] BALLET SHOES..THONG SANDAL
-1FA78..1FA7A  ; Common # So   [3] DROP OF BLOOD..STETHOSCOPE
+1FA78..1FA7C  ; Common # So   [5] DROP OF BLOOD..CRUTCH
 1FA80..1FA86  ; Common # So   [7] YO-YO..NESTING DOLLS
-1FA90..1FAA8  ; Common # So  [25] RINGED PLANET..ROCK
-1FAB0..1FAB6  ; Common # So   [7] FLY..FEATHER
-1FAC0..1FAC2  ; Common # So   [3] ANATOMICAL HEART..PEOPLE HUGGING
-1FAD0..1FAD6  ; Common # So   [7] BLUEBERRIES..TEAPOT
+1FA90..1FAAC  ; Common # So  [29] RINGED PLANET..HAMSA
+1FAB0..1FABA  ; Common # So  [11] FLY..NEST WITH EGGS
+1FAC0..1FAC5  ; Common # So   [6] ANATOMICAL HEART..PERSON WITH CROWN
+1FAD0..1FAD9  ; Common # So  [10] BLUEBERRIES..JAR
+1FAE0..1FAE7  ; Common # So   [8] MELTING FACE..BUBBLES
+1FAF0..1FAF6  ; Common # So   [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
 1FB00..1FB92  ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
 1FB94..1FBCA  ; Common # So  [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
 1FBF0..1FBF9  ; Common # Nd  [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
 E0001         ; Common # Cf       LANGUAGE TAG
 E0020..E007F  ; Common # Cf  [96] TAG SPACE..CANCEL TAG

-# Total code points: 8087
+# Total code points: 8252

 # ================================================

@ -664,8 +673,11 @@ A770          ; Latin # Lm       MODIFIER LETTER US
 A771..A787    ; Latin # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
 A78B..A78E    ; Latin # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F          ; Latin # Lo       LATIN LETTER SINOLOGICAL DOT
-A790..A7BF    ; Latin # L&  [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
-A7C2..A7CA    ; Latin # L&   [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CA    ; Latin # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7D0..A7D1    ; Latin # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
+A7D3          ; Latin # L&       LATIN SMALL LETTER DOUBLE THORN
+A7D5..A7D9    ; Latin # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7F2..A7F4    ; Latin # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; Latin # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7          ; Latin # Lo       LATIN EPIGRAPHIC LETTER SIDEWAYS I
 A7F8..A7F9    ; Latin # Lm   [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@ -679,8 +691,14 @@ AB69          ; Latin # Lm       MODIFIER LETTER SMALL TURNED W
 FB00..FB06    ; Latin # L&   [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
 FF21..FF3A    ; Latin # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
 FF41..FF5A    ; Latin # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+10780..10785  ; Latin # Lm   [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
+10787..107B0  ; Latin # Lm  [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
+107B2..107BA  ; Latin # Lm   [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
+1DF00..1DF09  ; Latin # L&  [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
+1DF0A         ; Latin # Lo       LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
+1DF0B..1DF1E  ; Latin # L&  [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL

-# Total code points: 1374
+# Total code points: 1475

 # ================================================

@ -820,7 +838,7 @@ FB46..FB4F    ; Hebrew # Lo  [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
 060E..060F    ; Arabic # So   [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
 0610..061A    ; Arabic # Mn  [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
 061C          ; Arabic # Cf       ARABIC LETTER MARK
-061E          ; Arabic # Po       ARABIC TRIPLE DOT PUNCTUATION MARK
+061D..061E    ; Arabic # Po   [2] ARABIC END OF TEXT MARK..ARABIC TRIPLE DOT PUNCTUATION MARK
 0620..063F    ; Arabic # Lo  [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
 0641..064A    ; Arabic # Lo  [10] ARABIC LETTER FEH..ARABIC LETTER YEH
 0656..065F    ; Arabic # Mn  [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW
@ -843,18 +861,25 @@ FB46..FB4F    ; Hebrew # Lo  [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
 06FD..06FE    ; Arabic # So   [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
 06FF          ; Arabic # Lo       ARABIC LETTER HEH WITH INVERTED V
 0750..077F    ; Arabic # Lo  [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
-08A0..08B4    ; Arabic # Lo  [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
-08B6..08C7    ; Arabic # Lo  [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
-08D3..08E1    ; Arabic # Mn  [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
+0870..0887    ; Arabic # Lo  [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
+0888          ; Arabic # Sk       ARABIC RAISED ROUND DOT
+0889..088E    ; Arabic # Lo   [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
+0890..0891    ; Arabic # Cf   [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
+0898..089F    ; Arabic # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+08A0..08C8    ; Arabic # Lo  [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
+08C9          ; Arabic # Lm       ARABIC SMALL FARSI YEH
+08CA..08E1    ; Arabic # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
 08E3..08FF    ; Arabic # Mn  [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
 FB50..FBB1    ; Arabic # Lo  [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
-FBB2..FBC1    ; Arabic # Sk  [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
+FBB2..FBC2    ; Arabic # Sk  [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE
 FBD3..FD3D    ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
+FD40..FD4F    ; Arabic # So  [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH
 FD50..FD8F    ; Arabic # Lo  [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
 FD92..FDC7    ; Arabic # Lo  [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
+FDCF          ; Arabic # So       ARABIC LIGATURE SALAAMUHU ALAYNAA
 FDF0..FDFB    ; Arabic # Lo  [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
 FDFC          ; Arabic # Sc       RIAL SIGN
-FDFD          ; Arabic # So       ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+FDFD..FDFF    ; Arabic # So   [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL
 FE70..FE74    ; Arabic # Lo   [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
 FE76..FEFC    ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
 10E60..10E7E  ; Arabic # No  [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
@ -893,7 +918,7 @@ FE76..FEFC    ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
 1EEAB..1EEBB  ; Arabic # Lo  [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
 1EEF0..1EEF1  ; Arabic # Sm   [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL

-# Total code points: 1291
+# Total code points: 1365

 # ================================================

@ -1113,6 +1138,7 @@ A8FF          ; Devanagari # Mn       DEVANAGARI VOWEL SIGN AY
 0C0E..0C10    ; Telugu # Lo   [3] TELUGU LETTER E..TELUGU LETTER AI
 0C12..0C28    ; Telugu # Lo  [23] TELUGU LETTER O..TELUGU LETTER NA
 0C2A..0C39    ; Telugu # Lo  [16] TELUGU LETTER PA..TELUGU LETTER HA
+0C3C          ; Telugu # Mn       TELUGU SIGN NUKTA
 0C3D          ; Telugu # Lo       TELUGU SIGN AVAGRAHA
 0C3E..0C40    ; Telugu # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
 0C41..0C44    ; Telugu # Mc   [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
@ -1120,6 +1146,7 @@ A8FF          ; Devanagari # Mn       DEVANAGARI VOWEL SIGN AY
 0C4A..0C4D    ; Telugu # Mn   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
 0C55..0C56    ; Telugu # Mn   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
 0C58..0C5A    ; Telugu # Lo   [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
+0C5D          ; Telugu # Lo       TELUGU LETTER NAKAARA POLLU
 0C60..0C61    ; Telugu # Lo   [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
 0C62..0C63    ; Telugu # Mn   [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
 0C66..0C6F    ; Telugu # Nd  [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@ -1127,7 +1154,7 @@ A8FF          ; Devanagari # Mn       DEVANAGARI VOWEL SIGN AY
 0C78..0C7E    ; Telugu # No   [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
 0C7F          ; Telugu # So       TELUGU SIGN TUUMU

-# Total code points: 98
+# Total code points: 100

 # ================================================

@ -1150,13 +1177,13 @@ A8FF          ; Devanagari # Mn       DEVANAGARI VOWEL SIGN AY
 0CCA..0CCB    ; Kannada # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
 0CCC..0CCD    ; Kannada # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
 0CD5..0CD6    ; Kannada # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDE          ; Kannada # Lo       KANNADA LETTER FA
+0CDD..0CDE    ; Kannada # Lo   [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
 0CE0..0CE1    ; Kannada # Lo   [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
 0CE2..0CE3    ; Kannada # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
 0CE6..0CEF    ; Kannada # Nd  [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
 0CF1..0CF2    ; Kannada # Lo   [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA

-# Total code points: 89
+# Total code points: 90

 # ================================================

@ -1411,8 +1438,12 @@ AB09..AB0E    ; Ethiopic # Lo   [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DD
 AB11..AB16    ; Ethiopic # Lo   [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
 AB20..AB26    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
 AB28..AB2E    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
+1E7E0..1E7E6  ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
+1E7E8..1E7EB  ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
+1E7ED..1E7EE  ; Ethiopic # Lo   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
+1E7F0..1E7FE  ; Ethiopic # Lo  [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE

-# Total code points: 495
+# Total code points: 523

 # ================================================

@ -1430,8 +1461,9 @@ AB70..ABBF    ; Cherokee # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
 166E          ; Canadian_Aboriginal # Po       CANADIAN SYLLABICS FULL STOP
 166F..167F    ; Canadian_Aboriginal # Lo  [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
 18B0..18F5    ; Canadian_Aboriginal # Lo  [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
+11AB0..11ABF  ; Canadian_Aboriginal # Lo  [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA

-# Total code points: 710
+# Total code points: 726

 # ================================================

@ -1480,6 +1512,7 @@ AB70..ABBF    ; Cherokee # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
 1807..180A    ; Mongolian # Po   [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
 180B..180D    ; Mongolian # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
 180E          ; Mongolian # Cf       MONGOLIAN VOWEL SEPARATOR
+180F          ; Mongolian # Mn       MONGOLIAN FREE VARIATION SELECTOR FOUR
 1810..1819    ; Mongolian # Nd  [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
 1820..1842    ; Mongolian # Lo  [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
 1843          ; Mongolian # Lm       MONGOLIAN LETTER TODO LONG VOWEL SIGN
@ -1491,18 +1524,18 @@ AB70..ABBF    ; Cherokee # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
 18AA          ; Mongolian # Lo       MONGOLIAN LETTER MANCHU ALI GALI LHA
 11660..1166C  ; Mongolian # Po  [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT

-# Total code points: 167
+# Total code points: 168

 # ================================================

 3041..3096    ; Hiragana # Lo  [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
 309D..309E    ; Hiragana # Lm   [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
 309F          ; Hiragana # Lo       HIRAGANA DIGRAPH YORI
-1B001..1B11E  ; Hiragana # Lo [286] HIRAGANA LETTER ARCHAIC YE..HENTAIGANA LETTER N-MU-MO-2
+1B001..1B11F  ; Hiragana # Lo [287] HIRAGANA LETTER ARCHAIC YE..HIRAGANA LETTER ARCHAIC WU
 1B150..1B152  ; Hiragana # Lo   [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
 1F200         ; Hiragana # So       SQUARE HIRAGANA HOKA

-# Total code points: 379
+# Total code points: 380

 # ================================================

@ -1514,10 +1547,14 @@ AB70..ABBF    ; Cherokee # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
 3300..3357    ; Katakana # So  [88] SQUARE APAATO..SQUARE WATTO
 FF66..FF6F    ; Katakana # Lo  [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
 FF71..FF9D    ; Katakana # Lo  [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
+1AFF0..1AFF3  ; Katakana # Lm   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
+1AFF5..1AFFB  ; Katakana # Lm   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
+1AFFD..1AFFE  ; Katakana # Lm   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
 1B000         ; Katakana # Lo       KATAKANA LETTER ARCHAIC E
+1B120..1B122  ; Katakana # Lo   [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU
 1B164..1B167  ; Katakana # Lo   [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N

-# Total code points: 304
+# Total code points: 320

 # ================================================

@ -1538,19 +1575,21 @@ FF71..FF9D    ; Katakana # Lo  [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
 3038..303A    ; Han # Nl   [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
 303B          ; Han # Lm       VERTICAL IDEOGRAPHIC ITERATION MARK
 3400..4DBF    ; Han # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
-4E00..9FFC    ; Han # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
+4E00..9FFF    ; Han # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
 F900..FA6D    ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
 FA70..FAD9    ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+16FE2         ; Han # Po       OLD CHINESE HOOK MARK
+16FE3         ; Han # Lm       OLD CHINESE ITERATION MARK
 16FF0..16FF1  ; Han # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
-20000..2A6DD  ; Han # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
-2A700..2B734  ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
+20000..2A6DF  ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
+2A700..2B738  ; Han # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
 2B740..2B81D  ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
 2B820..2CEA1  ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
 2CEB0..2EBE0  ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
 2F800..2FA1D  ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
 30000..3134A  ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A

-# Total code points: 94204
+# Total code points: 94215

 # ================================================

@ -1593,15 +1632,14 @@ A490..A4C6    ; Yi # So  [55] YI RADICAL QOT..YI RADICAL KE
 0951..0954    ; Inherited # Mn   [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
 1AB0..1ABD    ; Inherited # Mn  [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
 1ABE          ; Inherited # Me       COMBINING PARENTHESES OVERLAY
-1ABF..1AC0    ; Inherited # Mn   [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
+1ABF..1ACE    ; Inherited # Mn  [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
 1CD0..1CD2    ; Inherited # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
 1CD4..1CE0    ; Inherited # Mn  [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
 1CE2..1CE8    ; Inherited # Mn   [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
 1CED          ; Inherited # Mn       VEDIC SIGN TIRYAK
 1CF4          ; Inherited # Mn       VEDIC TONE CANDRA ABOVE
 1CF8..1CF9    ; Inherited # Mn   [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1DC0..1DF9    ; Inherited # Mn  [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
-1DFB..1DFF    ; Inherited # Mn   [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1DC0..1DFF    ; Inherited # Mn  [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
 200C..200D    ; Inherited # Cf   [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
 20D0..20DC    ; Inherited # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
 20DD..20E0    ; Inherited # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
@ -1615,26 +1653,30 @@ FE20..FE2D    ; Inherited # Mn  [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
 101FD         ; Inherited # Mn       PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
 102E0         ; Inherited # Mn       COPTIC EPACT THOUSANDS MARK
 1133B         ; Inherited # Mn       COMBINING BINDU BELOW
+1CF00..1CF2D  ; Inherited # Mn  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
+1CF30..1CF46  ; Inherited # Mn  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
 1D167..1D169  ; Inherited # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
 1D17B..1D182  ; Inherited # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
 1D185..1D18B  ; Inherited # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
 1D1AA..1D1AD  ; Inherited # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
 E0100..E01EF  ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

-# Total code points: 573
+# Total code points: 657

 # ================================================

-1700..170C    ; Tagalog # Lo  [13] TAGALOG LETTER A..TAGALOG LETTER YA
-170E..1711    ; Tagalog # Lo   [4] TAGALOG LETTER LA..TAGALOG LETTER HA
+1700..1711    ; Tagalog # Lo  [18] TAGALOG LETTER A..TAGALOG LETTER HA
 1712..1714    ; Tagalog # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1715          ; Tagalog # Mc       TAGALOG SIGN PAMUDPOD
+171F          ; Tagalog # Lo       TAGALOG LETTER ARCHAIC RA

-# Total code points: 20
+# Total code points: 23

 # ================================================

 1720..1731    ; Hanunoo # Lo  [18] HANUNOO LETTER A..HANUNOO LETTER HA
-1732..1734    ; Hanunoo # Mn   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+1732..1733    ; Hanunoo # Mn   [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+1734          ; Hanunoo # Mc       HANUNOO SIGN PAMUDPOD

 # Total code points: 21

@ -1762,15 +1804,14 @@ E0100..E01EF  ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2

 # ================================================

-2C00..2C2E    ; Glagolitic # L&  [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
-2C30..2C5E    ; Glagolitic # L&  [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
+2C00..2C5F    ; Glagolitic # L&  [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI
 1E000..1E006  ; Glagolitic # Mn   [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
 1E008..1E018  ; Glagolitic # Mn  [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
 1E01B..1E021  ; Glagolitic # Mn   [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
 1E023..1E024  ; Glagolitic # Mn   [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
 1E026..1E02A  ; Glagolitic # Mn   [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA

-# Total code points: 132
+# Total code points: 134

 # ================================================

@ -1836,14 +1877,15 @@ A82C          ; Syloti_Nagri # Mn       SYLOTI NAGRI SIGN ALTERNATE HASANTA
 1B3D..1B41    ; Balinese # Mc   [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
 1B42          ; Balinese # Mn       BALINESE VOWEL SIGN PEPET
 1B43..1B44    ; Balinese # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
-1B45..1B4B    ; Balinese # Lo   [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
+1B45..1B4C    ; Balinese # Lo   [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
 1B50..1B59    ; Balinese # Nd  [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
 1B5A..1B60    ; Balinese # Po   [7] BALINESE PANTI..BALINESE PAMENENG
 1B61..1B6A    ; Balinese # So  [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
 1B6B..1B73    ; Balinese # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
 1B74..1B7C    ; Balinese # So   [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
+1B7D..1B7E    ; Balinese # Po   [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG

-# Total code points: 121
+# Total code points: 124

 # ================================================

@ -2178,9 +2220,10 @@ ABF0..ABF9    ; Meetei_Mayek # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
 110BB..110BC  ; Kaithi # Po   [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
 110BD         ; Kaithi # Cf       KAITHI NUMBER SIGN
 110BE..110C1  ; Kaithi # Po   [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
+110C2         ; Kaithi # Mn       KAITHI VOWEL SIGN VOCALIC R
 110CD         ; Kaithi # Cf       KAITHI NUMBER SIGN ABOVE

-# Total code points: 67
+# Total code points: 68

 # ================================================

@ -2207,9 +2250,13 @@ ABF0..ABF9    ; Meetei_Mayek # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
 11047..1104D  ; Brahmi # Po   [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
 11052..11065  ; Brahmi # No  [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
 11066..1106F  ; Brahmi # Nd  [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
+11070         ; Brahmi # Mn       BRAHMI SIGN OLD TAMIL VIRAMA
+11071..11072  ; Brahmi # Lo   [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O
+11073..11074  ; Brahmi # Mn   [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
+11075         ; Brahmi # Lo       BRAHMI LETTER OLD TAMIL LLA
 1107F         ; Brahmi # Mn       BRAHMI NUMBER JOINER

-# Total code points: 109
+# Total code points: 115

 # ================================================

@ -2301,9 +2348,10 @@ ABF0..ABF9    ; Meetei_Mayek # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
 116B6         ; Takri # Mc       TAKRI SIGN VIRAMA
 116B7         ; Takri # Mn       TAKRI SIGN NUKTA
 116B8         ; Takri # Lo       TAKRI LETTER ARCHAIC KHA
+116B9         ; Takri # Po       TAKRI ABBREVIATION SIGN
 116C0..116C9  ; Takri # Nd  [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE

-# Total code points: 67
+# Total code points: 68

 # ================================================

@ -2561,8 +2609,9 @@ ABF0..ABF9    ; Meetei_Mayek # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
 1173A..1173B  ; Ahom # No   [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
 1173C..1173E  ; Ahom # Po   [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
 1173F         ; Ahom # So       AHOM SYMBOL VI
+11740..11746  ; Ahom # Lo   [7] AHOM LETTER CA..AHOM LETTER LLA

-# Total code points: 58
+# Total code points: 65

 # ================================================

@ -2897,4 +2946,46 @@ ABF0..ABF9    ; Meetei_Mayek # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI

 # Total code points: 47

+# ================================================
+
+12F90..12FF0  ; Cypro_Minoan # Lo  [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
+12FF1..12FF2  ; Cypro_Minoan # Po   [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
+
+# Total code points: 99
+
+# ================================================
+
+10F70..10F81  ; Old_Uyghur # Lo  [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
+10F82..10F85  ; Old_Uyghur # Mn   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
+10F86..10F89  ; Old_Uyghur # Po   [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
+
+# Total code points: 26
+
+# ================================================
+
+16A70..16ABE  ; Tangsa # Lo  [79] TANGSA LETTER OZ..TANGSA LETTER ZA
+16AC0..16AC9  ; Tangsa # Nd  [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
+
+# Total code points: 89
+
+# ================================================
+
+1E290..1E2AD  ; Toto # Lo  [30] TOTO LETTER PA..TOTO LETTER A
+1E2AE         ; Toto # Mn       TOTO SIGN RISING TONE
+
+# Total code points: 31
+
+# ================================================
+
+10570..1057A  ; Vithkuqi # L&  [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
+1057C..1058A  ; Vithkuqi # L&  [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
+1058C..10592  ; Vithkuqi # L&   [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
+10594..10595  ; Vithkuqi # L&   [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
+10597..105A1  ; Vithkuqi # L&  [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
+105A3..105B1  ; Vithkuqi # L&  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
+105B3..105B9  ; Vithkuqi # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
+105BB..105BC  ; Vithkuqi # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+
+# Total code points: 70
+
 # EOF
--- a/maint/Unicode.tables/UnicodeData.txt
+++ b/maint/Unicode.tables/UnicodeData.txt
--- a/maint/Unicode.tables/emoji-data.txt
+++ b/maint/Unicode.tables/emoji-data.txt
@ -1,11 +1,11 @@
-# emoji-data.txt
-# Date: 2020-01-28, 20:52:38 GMT
-# © 2020 Unicode®, Inc.
+# emoji-data-14.0.0.txt
+# Date: 2021-08-26, 17:22:22 GMT
+# © 2021 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
 # Emoji Data for UTS #51
-# Version: 13.0
+# Used with Emoji Version 14.0 and subsequent minor revisions (if any)
 #
 # For documentation and usage, see http://www.unicode.org/reports/tr51
 #
@ -22,7 +22,7 @@
 # All omitted code points have Emoji=No 
 # @missing: 0000..10FFFF  ; Emoji ; No

-0023          ; Emoji                # E0.0   [1] (#️)       number sign
+0023          ; Emoji                # E0.0   [1] (#️)       hash sign
 002A          ; Emoji                # E0.0   [1] (*️)       asterisk
 0030..0039    ; Emoji                # E0.0  [10] (0️..9️)    digit zero..digit nine
 00A9          ; Emoji                # E0.6   [1] (©️)       copyright
@ -119,8 +119,8 @@
 2747          ; Emoji                # E0.6   [1] (❇️)       sparkle
 274C          ; Emoji                # E0.6   [1] (❌)       cross mark
 274E          ; Emoji                # E0.6   [1] (❎)       cross mark button
-2753..2755    ; Emoji                # E0.6   [3] (❓..❕)    question mark..white exclamation mark
-2757          ; Emoji                # E0.6   [1] (❗)       exclamation mark
+2753..2755    ; Emoji                # E0.6   [3] (❓..❕)    red question mark..white exclamation mark
+2757          ; Emoji                # E0.6   [1] (❗)       red exclamation mark
 2763          ; Emoji                # E1.0   [1] (❣️)       heart exclamation
 2764          ; Emoji                # E0.6   [1] (❤️)       red heart
 2795..2797    ; Emoji                # E0.6   [3] (➕..➗)    plus..divide
@ -239,7 +239,7 @@
 1F509         ; Emoji                # E1.0   [1] (🔉)       speaker medium volume
 1F50A..1F514  ; Emoji                # E0.6  [11] (🔊..🔔)    speaker high volume..bell
 1F515         ; Emoji                # E1.0   [1] (🔕)       bell with slash
-1F516..1F52B  ; Emoji                # E0.6  [22] (🔖..🔫)    bookmark..pistol
+1F516..1F52B  ; Emoji                # E0.6  [22] (🔖..🔫)    bookmark..water pistol
 1F52C..1F52D  ; Emoji                # E1.0   [2] (🔬..🔭)    microscope..telescope
 1F52E..1F53D  ; Emoji                # E0.6  [16] (🔮..🔽)    crystal ball..downwards button
 1F549..1F54A  ; Emoji                # E0.7   [2] (🕉️..🕊️)    om..dove
@ -294,7 +294,7 @@
 1F62E..1F62F  ; Emoji                # E1.0   [2] (😮..😯)    face with open mouth..hushed face
 1F630..1F633  ; Emoji                # E0.6   [4] (😰..😳)    anxious face with sweat..flushed face
 1F634         ; Emoji                # E1.0   [1] (😴)       sleeping face
-1F635         ; Emoji                # E0.6   [1] (😵)       dizzy face
+1F635         ; Emoji                # E0.6   [1] (😵)       face with crossed-out eyes
 1F636         ; Emoji                # E1.0   [1] (😶)       face without mouth
 1F637..1F640  ; Emoji                # E0.6  [10] (😷..🙀)    face with medical mask..weary cat
 1F641..1F644  ; Emoji                # E1.0   [4] (🙁..🙄)    slightly frowning face..face with rolling eyes
@ -341,6 +341,7 @@
 1F6D1..1F6D2  ; Emoji                # E3.0   [2] (🛑..🛒)    stop sign..shopping cart
 1F6D5         ; Emoji                # E12.0  [1] (🛕)       hindu temple
 1F6D6..1F6D7  ; Emoji                # E13.0  [2] (🛖..🛗)    hut..elevator
+1F6DD..1F6DF  ; Emoji                # E14.0  [3] (🛝..🛟)    playground slide..ring buoy
 1F6E0..1F6E5  ; Emoji                # E0.7   [6] (🛠️..🛥️)    hammer and wrench..motor boat
 1F6E9         ; Emoji                # E0.7   [1] (🛩️)       small airplane
 1F6EB..1F6EC  ; Emoji                # E1.0   [2] (🛫..🛬)    airplane departure..airplane arrival
@ -352,6 +353,7 @@
 1F6FA         ; Emoji                # E12.0  [1] (🛺)       auto rickshaw
 1F6FB..1F6FC  ; Emoji                # E13.0  [2] (🛻..🛼)    pickup truck..roller skate
 1F7E0..1F7EB  ; Emoji                # E12.0 [12] (🟠..🟫)    orange circle..brown square
+1F7F0         ; Emoji                # E14.0  [1] (🟰)       heavy equals sign
 1F90C         ; Emoji                # E13.0  [1] (🤌)       pinched fingers
 1F90D..1F90F  ; Emoji                # E12.0  [3] (🤍..🤏)    white heart..pinching hand
 1F910..1F918  ; Emoji                # E1.0   [9] (🤐..🤘)    zipper-mouth face..sign of the horns
@ -375,6 +377,7 @@
 1F972         ; Emoji                # E13.0  [1] (🥲)       smiling face with tear
 1F973..1F976  ; Emoji                # E11.0  [4] (🥳..🥶)    partying face..cold face
 1F977..1F978  ; Emoji                # E13.0  [2] (🥷..🥸)    ninja..disguised face
+1F979         ; Emoji                # E14.0  [1] (🥹)       face holding back tears
 1F97A         ; Emoji                # E11.0  [1] (🥺)       pleading face
 1F97B         ; Emoji                # E12.0  [1] (🥻)       sari
 1F97C..1F97F  ; Emoji                # E11.0  [4] (🥼..🥿)    lab coat..flat shoe
@ -392,21 +395,29 @@
 1F9C1..1F9C2  ; Emoji                # E11.0  [2] (🧁..🧂)    cupcake..salt
 1F9C3..1F9CA  ; Emoji                # E12.0  [8] (🧃..🧊)    beverage box..ice
 1F9CB         ; Emoji                # E13.0  [1] (🧋)       bubble tea
+1F9CC         ; Emoji                # E14.0  [1] (🧌)       troll
 1F9CD..1F9CF  ; Emoji                # E12.0  [3] (🧍..🧏)    person standing..deaf person
 1F9D0..1F9E6  ; Emoji                # E5.0  [23] (🧐..🧦)    face with monocle..socks
 1F9E7..1F9FF  ; Emoji                # E11.0 [25] (🧧..🧿)    red envelope..nazar amulet
 1FA70..1FA73  ; Emoji                # E12.0  [4] (🩰..🩳)    ballet shoes..shorts
 1FA74         ; Emoji                # E13.0  [1] (🩴)       thong sandal
 1FA78..1FA7A  ; Emoji                # E12.0  [3] (🩸..🩺)    drop of blood..stethoscope
+1FA7B..1FA7C  ; Emoji                # E14.0  [2] (🩻..🩼)    x-ray..crutch
 1FA80..1FA82  ; Emoji                # E12.0  [3] (🪀..🪂)    yo-yo..parachute
 1FA83..1FA86  ; Emoji                # E13.0  [4] (🪃..🪆)    boomerang..nesting dolls
 1FA90..1FA95  ; Emoji                # E12.0  [6] (🪐..🪕)    ringed planet..banjo
 1FA96..1FAA8  ; Emoji                # E13.0 [19] (🪖..🪨)    military helmet..rock
+1FAA9..1FAAC  ; Emoji                # E14.0  [4] (🪩..🪬)    mirror ball..hamsa
 1FAB0..1FAB6  ; Emoji                # E13.0  [7] (🪰..🪶)    fly..feather
+1FAB7..1FABA  ; Emoji                # E14.0  [4] (🪷..🪺)    lotus..nest with eggs
 1FAC0..1FAC2  ; Emoji                # E13.0  [3] (🫀..🫂)    anatomical heart..people hugging
+1FAC3..1FAC5  ; Emoji                # E14.0  [3] (🫃..🫅)    pregnant man..person with crown
 1FAD0..1FAD6  ; Emoji                # E13.0  [7] (🫐..🫖)    blueberries..teapot
+1FAD7..1FAD9  ; Emoji                # E14.0  [3] (🫗..🫙)    pouring liquid..jar
+1FAE0..1FAE7  ; Emoji                # E14.0  [8] (🫠..🫧)    melting face..bubbles
+1FAF0..1FAF6  ; Emoji                # E14.0  [7] (🫰..🫶)    hand with index finger and thumb crossed..heart hands

-# Total elements: 1367
+# Total elements: 1404

 # ================================================

@ -438,8 +449,8 @@
 2728          ; Emoji_Presentation   # E0.6   [1] (✨)       sparkles
 274C          ; Emoji_Presentation   # E0.6   [1] (❌)       cross mark
 274E          ; Emoji_Presentation   # E0.6   [1] (❎)       cross mark button
-2753..2755    ; Emoji_Presentation   # E0.6   [3] (❓..❕)    question mark..white exclamation mark
-2757          ; Emoji_Presentation   # E0.6   [1] (❗)       exclamation mark
+2753..2755    ; Emoji_Presentation   # E0.6   [3] (❓..❕)    red question mark..white exclamation mark
+2757          ; Emoji_Presentation   # E0.6   [1] (❗)       red exclamation mark
 2795..2797    ; Emoji_Presentation   # E0.6   [3] (➕..➗)    plus..divide
 27B0          ; Emoji_Presentation   # E0.6   [1] (➰)       curly loop
 27BF          ; Emoji_Presentation   # E1.0   [1] (➿)       double curly loop
@ -533,7 +544,7 @@
 1F509         ; Emoji_Presentation   # E1.0   [1] (🔉)       speaker medium volume
 1F50A..1F514  ; Emoji_Presentation   # E0.6  [11] (🔊..🔔)    speaker high volume..bell
 1F515         ; Emoji_Presentation   # E1.0   [1] (🔕)       bell with slash
-1F516..1F52B  ; Emoji_Presentation   # E0.6  [22] (🔖..🔫)    bookmark..pistol
+1F516..1F52B  ; Emoji_Presentation   # E0.6  [22] (🔖..🔫)    bookmark..water pistol
 1F52C..1F52D  ; Emoji_Presentation   # E1.0   [2] (🔬..🔭)    microscope..telescope
 1F52E..1F53D  ; Emoji_Presentation   # E0.6  [16] (🔮..🔽)    crystal ball..downwards button
 1F54B..1F54E  ; Emoji_Presentation   # E1.0   [4] (🕋..🕎)    kaaba..menorah
@ -569,7 +580,7 @@
 1F62E..1F62F  ; Emoji_Presentation   # E1.0   [2] (😮..😯)    face with open mouth..hushed face
 1F630..1F633  ; Emoji_Presentation   # E0.6   [4] (😰..😳)    anxious face with sweat..flushed face
 1F634         ; Emoji_Presentation   # E1.0   [1] (😴)       sleeping face
-1F635         ; Emoji_Presentation   # E0.6   [1] (😵)       dizzy face
+1F635         ; Emoji_Presentation   # E0.6   [1] (😵)       face with crossed-out eyes
 1F636         ; Emoji_Presentation   # E1.0   [1] (😶)       face without mouth
 1F637..1F640  ; Emoji_Presentation   # E0.6  [10] (😷..🙀)    face with medical mask..weary cat
 1F641..1F644  ; Emoji_Presentation   # E1.0   [4] (🙁..🙄)    slightly frowning face..face with rolling eyes
@ -614,6 +625,7 @@
 1F6D1..1F6D2  ; Emoji_Presentation   # E3.0   [2] (🛑..🛒)    stop sign..shopping cart
 1F6D5         ; Emoji_Presentation   # E12.0  [1] (🛕)       hindu temple
 1F6D6..1F6D7  ; Emoji_Presentation   # E13.0  [2] (🛖..🛗)    hut..elevator
+1F6DD..1F6DF  ; Emoji_Presentation   # E14.0  [3] (🛝..🛟)    playground slide..ring buoy
 1F6EB..1F6EC  ; Emoji_Presentation   # E1.0   [2] (🛫..🛬)    airplane departure..airplane arrival
 1F6F4..1F6F6  ; Emoji_Presentation   # E3.0   [3] (🛴..🛶)    kick scooter..canoe
 1F6F7..1F6F8  ; Emoji_Presentation   # E5.0   [2] (🛷..🛸)    sled..flying saucer
@ -621,6 +633,7 @@
 1F6FA         ; Emoji_Presentation   # E12.0  [1] (🛺)       auto rickshaw
 1F6FB..1F6FC  ; Emoji_Presentation   # E13.0  [2] (🛻..🛼)    pickup truck..roller skate
 1F7E0..1F7EB  ; Emoji_Presentation   # E12.0 [12] (🟠..🟫)    orange circle..brown square
+1F7F0         ; Emoji_Presentation   # E14.0  [1] (🟰)       heavy equals sign
 1F90C         ; Emoji_Presentation   # E13.0  [1] (🤌)       pinched fingers
 1F90D..1F90F  ; Emoji_Presentation   # E12.0  [3] (🤍..🤏)    white heart..pinching hand
 1F910..1F918  ; Emoji_Presentation   # E1.0   [9] (🤐..🤘)    zipper-mouth face..sign of the horns
@ -644,6 +657,7 @@
 1F972         ; Emoji_Presentation   # E13.0  [1] (🥲)       smiling face with tear
 1F973..1F976  ; Emoji_Presentation   # E11.0  [4] (🥳..🥶)    partying face..cold face
 1F977..1F978  ; Emoji_Presentation   # E13.0  [2] (🥷..🥸)    ninja..disguised face
+1F979         ; Emoji_Presentation   # E14.0  [1] (🥹)       face holding back tears
 1F97A         ; Emoji_Presentation   # E11.0  [1] (🥺)       pleading face
 1F97B         ; Emoji_Presentation   # E12.0  [1] (🥻)       sari
 1F97C..1F97F  ; Emoji_Presentation   # E11.0  [4] (🥼..🥿)    lab coat..flat shoe
@ -661,21 +675,29 @@
 1F9C1..1F9C2  ; Emoji_Presentation   # E11.0  [2] (🧁..🧂)    cupcake..salt
 1F9C3..1F9CA  ; Emoji_Presentation   # E12.0  [8] (🧃..🧊)    beverage box..ice
 1F9CB         ; Emoji_Presentation   # E13.0  [1] (🧋)       bubble tea
+1F9CC         ; Emoji_Presentation   # E14.0  [1] (🧌)       troll
 1F9CD..1F9CF  ; Emoji_Presentation   # E12.0  [3] (🧍..🧏)    person standing..deaf person
 1F9D0..1F9E6  ; Emoji_Presentation   # E5.0  [23] (🧐..🧦)    face with monocle..socks
 1F9E7..1F9FF  ; Emoji_Presentation   # E11.0 [25] (🧧..🧿)    red envelope..nazar amulet
 1FA70..1FA73  ; Emoji_Presentation   # E12.0  [4] (🩰..🩳)    ballet shoes..shorts
 1FA74         ; Emoji_Presentation   # E13.0  [1] (🩴)       thong sandal
 1FA78..1FA7A  ; Emoji_Presentation   # E12.0  [3] (🩸..🩺)    drop of blood..stethoscope
+1FA7B..1FA7C  ; Emoji_Presentation   # E14.0  [2] (🩻..🩼)    x-ray..crutch
 1FA80..1FA82  ; Emoji_Presentation   # E12.0  [3] (🪀..🪂)    yo-yo..parachute
 1FA83..1FA86  ; Emoji_Presentation   # E13.0  [4] (🪃..🪆)    boomerang..nesting dolls
 1FA90..1FA95  ; Emoji_Presentation   # E12.0  [6] (🪐..🪕)    ringed planet..banjo
 1FA96..1FAA8  ; Emoji_Presentation   # E13.0 [19] (🪖..🪨)    military helmet..rock
+1FAA9..1FAAC  ; Emoji_Presentation   # E14.0  [4] (🪩..🪬)    mirror ball..hamsa
 1FAB0..1FAB6  ; Emoji_Presentation   # E13.0  [7] (🪰..🪶)    fly..feather
+1FAB7..1FABA  ; Emoji_Presentation   # E14.0  [4] (🪷..🪺)    lotus..nest with eggs
 1FAC0..1FAC2  ; Emoji_Presentation   # E13.0  [3] (🫀..🫂)    anatomical heart..people hugging
+1FAC3..1FAC5  ; Emoji_Presentation   # E14.0  [3] (🫃..🫅)    pregnant man..person with crown
 1FAD0..1FAD6  ; Emoji_Presentation   # E13.0  [7] (🫐..🫖)    blueberries..teapot
+1FAD7..1FAD9  ; Emoji_Presentation   # E14.0  [3] (🫗..🫙)    pouring liquid..jar
+1FAE0..1FAE7  ; Emoji_Presentation   # E14.0  [8] (🫠..🫧)    melting face..bubbles
+1FAF0..1FAF6  ; Emoji_Presentation   # E14.0  [7] (🫰..🫶)    hand with index finger and thumb crossed..heart hands

-# Total elements: 1148
+# Total elements: 1185

 # ================================================

@ -738,15 +760,17 @@
 1F9BB         ; Emoji_Modifier_Base  # E12.0  [1] (🦻)       ear with hearing aid
 1F9CD..1F9CF  ; Emoji_Modifier_Base  # E12.0  [3] (🧍..🧏)    person standing..deaf person
 1F9D1..1F9DD  ; Emoji_Modifier_Base  # E5.0  [13] (🧑..🧝)    person..elf
+1FAC3..1FAC5  ; Emoji_Modifier_Base  # E14.0  [3] (🫃..🫅)    pregnant man..person with crown
+1FAF0..1FAF6  ; Emoji_Modifier_Base  # E14.0  [7] (🫰..🫶)    hand with index finger and thumb crossed..heart hands

-# Total elements: 122
+# Total elements: 132

 # ================================================

 # All omitted code points have Emoji_Component=No 
 # @missing: 0000..10FFFF  ; Emoji_Component ; No

-0023          ; Emoji_Component      # E0.0   [1] (#️)       number sign
+0023          ; Emoji_Component      # E0.0   [1] (#️)       hash sign
 002A          ; Emoji_Component      # E0.0   [1] (*️)       asterisk
 0030..0039    ; Emoji_Component      # E0.0  [10] (0️..9️)    digit zero..digit nine
 200D          ; Emoji_Component      # E0.0   [1] (‍)        zero width joiner
@ -902,8 +926,8 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 2747          ; Extended_Pictographic# E0.6   [1] (❇️)       sparkle
 274C          ; Extended_Pictographic# E0.6   [1] (❌)       cross mark
 274E          ; Extended_Pictographic# E0.6   [1] (❎)       cross mark button
-2753..2755    ; Extended_Pictographic# E0.6   [3] (❓..❕)    question mark..white exclamation mark
-2757          ; Extended_Pictographic# E0.6   [1] (❗)       exclamation mark
+2753..2755    ; Extended_Pictographic# E0.6   [3] (❓..❕)    red question mark..white exclamation mark
+2757          ; Extended_Pictographic# E0.6   [1] (❗)       red exclamation mark
 2763          ; Extended_Pictographic# E1.0   [1] (❣️)       heart exclamation
 2764          ; Extended_Pictographic# E0.6   [1] (❤️)       red heart
 2765..2767    ; Extended_Pictographic# E0.0   [3] (❥..❧)    ROTATED HEAVY BLACK HEART BULLET..ROTATED FLORAL HEART BULLET
@ -1041,7 +1065,7 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1F509         ; Extended_Pictographic# E1.0   [1] (🔉)       speaker medium volume
 1F50A..1F514  ; Extended_Pictographic# E0.6  [11] (🔊..🔔)    speaker high volume..bell
 1F515         ; Extended_Pictographic# E1.0   [1] (🔕)       bell with slash
-1F516..1F52B  ; Extended_Pictographic# E0.6  [22] (🔖..🔫)    bookmark..pistol
+1F516..1F52B  ; Extended_Pictographic# E0.6  [22] (🔖..🔫)    bookmark..water pistol
 1F52C..1F52D  ; Extended_Pictographic# E1.0   [2] (🔬..🔭)    microscope..telescope
 1F52E..1F53D  ; Extended_Pictographic# E0.6  [16] (🔮..🔽)    crystal ball..downwards button
 1F546..1F548  ; Extended_Pictographic# E0.0   [3] (🕆..🕈)    WHITE LATIN CROSS..CELTIC CROSS
@ -1117,7 +1141,7 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1F62E..1F62F  ; Extended_Pictographic# E1.0   [2] (😮..😯)    face with open mouth..hushed face
 1F630..1F633  ; Extended_Pictographic# E0.6   [4] (😰..😳)    anxious face with sweat..flushed face
 1F634         ; Extended_Pictographic# E1.0   [1] (😴)       sleeping face
-1F635         ; Extended_Pictographic# E0.6   [1] (😵)       dizzy face
+1F635         ; Extended_Pictographic# E0.6   [1] (😵)       face with crossed-out eyes
 1F636         ; Extended_Pictographic# E1.0   [1] (😶)       face without mouth
 1F637..1F640  ; Extended_Pictographic# E0.6  [10] (😷..🙀)    face with medical mask..weary cat
 1F641..1F644  ; Extended_Pictographic# E1.0   [4] (🙁..🙄)    slightly frowning face..face with rolling eyes
@ -1166,7 +1190,8 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1F6D3..1F6D4  ; Extended_Pictographic# E0.0   [2] (🛓..🛔)    STUPA..PAGODA
 1F6D5         ; Extended_Pictographic# E12.0  [1] (🛕)       hindu temple
 1F6D6..1F6D7  ; Extended_Pictographic# E13.0  [2] (🛖..🛗)    hut..elevator
-1F6D8..1F6DF  ; Extended_Pictographic# E0.0   [8] (🛘..🛟)    <reserved-1F6D8>..<reserved-1F6DF>
+1F6D8..1F6DC  ; Extended_Pictographic# E0.0   [5] (🛘..🛜)    <reserved-1F6D8>..<reserved-1F6DC>
+1F6DD..1F6DF  ; Extended_Pictographic# E14.0  [3] (🛝..🛟)    playground slide..ring buoy
 1F6E0..1F6E5  ; Extended_Pictographic# E0.7   [6] (🛠️..🛥️)    hammer and wrench..motor boat
 1F6E6..1F6E8  ; Extended_Pictographic# E0.0   [3] (🛦..🛨)    UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE
 1F6E9         ; Extended_Pictographic# E0.7   [1] (🛩️)       small airplane
@ -1185,7 +1210,9 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1F774..1F77F  ; Extended_Pictographic# E0.0  [12] (🝴..🝿)    <reserved-1F774>..<reserved-1F77F>
 1F7D5..1F7DF  ; Extended_Pictographic# E0.0  [11] (🟕..🟟)    CIRCLED TRIANGLE..<reserved-1F7DF>
 1F7E0..1F7EB  ; Extended_Pictographic# E12.0 [12] (🟠..🟫)    orange circle..brown square
-1F7EC..1F7FF  ; Extended_Pictographic# E0.0  [20] (🟬..🟿)    <reserved-1F7EC>..<reserved-1F7FF>
+1F7EC..1F7EF  ; Extended_Pictographic# E0.0   [4] (🟬..🟯)    <reserved-1F7EC>..<reserved-1F7EF>
+1F7F0         ; Extended_Pictographic# E14.0  [1] (🟰)       heavy equals sign
+1F7F1..1F7FF  ; Extended_Pictographic# E0.0  [15] (🟱..🟿)    <reserved-1F7F1>..<reserved-1F7FF>
 1F80C..1F80F  ; Extended_Pictographic# E0.0   [4] (🠌..🠏)    <reserved-1F80C>..<reserved-1F80F>
 1F848..1F84F  ; Extended_Pictographic# E0.0   [8] (🡈..🡏)    <reserved-1F848>..<reserved-1F84F>
 1F85A..1F85F  ; Extended_Pictographic# E0.0   [6] (🡚..🡟)    <reserved-1F85A>..<reserved-1F85F>
@ -1214,7 +1241,7 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1F972         ; Extended_Pictographic# E13.0  [1] (🥲)       smiling face with tear
 1F973..1F976  ; Extended_Pictographic# E11.0  [4] (🥳..🥶)    partying face..cold face
 1F977..1F978  ; Extended_Pictographic# E13.0  [2] (🥷..🥸)    ninja..disguised face
-1F979         ; Extended_Pictographic# E0.0   [1] (🥹)       <reserved-1F979>
+1F979         ; Extended_Pictographic# E14.0  [1] (🥹)       face holding back tears
 1F97A         ; Extended_Pictographic# E11.0  [1] (🥺)       pleading face
 1F97B         ; Extended_Pictographic# E12.0  [1] (🥻)       sari
 1F97C..1F97F  ; Extended_Pictographic# E11.0  [4] (🥼..🥿)    lab coat..flat shoe
@ -1232,7 +1259,7 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1F9C1..1F9C2  ; Extended_Pictographic# E11.0  [2] (🧁..🧂)    cupcake..salt
 1F9C3..1F9CA  ; Extended_Pictographic# E12.0  [8] (🧃..🧊)    beverage box..ice
 1F9CB         ; Extended_Pictographic# E13.0  [1] (🧋)       bubble tea
-1F9CC         ; Extended_Pictographic# E0.0   [1] (🧌)       <reserved-1F9CC>
+1F9CC         ; Extended_Pictographic# E14.0  [1] (🧌)       troll
 1F9CD..1F9CF  ; Extended_Pictographic# E12.0  [3] (🧍..🧏)    person standing..deaf person
 1F9D0..1F9E6  ; Extended_Pictographic# E5.0  [23] (🧐..🧦)    face with monocle..socks
 1F9E7..1F9FF  ; Extended_Pictographic# E11.0 [25] (🧧..🧿)    red envelope..nazar amulet
@ -1241,19 +1268,28 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1FA74         ; Extended_Pictographic# E13.0  [1] (🩴)       thong sandal
 1FA75..1FA77  ; Extended_Pictographic# E0.0   [3] (🩵..🩷)    <reserved-1FA75>..<reserved-1FA77>
 1FA78..1FA7A  ; Extended_Pictographic# E12.0  [3] (🩸..🩺)    drop of blood..stethoscope
-1FA7B..1FA7F  ; Extended_Pictographic# E0.0   [5] (🩻..🩿)    <reserved-1FA7B>..<reserved-1FA7F>
+1FA7B..1FA7C  ; Extended_Pictographic# E14.0  [2] (🩻..🩼)    x-ray..crutch
+1FA7D..1FA7F  ; Extended_Pictographic# E0.0   [3] (🩽..🩿)    <reserved-1FA7D>..<reserved-1FA7F>
 1FA80..1FA82  ; Extended_Pictographic# E12.0  [3] (🪀..🪂)    yo-yo..parachute
 1FA83..1FA86  ; Extended_Pictographic# E13.0  [4] (🪃..🪆)    boomerang..nesting dolls
 1FA87..1FA8F  ; Extended_Pictographic# E0.0   [9] (🪇..🪏)    <reserved-1FA87>..<reserved-1FA8F>
 1FA90..1FA95  ; Extended_Pictographic# E12.0  [6] (🪐..🪕)    ringed planet..banjo
 1FA96..1FAA8  ; Extended_Pictographic# E13.0 [19] (🪖..🪨)    military helmet..rock
-1FAA9..1FAAF  ; Extended_Pictographic# E0.0   [7] (🪩..🪯)    <reserved-1FAA9>..<reserved-1FAAF>
+1FAA9..1FAAC  ; Extended_Pictographic# E14.0  [4] (🪩..🪬)    mirror ball..hamsa
+1FAAD..1FAAF  ; Extended_Pictographic# E0.0   [3] (🪭..🪯)    <reserved-1FAAD>..<reserved-1FAAF>
 1FAB0..1FAB6  ; Extended_Pictographic# E13.0  [7] (🪰..🪶)    fly..feather
-1FAB7..1FABF  ; Extended_Pictographic# E0.0   [9] (🪷..🪿)    <reserved-1FAB7>..<reserved-1FABF>
+1FAB7..1FABA  ; Extended_Pictographic# E14.0  [4] (🪷..🪺)    lotus..nest with eggs
+1FABB..1FABF  ; Extended_Pictographic# E0.0   [5] (🪻..🪿)    <reserved-1FABB>..<reserved-1FABF>
 1FAC0..1FAC2  ; Extended_Pictographic# E13.0  [3] (🫀..🫂)    anatomical heart..people hugging
-1FAC3..1FACF  ; Extended_Pictographic# E0.0  [13] (🫃..🫏)    <reserved-1FAC3>..<reserved-1FACF>
+1FAC3..1FAC5  ; Extended_Pictographic# E14.0  [3] (🫃..🫅)    pregnant man..person with crown
+1FAC6..1FACF  ; Extended_Pictographic# E0.0  [10] (🫆..🫏)    <reserved-1FAC6>..<reserved-1FACF>
 1FAD0..1FAD6  ; Extended_Pictographic# E13.0  [7] (🫐..🫖)    blueberries..teapot
-1FAD7..1FAFF  ; Extended_Pictographic# E0.0  [41] (🫗..🫿)    <reserved-1FAD7>..<reserved-1FAFF>
+1FAD7..1FAD9  ; Extended_Pictographic# E14.0  [3] (🫗..🫙)    pouring liquid..jar
+1FADA..1FADF  ; Extended_Pictographic# E0.0   [6] (🫚..🫟)    <reserved-1FADA>..<reserved-1FADF>
+1FAE0..1FAE7  ; Extended_Pictographic# E14.0  [8] (🫠..🫧)    melting face..bubbles
+1FAE8..1FAEF  ; Extended_Pictographic# E0.0   [8] (🫨..🫯)    <reserved-1FAE8>..<reserved-1FAEF>
+1FAF0..1FAF6  ; Extended_Pictographic# E14.0  [7] (🫰..🫶)    hand with index finger and thumb crossed..heart hands
+1FAF7..1FAFF  ; Extended_Pictographic# E0.0   [9] (🫷..🫿)    <reserved-1FAF7>..<reserved-1FAFF>
 1FC00..1FFFD  ; Extended_Pictographic# E0.0[1022] (🰀..🿽)    <reserved-1FC00>..<reserved-1FFFD>

 # Total elements: 3537
--- a/maint/ucptest.c
+++ b/maint/ucptest.c
@ -2,7 +2,7 @@
 * A program for testing the Unicode property table *
 ***************************************************/

-/* Copyright (c) University of Cambridge 2008-2020 */
+/* Copyright (c) University of Cambridge 2008-2022 */

 /* Compile thus:

@ -14,40 +14,50 @@
 */

 /* This is a hacked-up program for testing the Unicode properties tables of
-PCRE2. It can also be used for finding characters with certain properties.
-I wrote it to help with debugging PCRE, and have added things that I found
-useful, in a rather haphazard way. The code has never been seriously tidied or
-checked for robustness, but it shouldn't now give compiler warnings.
+PCRE2. It can also be used for finding characters with certain properties. I
+wrote it to help with debugging, and have added things that I found useful, in
+a rather haphazard way. The code has never been seriously tidied or checked for
+robustness, but it shouldn't now give compiler warnings.

-There is only one option: "-s". If given, it applies only to the "findprop" 
-command. It causes the UTF-8 sequence of bytes that encode the character to be 
-output between angle brackets at the end of the line. On a UTF-8 terminal, this 
+There is only one option: "-s". If given, it applies only to the "findprop"
+command. It causes the UTF-8 sequence of bytes that encode the character to be
+output between angle brackets at the end of the line. On a UTF-8 terminal, this
 will show the appropriate graphic for the code point.

 If the command has arguments, they are concatenated into a buffer, separated by
 spaces. If the first argument starts "U+" or consists entirely of hexadecimal
 digits, "findprop" is inserted at the start. The buffer is then processed as a
 single line file, after which the program exits. If there are no arguments, the
-program reads commands line by line on stdin and writes output to stdout. The 
+program reads commands line by line on stdin and writes output to stdout. The
 return code is always zero.

 There are three commands:

-"findprop" must be followed by a space-separated list of Unicode code points as
-hex numbers, either without any prefix or starting with "U+". The output is one
-line per character, giving its Unicode properties followed by its other case or 
-cases if one or more exist, followed by its Script Extension list if it is not
-just the same as the base script. This list is in square brackets. The
-properties are:
+The command "findprop" must be followed by a space-separated list of Unicode
+code points as hex numbers, either without any prefix or starting with "U+", or
+as individual UTF-8 characters preceded by '+'. For example:

-General type        e.g. Letter
-Specific type       e.g. Upper case letter
-Script              e.g. Medefaidrin
-Grapheme break type e.g. Extend (most common is Other)
+  findprop U+1234 5Abc +?

-"find" must be followed by a list of property names and their values. The 
-values are case-sensitive. This finds characters that have those properties. If
-multiple properties are listed, they must all be matched. Currently supported:
+The output is one long line per character, listing Unicode properties that have
+values, followed by its other case or cases if one or more exist, followed by
+its Script Extension list if there is one. This list is in square brackets. A
+second list in square brackets gives all the Boolean properties of the
+character. The properties that come first are:
+
+  Bidi class          e.g. NSM (most common is L)
+  General type        e.g. Letter
+  Specific type       e.g. Upper case letter
+  Script              e.g. Medefaidrin
+  Grapheme break type e.g. Extend (most common is Other)
+
+Script names and Boolean property names are all in lower case, with underscores
+and hyphens removed, because that's how they are stored for "loose" matching.
+
+The command "find" must be followed by a list of property types and their
+values. The values are case-sensitive, except for bidi class. This finds
+characters that have those properties. If multiple properties are listed, they
+must all be matched. Currently supported:

  script <name>    The character must have this script property. Only one
                     such script may be given.
@ -56,17 +66,20 @@ multiple properties are listed, they must all be matched. Currently supported:
                     scripts must be present.
  type <abbrev>    The character's specific type (e.g. Lu or Nd) must match.
  gbreak <name>    The grapheme break property must match.
+  bidi <class>     The character's bidi class must match.
+  bool <name>      The character's Boolean property list must contain this
+                     property.

 If a <name> or <abbrev> is preceded by !, the value must NOT be present. For
-Script Extensions, there may be a mixture of positive and negative
-requirements. All must be satisfied.
+Script Extensions and Boolean properties, there may be a mixture of positive
+and negative requirements. All must be satisfied.

 Sequences of two or more characters are shown as ranges, for example
 U+0041..U+004A. No more than 100 lines are are output. If there are more
-characters, the list ends with ... 
+characters, the list ends with ...

-"list" must be followed by a property name (script, type, or gbreak). The
-defined values for that property are listed. */
+The command "list" must be followed by one of the property names script,
+bool, type, gbreak or bidi. The defined values for that property are listed. */


 #ifdef HAVE_CONFIG_H
@ -97,6 +110,9 @@ defined values for that property are listed. */
 #include <editline/readline.h>
 #else
 #include <readline/readline.h>
+#ifdef RL_VERSION_MAJOR
+#include <readline/history.h>
+#endif
 #endif
 #endif
 #endif
@ -145,7 +161,7 @@ static const unsigned char *type_names[] = {
  US"So", US"Other symbol",
  US"Zl", US"Line separator",
  US"Zp", US"Paragraph separator",
-  US"Zs", US"Space separator" 
+  US"Zs", US"Space separator"
 };

 static const unsigned char *gb_names[] = {
@ -160,12 +176,37 @@ static const unsigned char *gb_names[] = {
  US"T",                     US"Hangul syllable type T",
  US"LV",                    US"Hangul syllable type LV",
  US"LVT",                   US"Hangul syllable type LVT",
-  US"RegionalIndicator",     US"",
+  US"Regional_Indicator",    US"",
  US"Other",                 US"",
  US"ZWJ",                   US"zero width joiner",
  US"Extended_Pictographic", US""
 };

+static const unsigned char *bd_names[] = {  /* abbrev, description pairs */
+  US"AL",   US"Arabic letter",
+  US"AN",   US"Arabic number",
+  US"B",    US"Paragraph separator",
+  US"BN",   US"Boundary neutral",
+  US"CS",   US"Common separator",
+  US"EN",   US"European number",
+  US"ES",   US"European separator",
+  US"ET",   US"European terminator",
+  US"FSI",  US"First string isolate",
+  US"L",    US"Left-to-right",
+  US"LRE",  US"Left-to-right embedding",
+  US"LRI",  US"Left-to-right isolate",
+  US"LRO",  US"Left-to-right override",
+  US"NSM",  US"Non-spacing mark",
+  US"ON",   US"Other neutral",
+  US"PDF",  US"Pop directional format",
+  US"PDI",  US"Pop directional isolate",
+  US"R",    US"Right-to-left",
+  US"RLE",  US"Right-to-left embedding",
+  US"RLI",  US"Right-to-left isolate",
+  US"RLO",  US"Right-to-left override",
+  US"S",    US"Segment separator",
+  US"WS",   US"White space"
+};

 static const unsigned int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
@ -173,6 +214,41 @@ static const unsigned int utf8_table1[] = {
 static const int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

+/* Pick up the remaining bytes of a multibyte UTF-8 character whose first byte
+is already in c, leaving the code point in c and advancing eptr past them. */
+
+#define GETUTF8INC(c, eptr) \
+    { \
+    if ((c & 0x20u) == 0) \
+      c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \
+    else if ((c & 0x10u) == 0) \
+      { \
+      c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \
+      eptr += 2; \
+      } \
+    else if ((c & 0x08u) == 0) \
+      { \
+      c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \
+          ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
+      eptr += 3; \
+      } \
+    else if ((c & 0x04u) == 0) \
+      { \
+      c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \
+          ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \
+          (eptr[3] & 0x3fu); \
+      eptr += 4; \
+      } \
+    else \
+      { \
+      c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \
+          ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \
+          ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \
+      eptr += 5; \
+      } \
+    }
+
+

 /*************************************************
 *       Convert character value to UTF-8         *
@ -224,25 +300,54 @@ return isatty(fileno(stdin));


 /*************************************************
-*      Get script name from ucp ident            *
+*        Get property name from ucp ident        *
 *************************************************/

-static const char *
-get_scriptname(int script)
-{
-size_t i;
-const ucp_type_table *u;
+/* The utt table contains both full names and abbreviations. So search for both
+and use the longer if two are found, unless the first one is only 3 characters
+and we are looking for a script (some scripts have 3-character names). If this
+were not just a test program it might be worth making some kind of reverse
+index. */

+static const char *
+get_propname(int prop, int type)
+{
+size_t i, j, len;
+size_t foundlist[2];
+const char *yield;
+int typex = (type == PT_SC)? PT_SCX : type;
+
+j = 0;
 for (i = 0; i < PRIV(utt_size); i++)
  {
-  u = PRIV(utt) + i; 
-  if (u->type == PT_SC && u->value == script) break;
+  const ucp_type_table *u = PRIV(utt) + i;
+  if ((u->type == type || u->type == typex) && u->value == prop)
+    {
+    foundlist[j++] = i;
+    if (j >= 2) break;
+    }
  }
-if (i < PRIV(utt_size))
-  return PRIV(utt_names) + u->name_offset;
  
-return "??";
-}  
+if (j == 0) return "??";
+
+yield = NULL;
+len = 0;
+
+for (i = 0; i < j; i++)
+  {
+  const char *s = PRIV(utt_names) + (PRIV(utt) + foundlist[i])->name_offset;
+  size_t sl = strlen(s);
+
+  if (sl > len)
+    {
+    yield = s;
+    if (sl == 3 && type == PT_SC) break;
+    len = sl;
+    }
+  }
+
+return yield;
+}


 /*************************************************
@ -257,13 +362,16 @@ int fulltype = UCD_CHARTYPE(c);
 int script = UCD_SCRIPT(c);
 int scriptx = UCD_SCRIPTX(c);
 int gbprop = UCD_GRAPHBREAK(c);
+int bidi = UCD_BIDICLASS(c);
 unsigned int othercase = UCD_OTHERCASE(c);
 int caseset = UCD_CASESET(c);
+int bprops = UCD_BPROPS(c);

 const unsigned char *fulltypename = US"??";
 const unsigned char *typename = US"??";
 const unsigned char *graphbreak = US"??";
-const unsigned char *scriptname = CUS get_scriptname(script);
+const unsigned char *bidiclass = US"??";
+const unsigned char *scriptname = CUS get_propname(script, PT_SC);

 switch (type)
  {
@ -323,7 +431,7 @@ switch(gbprop)
  case ucp_gbT:            graphbreak = US"Hangul syllable type T"; break;
  case ucp_gbLV:           graphbreak = US"Hangul syllable type LV"; break;
  case ucp_gbLVT:          graphbreak = US"Hangul syllable type LVT"; break;
-  case ucp_gbRegionalIndicator:
+  case ucp_gbRegional_Indicator:
                           graphbreak = US"Regional Indicator"; break;
  case ucp_gbOther:        graphbreak = US"Other"; break;
  case ucp_gbZWJ:          graphbreak = US"Zero Width Joiner"; break;
@ -332,7 +440,37 @@ switch(gbprop)
  default:                 graphbreak = US"Unknown"; break;
  }

-printf("U+%04X %s: %s, %s, %s", c, typename, fulltypename, scriptname, graphbreak);
+switch(bidi)
+  {
+  case ucp_bidiAL:   bidiclass = US"AL "; break;
+  case ucp_bidiFSI:  bidiclass = US"FSI"; break;
+  case ucp_bidiL:    bidiclass = US"L  "; break;
+  case ucp_bidiLRE:  bidiclass = US"LRE"; break;
+  case ucp_bidiLRI:  bidiclass = US"LRI"; break;
+  case ucp_bidiLRO:  bidiclass = US"LRO"; break;
+  case ucp_bidiPDF:  bidiclass = US"PDF"; break;
+  case ucp_bidiPDI:  bidiclass = US"PDI"; break;
+  case ucp_bidiR:    bidiclass = US"R  "; break;
+  case ucp_bidiRLE:  bidiclass = US"RLE"; break;
+  case ucp_bidiRLI:  bidiclass = US"RLI"; break;
+  case ucp_bidiRLO:  bidiclass = US"RLO"; break;
+  case ucp_bidiAN:   bidiclass = US"AN "; break;
+  case ucp_bidiB:    bidiclass = US"B  "; break;
+  case ucp_bidiBN:   bidiclass = US"BN "; break;
+  case ucp_bidiCS:   bidiclass = US"CS "; break;
+  case ucp_bidiEN:   bidiclass = US"EN "; break;
+  case ucp_bidiES:   bidiclass = US"ES "; break;
+  case ucp_bidiET:   bidiclass = US"ET "; break;
+  case ucp_bidiNSM:  bidiclass = US"NSM"; break;
+  case ucp_bidiON:   bidiclass = US"ON "; break;
+  case ucp_bidiS:    bidiclass = US"S  "; break;
+  case ucp_bidiWS:   bidiclass = US"WS "; break;
+  default:           bidiclass = US"???"; break;
+  }
+
+printf("U+%04X %s %s: %s, %s, %s", c, bidiclass, typename, fulltypename,
+  scriptname, graphbreak);
+
 if (is_just_one && othercase != c)
  {
  printf(", U+%04X", othercase);
@ -341,36 +479,47 @@ if (is_just_one && othercase != c)
    const uint32_t *p = PRIV(ucd_caseless_sets) + caseset - 1;
    while (*(++p) < NOTACHAR)
      {
-      unsigned int d = *p;  
+      unsigned int d = *p;
      if (d != othercase && d != c) printf(", U+%04X", d);
-      } 
+      }
    }
  }

-if (scriptx != script)
+if (scriptx != 0)
  {
+  const char *sep = "";
+  const uint32_t *p = PRIV(ucd_script_sets) + scriptx;
  printf(", [");
-  if (scriptx >= 0)
-    printf("%s", get_scriptname(scriptx));
-  else
+  for (int i = 0; i < ucp_Unknown; i++)
+  if (MAPBIT(p, i) != 0)
    {
-    const char *sep = "";
-    const uint8_t *p = PRIV(ucd_script_sets) - scriptx;
-    while (*p != 0)
-      {
-      printf("%s%s", sep, get_scriptname(*p++));
-      sep = ", ";
-      }
+    printf("%s%s", sep, get_propname(i, PT_SC));
+    sep = ", ";
    }
  printf("]");
  }
-  
+
+if (bprops != 0)
+  {
+  const char *sep = "";
+  const uint32_t *p = PRIV(ucd_boolprop_sets) + 
+    bprops * ucd_boolprop_sets_item_size;
+  printf(", [");
+  for (int i = 0; i < ucp_Bprop_Count; i++)
+  if (MAPBIT(p, i) != 0)
+    {
+    printf("%s%s", sep, get_propname(i, PT_BOOL));
+    sep = ", ";
+    }
+  printf("]");
+  }
+
 if (show_character && is_just_one)
  {
  unsigned char buffer[8];
  size_t len = ord2utf8(c, buffer);
-  printf(", >%.*s<", (int)len, buffer);  
-  }  
+  printf(", >%.*s<", (int)len, buffer);
+  }

 printf("\n");
 }
@ -384,19 +533,23 @@ printf("\n");
 static void
 find_chars(unsigned char *s)
 {
-unsigned char name[24];
-unsigned char value[24];
+unsigned char name[128];
+unsigned char value[128];
 unsigned char *t;
 unsigned int count= 0;
-int scriptx_list[24];
+int scriptx_list[128];
 unsigned int scriptx_count = 0;
+int bprop_list[128];
+unsigned int bprop_count = 0;
 uint32_t i, c;
 int script = -1;
 int type = -1;
 int gbreak = -1;
+int bidiclass = -1;
 BOOL script_not = FALSE;
 BOOL type_not = FALSE;
 BOOL gbreak_not = FALSE;
+BOOL bidiclass_not = FALSE;
 BOOL hadrange = FALSE;
 const ucd_record *ucd, *next_ucd;
 const char *pad = "        ";
@ -410,13 +563,18 @@ while (*s != 0)
  *t = 0;
  while (isspace(*s)) s++;

-  for (t = value; *s != 0 && !isspace(*s); s++) *t++ = *s;
+  /* Copy the value, dropping '_' and '-', and truncating anything that
+  would overflow value[] (a truncated name then simply fails to match). */
+  for (t = value; *s != 0 && !isspace(*s); s++)
+    if (*s != '_' && *s != '-' && t < value + sizeof(value) - 1) *t++ = *s;
  *t = 0;
  while (isspace(*s)) s++;

  if (strcmp(CS name, "script") == 0 ||
      strcmp(CS name, "scriptx") == 0)
    {
+    for (t = value; *t != 0; t++) *t = tolower(*t);
+ 
    if (value[0] == '!')
      {
      if (name[6] == 'x') scriptx_not = TRUE;
@ -426,11 +584,11 @@ while (*s != 0)

    for (i = 0; i < PRIV(utt_size); i++)
      {
-      const ucp_type_table *u = PRIV(utt) + i; 
-      if (u->type == PT_SC && strcmp(CS(value + offset), 
+      const ucp_type_table *u = PRIV(utt) + i;
+      if ((u->type == PT_SCX || u->type == PT_SC) && strcmp(CS(value + offset),
            PRIV(utt_names) + u->name_offset) == 0)
        {
-        c = u->value; 
+        c = u->value;
        if (name[6] == 'x')
          {
          scriptx_list[scriptx_count++] = scriptx_not? (-c):c;
@ -454,6 +612,33 @@ while (*s != 0)
      }
    }

+  else if (strcmp(CS name, "bool") == 0)
+    {
+    int not = 1;
+    if (value[0] == '!')
+      {
+      not = -1;
+      offset = 1;
+      }
+
+    for (i = 0; i < PRIV(utt_size); i++)
+      {
+      const ucp_type_table *u = PRIV(utt) + i;
+      if (u->type == PT_BOOL && strcmp(CS(value + offset),
+            PRIV(utt_names) + u->name_offset) == 0)
+        {
+        bprop_list[bprop_count++] = u->value * not;
+        break;
+        }
+      }
+
+    if (i >= PRIV(utt_size))
+      {
+      printf("** Unrecognized property name \"%s\"\n", value);
+      return;
+      }
+    }
+
  else if (strcmp(CS name, "type") == 0)
    {
    if (type >= 0)
@ -516,6 +701,38 @@ while (*s != 0)
      }
    }

+  else if (strcmp(CS name, "bidi") == 0 ||
+           strcmp(CS name, "bidiclass") == 0 ||
+           strcmp(CS name, "bidi_class") == 0 )
+    {
+    if (bidiclass >= 0)
+      {
+      printf("** Only 1 bidi class value allowed\n");
+      return;
+      }
+    else
+      {
+      if (value[0] == '!')
+        {
+        bidiclass_not = TRUE;
+        offset = 1;
+        }
+      for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2)
+        {
+        if (strcasecmp(CS (value + offset), CS bd_names[i]) == 0)
+          {
+          bidiclass = i/2;
+          break;
+          }
+        }
+      if (i >= sizeof(bd_names)/sizeof(char *))
+        {
+        printf("** Unrecognized bidi class name \"%s\"\n", value);
+        return;
+        }
+      }
+    }
+
  else
    {
    printf("** Unrecognized property name \"%s\"\n", name);
@ -523,7 +740,8 @@ while (*s != 0)
    }
  }

-if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0)
+if (script < 0 && scriptx_count == 0 && bprop_count == 0 && type < 0 &&
+    gbreak < 0 && bidiclass < 0)
  {
  printf("** No properties specified\n");
  return;
@ -535,55 +753,55 @@ for (c = 0; c <= 0x10ffff; c++)

  if (scriptx_count > 0)
    {
-    const uint8_t *char_scriptx = NULL;
+    const uint32_t *bits_scriptx = PRIV(ucd_script_sets) + UCD_SCRIPTX(c);
    unsigned int found = 0;
-    int scriptx = UCD_SCRIPTX(c);
-
-    if (scriptx < 0) char_scriptx = PRIV(ucd_script_sets) - scriptx;

    for (i = 0; i < scriptx_count; i++)
      {
+      int x = scriptx_list[i]/32;
+      int y = scriptx_list[i]%32;
+
      /* Positive requirment */
      if (scriptx_list[i] >= 0)
        {
-        if (scriptx >= 0)
-          {
-          if (scriptx == scriptx_list[i]) found++;
-          }
-
-        else
-          {
-          const uint8_t *p;
-          for (p = char_scriptx; *p != 0; p++)
-            {
-            if (scriptx_list[i] == *p)
-              {
-              found++;
-              break;
-              }
-            }
-          }
+        if ((bits_scriptx[x] & (1u<<y)) != 0) found++;
        }
      /* Negative requirement */
      else
        {
-        if (scriptx >= 0)
-          {
-          if (scriptx != -scriptx_list[i]) found++;
-          }
-        else
-          {
-          const uint8_t *p;
-          for (p = char_scriptx; *p != 0; p++)
-            if (-scriptx_list[i] == *p) break;
-          if (*p == 0) found++;
-          }
+        if ((bits_scriptx[-x] & (1u<<(-y))) == 0) found++;  /* x,y <= 0 */
        }
      }

    if (found != scriptx_count) continue;
    }

+  if (bprop_count > 0)
+    {
+    const uint32_t *bits_bprop = PRIV(ucd_boolprop_sets) + 
+      UCD_BPROPS(c) * ucd_boolprop_sets_item_size;
+    unsigned int found = 0;
+
+    for (i = 0; i < bprop_count; i++)
+      {
+      int x = bprop_list[i]/32;
+      int y = bprop_list[i]%32;
+
+      /* Positive requirement */
+      if (bprop_list[i] >= 0)
+        {
+        if ((bits_bprop[x] & (1u<<y)) != 0) found++;
+        }
+      /* Negative requirement */
+      else
+        {
+        if ((bits_bprop[-x] & (1u<<(-y))) == 0) found++;
+        }
+      }
+
+    if (found != bprop_count) continue;
+    }
+
  if (type >= 0)
    {
    if (type_not)
@ -608,6 +826,18 @@ for (c = 0; c <= 0x10ffff; c++)
      }
    }

+  if (bidiclass >= 0)
+    {
+    if (bidiclass_not)
+      {
+      if (bidiclass == UCD_BIDICLASS(c)) continue;
+      }
+    else
+      {
+      if (bidiclass != UCD_BIDICLASS(c)) continue;
+      }
+    }
+
  /* All conditions are met. Look for runs. */

  ucd = GET_UCD(c);
@ -663,23 +893,37 @@ if (strcmp(CS name, "findprop") == 0)
  {
  while (*s != 0)
    {
-    unsigned int c; 
+    unsigned int c;
    unsigned char *endptr;
-    t = s; 
-    if (strncmp(CS t, "U+", 2) == 0) t += 2;
-    c = strtoul(CS t, CSS(&endptr), 16);
+    t = s;
+
+    if (*t == '+')
+      {
+      /* A character above 0x7f is decoded by GETCHARINC(), which leaves t
+      pointing after all of its bytes, so that is where the item ends; a
+      single-byte character ends at the following byte. */
+      c = *(++t);
+      if (c > 0x7fu) { GETCHARINC(c, t); endptr = t; }
+      else endptr = t+1;
+      }
+    else
+      {
+      if (strncmp(CS t, "U+", 2) == 0) t += 2;
+      c = strtoul(CS t, CSS(&endptr), 16);
+      }
+
    if (*endptr != 0 && !isspace(*endptr))
      {
      while (*endptr != 0 && !isspace(*endptr)) endptr++;
-      printf("** Invalid hex number: ignored \"%.*s\"\n", (int)(endptr-s), s);
+      printf("** Invalid character specifier: ignored \"%.*s\"\n", (int)(endptr-s), s);
      }
-    else  
+    else
      {
-      if (c > 0x10ffff) 
+      if (c > 0x10ffff)
        printf("** U+%x is too big for a Unicode code point\n", c);
-      else   
+      else
        print_prop(c, TRUE);
-      } 
+      }
    s = endptr;
    while (isspace(*s)) s++;
    }
@ -689,7 +933,7 @@ else if (strcmp(CS name, "find") == 0)
  {
  find_chars(s);
  }
-  
+
 else if (strcmp(CS name, "list") == 0)
  {
  while (*s != 0)
@ -698,38 +942,52 @@ else if (strcmp(CS name, "list") == 0)
    for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
    *t = 0;
    while (isspace(*s)) s++;
-    
+
    if (strcmp(CS name, "script") == 0 || strcmp(CS name, "scripts") == 0)
      {
-      for (i = 0; i < PRIV(utt_size); i++) 
-        if (PRIV(utt)[i].type == PT_SC)
-          printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);  
+      for (i = 0; i < PRIV(utt_size); i++)
+        if (PRIV(utt)[i].type == PT_SCX || PRIV(utt)[i].type == PT_SC)
+          printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
      }
-      
+
+    else if (strcmp(CS name, "bool") == 0)
+      {
+      for (i = 0; i < PRIV(utt_size); i++)
+        if (PRIV(utt)[i].type == PT_BOOL)
+          printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
+      }
+
    else if (strcmp(CS name, "type") == 0 || strcmp(CS name, "types") == 0)
      {
      for (i = 0; i < sizeof(type_names)/sizeof(char *); i += 2)
-        printf("%s %s\n", type_names[i], type_names[i+1]); 
-      }  
-      
+        printf("%s %s\n", type_names[i], type_names[i+1]);
+      }
+
    else if (strcmp(CS name, "gbreak") == 0 || strcmp(CS name, "gbreaks") == 0)
      {
      for (i = 0; i < sizeof(gb_names)/sizeof(char *); i += 2)
        {
-        if (gb_names[i+1][0] != 0)  
+        if (gb_names[i+1][0] != 0)
          printf("%-3s (%s)\n", gb_names[i], gb_names[i+1]);
-        else   
+        else
          printf("%s\n", gb_names[i]);
-        } 
-      }    
+        }
+      }

-    else 
+    else if (strcmp(CS name, "bidi") == 0 ||
+             strcmp(CS name, "bidiclasses") == 0)
      {
-      printf("** Unknown property \"%s\"\n", name);  
+      for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2)
+        printf("%3s %s\n", bd_names[i], bd_names[i+1]);
+      }
+
+    else
+      {
+      printf("** Unknown property \"%s\"\n", name);
      break;
-      }  
-    }  
-  }  
+      }
+    }
+  }

 else printf("** Unknown test command \"%s\"\n", name);
 }
@ -751,32 +1009,32 @@ if (argc > 1 && strcmp(argv[1], "-s") == 0)
  {
  show_character = TRUE;
  first_arg++;
-  }   
+  }

 if (argc > first_arg)
  {
  int i;
-  BOOL hexfirst = TRUE; 
-  char *arg = argv[first_arg]; 
+  BOOL datafirst = TRUE;
+  char *arg = argv[first_arg];
  unsigned char *s = buffer;
-  
-  if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg)) 
+
+  if (*arg != '+' && strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
    {
-    while (*arg != 0) 
+    while (*arg != 0)
      {
-      if (!isxdigit(*arg++)) { hexfirst = FALSE; break; }  
-      } 
-    } 
-     
-  if (hexfirst)
+      if (!isxdigit(*arg++)) { datafirst = FALSE; break; }
+      }
+    }
+
+  if (datafirst)
    {
    strcpy(CS s, "findprop ");
    s += 9;
    }
-    
+
  for (i = first_arg; i < argc; i++)
    {
-    s += sprintf(CS s, "%s ", argv[i]);       
+    s += sprintf(CS s, "%s ", argv[i]);
    }

  process_command_line(buffer);
@ -812,7 +1070,7 @@ for(;;)
    if (fgets(CS buffer, sizeof(buffer), stdin) == NULL) break;
    if (!interactive) printf("%s", buffer);
    }
-    
+
  process_command_line(buffer);
  }

--- a/maint/ucptestdata/testinput1
+++ b/maint/ucptestdata/testinput1
@ -46,3 +46,5 @@ findprop 32ff
 findprop 1f16d

 findprop U+10e93 U+10eaa
+
+findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
--- a/maint/ucptestdata/testinput2
+++ b/maint/ucptestdata/testinput2
@ -3,3 +3,17 @@ find type Pe script Common scriptx Hangul
 find type Sk
 find type Pd
 find gbreak LVT
+find script Old_Uyghur
+find bidi PDF
+find bidi CS
+find bidi CS type Sm
+find bidi B
+find bidi FSI
+find bidi PDI
+find bidi RLI
+find bidi RLO
+find bidi S
+find bidi WS
+find script bopo
+find bool prependedconcatenationmark
+find bool pcm
--- a/maint/ucptestdata/testoutput1
+++ b/maint/ucptestdata/testoutput1
@ -1,398 +1,409 @@
 findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 
-U+0000 Control: Control, Common, Control
-U+0001 Control: Control, Common, Control
-U+0002 Control: Control, Common, Control
-U+0003 Control: Control, Common, Control
-U+0004 Control: Control, Common, Control
-U+0005 Control: Control, Common, Control
-U+0006 Control: Control, Common, Control
-U+0007 Control: Control, Common, Control
-U+0008 Control: Control, Common, Control
-U+0009 Control: Control, Common, Control
-U+000A Control: Control, Common, LF
-U+000B Control: Control, Common, Control
-U+000C Control: Control, Common, Control
-U+000D Control: Control, Common, CR
-U+000E Control: Control, Common, Control
-U+000F Control: Control, Common, Control
+U+0000 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0001 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0002 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0003 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0004 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0005 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0006 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0007 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0008 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0009 S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000A B   Control: Control, common, LF, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000B S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000C WS  Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000D B   Control: Control, common, CR, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000E BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+000F BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
 findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 
-U+0010 Control: Control, Common, Control
-U+0011 Control: Control, Common, Control
-U+0012 Control: Control, Common, Control
-U+0013 Control: Control, Common, Control
-U+0014 Control: Control, Common, Control
-U+0015 Control: Control, Common, Control
-U+0016 Control: Control, Common, Control
-U+0017 Control: Control, Common, Control
-U+0018 Control: Control, Common, Control
-U+0019 Control: Control, Common, Control
-U+001A Control: Control, Common, Control
-U+001B Control: Control, Common, Control
-U+001C Control: Control, Common, Control
-U+001D Control: Control, Common, Control
-U+001E Control: Control, Common, Control
-U+001F Control: Control, Common, Control
+U+0010 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0011 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0012 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0013 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0014 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0015 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0016 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0017 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0018 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0019 BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001A BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001B BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001C B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001D B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001E B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+001F S   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
 findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f 
-U+0020 Separator: Space separator, Common, Other
-U+0021 Punctuation: Other punctuation, Common, Other
-U+0022 Punctuation: Other punctuation, Common, Other
-U+0023 Punctuation: Other punctuation, Common, Other
-U+0024 Symbol: Currency symbol, Common, Other
-U+0025 Punctuation: Other punctuation, Common, Other
-U+0026 Punctuation: Other punctuation, Common, Other
-U+0027 Punctuation: Other punctuation, Common, Other
-U+0028 Punctuation: Open punctuation, Common, Other
-U+0029 Punctuation: Close punctuation, Common, Other
-U+002A Punctuation: Other punctuation, Common, Other
-U+002B Symbol: Mathematical symbol, Common, Other
-U+002C Punctuation: Other punctuation, Common, Other
-U+002D Punctuation: Dash punctuation, Common, Other
-U+002E Punctuation: Other punctuation, Common, Other
-U+002F Punctuation: Other punctuation, Common, Other
+U+0020 WS  Separator: Space separator, common, Other, [ascii, emoji, emojicomponent, graphemebase, patternsyntax]
+U+0021 ON  Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, quotationmark]
+U+0022 ON  Punctuation: Other punctuation, common, Other, [ascii, graphemebase, math, patternsyntax]
+U+0023 ET  Punctuation: Other punctuation, common, Other, [ascii, dash, graphemebase, patternsyntax]
+U+0024 ET  Symbol: Currency symbol, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0025 ET  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0026 ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0027 ON  Punctuation: Other punctuation, common, Other, [ascii, bidimirrored, graphemebase, math, patternsyntax]
+U+0028 ON  Punctuation: Open punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+0029 ON  Punctuation: Close punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+002A ON  Punctuation: Other punctuation, common, Other, [ascii, dash, graphemebase, patternsyntax]
+U+002B ES  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+002C CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+002D ES  Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+002E CS  Punctuation: Other punctuation, common, Other, [graphemebase, whitespace]
+U+002F CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
 findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 
-U+0030 Number: Decimal number, Common, Other
-U+0031 Number: Decimal number, Common, Other
-U+0032 Number: Decimal number, Common, Other
-U+0033 Number: Decimal number, Common, Other
-U+0034 Number: Decimal number, Common, Other
-U+0035 Number: Decimal number, Common, Other
-U+0036 Number: Decimal number, Common, Other
-U+0037 Number: Decimal number, Common, Other
-U+0038 Number: Decimal number, Common, Other
-U+0039 Number: Decimal number, Common, Other
-U+003A Punctuation: Other punctuation, Common, Other
-U+003B Punctuation: Other punctuation, Common, Other
-U+003C Symbol: Mathematical symbol, Common, Other
-U+003D Symbol: Mathematical symbol, Common, Other
-U+003E Symbol: Mathematical symbol, Common, Other
-U+003F Punctuation: Other punctuation, Common, Other
+U+0030 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0031 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0032 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0033 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0034 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0035 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0036 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0037 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0038 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+0039 EN  Number: Decimal number, common, Other, [caseignorable, diacritic, graphemebase]
+U+003A CS  Punctuation: Other punctuation, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+003B ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+003C ON  Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
+U+003D ON  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+003E ON  Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
+U+003F ON  Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, quotationmark]
 findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 
-U+0040 Punctuation: Other punctuation, Common, Other
-U+0041 Letter: Upper case letter, Latin, Other, U+0061
-U+0042 Letter: Upper case letter, Latin, Other, U+0062
-U+0043 Letter: Upper case letter, Latin, Other, U+0063
-U+0044 Letter: Upper case letter, Latin, Other, U+0064
-U+0045 Letter: Upper case letter, Latin, Other, U+0065
-U+0046 Letter: Upper case letter, Latin, Other, U+0066
-U+0047 Letter: Upper case letter, Latin, Other, U+0067
-U+0048 Letter: Upper case letter, Latin, Other, U+0068
-U+0049 Letter: Upper case letter, Latin, Other, U+0069
-U+004A Letter: Upper case letter, Latin, Other, U+006A
-U+004B Letter: Upper case letter, Latin, Other, U+006B, U+212A
-U+004C Letter: Upper case letter, Latin, Other, U+006C
-U+004D Letter: Upper case letter, Latin, Other, U+006D
-U+004E Letter: Upper case letter, Latin, Other, U+006E
-U+004F Letter: Upper case letter, Latin, Other, U+006F
+U+0040 ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+0041 L   Letter: Upper case letter, latin, Other, U+0061, [graphemebase]
+U+0042 L   Letter: Upper case letter, latin, Other, U+0062, [graphemebase]
+U+0043 L   Letter: Upper case letter, latin, Other, U+0063, [graphemebase]
+U+0044 L   Letter: Upper case letter, latin, Other, U+0064, [graphemebase]
+U+0045 L   Letter: Upper case letter, latin, Other, U+0065, [graphemebase]
+U+0046 L   Letter: Upper case letter, latin, Other, U+0066, [graphemebase]
+U+0047 L   Letter: Upper case letter, latin, Other, U+0067, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0048 L   Letter: Upper case letter, latin, Other, U+0068, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0049 L   Letter: Upper case letter, latin, Other, U+0069, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004A L   Letter: Upper case letter, latin, Other, U+006A, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004B L   Letter: Upper case letter, latin, Other, U+006B, U+212A, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004C L   Letter: Upper case letter, latin, Other, U+006C, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004D L   Letter: Upper case letter, latin, Other, U+006D, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004E L   Letter: Upper case letter, latin, Other, U+006E, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+004F L   Letter: Upper case letter, latin, Other, U+006F, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
 findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f 
-U+0050 Letter: Upper case letter, Latin, Other, U+0070
-U+0051 Letter: Upper case letter, Latin, Other, U+0071
-U+0052 Letter: Upper case letter, Latin, Other, U+0072
-U+0053 Letter: Upper case letter, Latin, Other, U+0073, U+017F
-U+0054 Letter: Upper case letter, Latin, Other, U+0074
-U+0055 Letter: Upper case letter, Latin, Other, U+0075
-U+0056 Letter: Upper case letter, Latin, Other, U+0076
-U+0057 Letter: Upper case letter, Latin, Other, U+0077
-U+0058 Letter: Upper case letter, Latin, Other, U+0078
-U+0059 Letter: Upper case letter, Latin, Other, U+0079
-U+005A Letter: Upper case letter, Latin, Other, U+007A
-U+005B Punctuation: Open punctuation, Common, Other
-U+005C Punctuation: Other punctuation, Common, Other
-U+005D Punctuation: Close punctuation, Common, Other
-U+005E Symbol: Modifier symbol, Common, Other
-U+005F Punctuation: Connector punctuation, Common, Other
+U+0050 L   Letter: Upper case letter, latin, Other, U+0070, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0051 L   Letter: Upper case letter, latin, Other, U+0071, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0052 L   Letter: Upper case letter, latin, Other, U+0072, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0053 L   Letter: Upper case letter, latin, Other, U+0073, U+017F, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0054 L   Letter: Upper case letter, latin, Other, U+0074, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0055 L   Letter: Upper case letter, latin, Other, U+0075, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0056 L   Letter: Upper case letter, latin, Other, U+0076, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0057 L   Letter: Upper case letter, latin, Other, U+0077, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0058 L   Letter: Upper case letter, latin, Other, U+0078, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+0059 L   Letter: Upper case letter, latin, Other, U+0079, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+005A L   Letter: Upper case letter, latin, Other, U+007A, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
+U+005B ON  Punctuation: Open punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+005C ON  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+005D ON  Punctuation: Close punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+005E ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+005F ON  Punctuation: Connector punctuation, common, Other, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, deprecated, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
 findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 
-U+0060 Symbol: Modifier symbol, Common, Other
-U+0061 Letter: Lower case letter, Latin, Other, U+0041
-U+0062 Letter: Lower case letter, Latin, Other, U+0042
-U+0063 Letter: Lower case letter, Latin, Other, U+0043
-U+0064 Letter: Lower case letter, Latin, Other, U+0044
-U+0065 Letter: Lower case letter, Latin, Other, U+0045
-U+0066 Letter: Lower case letter, Latin, Other, U+0046
-U+0067 Letter: Lower case letter, Latin, Other, U+0047
-U+0068 Letter: Lower case letter, Latin, Other, U+0048
-U+0069 Letter: Lower case letter, Latin, Other, U+0049
-U+006A Letter: Lower case letter, Latin, Other, U+004A
-U+006B Letter: Lower case letter, Latin, Other, U+004B, U+212A
-U+006C Letter: Lower case letter, Latin, Other, U+004C
-U+006D Letter: Lower case letter, Latin, Other, U+004D
-U+006E Letter: Lower case letter, Latin, Other, U+004E
-U+006F Letter: Lower case letter, Latin, Other, U+004F
+U+0060 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+0061 L   Letter: Lower case letter, latin, Other, U+0041, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0062 L   Letter: Lower case letter, latin, Other, U+0042, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0063 L   Letter: Lower case letter, latin, Other, U+0043, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0064 L   Letter: Lower case letter, latin, Other, U+0044, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0065 L   Letter: Lower case letter, latin, Other, U+0045, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0066 L   Letter: Lower case letter, latin, Other, U+0046, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0067 L   Letter: Lower case letter, latin, Other, U+0047, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0068 L   Letter: Lower case letter, latin, Other, U+0048, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0069 L   Letter: Lower case letter, latin, Other, U+0049, [caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+006A L   Letter: Lower case letter, latin, Other, U+004A, [caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+006B L   Letter: Lower case letter, latin, Other, U+004B, U+212A, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006C L   Letter: Lower case letter, latin, Other, U+004C, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006D L   Letter: Lower case letter, latin, Other, U+004D, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006E L   Letter: Lower case letter, latin, Other, U+004E, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+006F L   Letter: Lower case letter, latin, Other, U+004F, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
 findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f 
-U+0070 Letter: Lower case letter, Latin, Other, U+0050
-U+0071 Letter: Lower case letter, Latin, Other, U+0051
-U+0072 Letter: Lower case letter, Latin, Other, U+0052
-U+0073 Letter: Lower case letter, Latin, Other, U+0053, U+017F
-U+0074 Letter: Lower case letter, Latin, Other, U+0054
-U+0075 Letter: Lower case letter, Latin, Other, U+0055
-U+0076 Letter: Lower case letter, Latin, Other, U+0056
-U+0077 Letter: Lower case letter, Latin, Other, U+0057
-U+0078 Letter: Lower case letter, Latin, Other, U+0058
-U+0079 Letter: Lower case letter, Latin, Other, U+0059
-U+007A Letter: Lower case letter, Latin, Other, U+005A
-U+007B Punctuation: Open punctuation, Common, Other
-U+007C Symbol: Mathematical symbol, Common, Other
-U+007D Punctuation: Close punctuation, Common, Other
-U+007E Symbol: Mathematical symbol, Common, Other
-U+007F Control: Control, Common, Control
+U+0070 L   Letter: Lower case letter, latin, Other, U+0050, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0071 L   Letter: Lower case letter, latin, Other, U+0051, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0072 L   Letter: Lower case letter, latin, Other, U+0052, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0073 L   Letter: Lower case letter, latin, Other, U+0053, U+017F, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0074 L   Letter: Lower case letter, latin, Other, U+0054, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0075 L   Letter: Lower case letter, latin, Other, U+0055, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0076 L   Letter: Lower case letter, latin, Other, U+0056, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0077 L   Letter: Lower case letter, latin, Other, U+0057, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0078 L   Letter: Lower case letter, latin, Other, U+0058, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+0079 L   Letter: Lower case letter, latin, Other, U+0059, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+007A L   Letter: Lower case letter, latin, Other, U+005A, [alphabetic, caseignorable, diacritic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+007B ON  Punctuation: Open punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+007C ON  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+007D ON  Punctuation: Close punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+007E ON  Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
+U+007F BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]

 findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 
-U+0080 Control: Control, Common, Control
-U+0081 Control: Control, Common, Control
-U+0082 Control: Control, Common, Control
-U+0083 Control: Control, Common, Control
-U+0084 Control: Control, Common, Control
-U+0085 Control: Control, Common, Control
-U+0086 Control: Control, Common, Control
-U+0087 Control: Control, Common, Control
-U+0088 Control: Control, Common, Control
-U+0089 Control: Control, Common, Control
-U+008A Control: Control, Common, Control
-U+008B Control: Control, Common, Control
-U+008C Control: Control, Common, Control
-U+008D Control: Control, Common, Control
-U+008E Control: Control, Common, Control
-U+008F Control: Control, Common, Control
+U+0080 BN  Control: Control, common, Control
+U+0081 BN  Control: Control, common, Control
+U+0082 BN  Control: Control, common, Control
+U+0083 BN  Control: Control, common, Control
+U+0084 BN  Control: Control, common, Control
+U+0085 B   Control: Control, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+U+0086 BN  Control: Control, common, Control
+U+0087 BN  Control: Control, common, Control
+U+0088 BN  Control: Control, common, Control
+U+0089 BN  Control: Control, common, Control
+U+008A BN  Control: Control, common, Control
+U+008B BN  Control: Control, common, Control
+U+008C BN  Control: Control, common, Control
+U+008D BN  Control: Control, common, Control
+U+008E BN  Control: Control, common, Control
+U+008F BN  Control: Control, common, Control
 findprop 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f 
-U+0090 Control: Control, Common, Control
-U+0091 Control: Control, Common, Control
-U+0092 Control: Control, Common, Control
-U+0093 Control: Control, Common, Control
-U+0094 Control: Control, Common, Control
-U+0095 Control: Control, Common, Control
-U+0096 Control: Control, Common, Control
-U+0097 Control: Control, Common, Control
-U+0098 Control: Control, Common, Control
-U+0099 Control: Control, Common, Control
-U+009A Control: Control, Common, Control
-U+009B Control: Control, Common, Control
-U+009C Control: Control, Common, Control
-U+009D Control: Control, Common, Control
-U+009E Control: Control, Common, Control
-U+009F Control: Control, Common, Control
+U+0090 BN  Control: Control, common, Control
+U+0091 BN  Control: Control, common, Control
+U+0092 BN  Control: Control, common, Control
+U+0093 BN  Control: Control, common, Control
+U+0094 BN  Control: Control, common, Control
+U+0095 BN  Control: Control, common, Control
+U+0096 BN  Control: Control, common, Control
+U+0097 BN  Control: Control, common, Control
+U+0098 BN  Control: Control, common, Control
+U+0099 BN  Control: Control, common, Control
+U+009A BN  Control: Control, common, Control
+U+009B BN  Control: Control, common, Control
+U+009C BN  Control: Control, common, Control
+U+009D BN  Control: Control, common, Control
+U+009E BN  Control: Control, common, Control
+U+009F BN  Control: Control, common, Control
 findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af 
-U+00A0 Separator: Space separator, Common, Other
-U+00A1 Punctuation: Other punctuation, Common, Other
-U+00A2 Symbol: Currency symbol, Common, Other
-U+00A3 Symbol: Currency symbol, Common, Other
-U+00A4 Symbol: Currency symbol, Common, Other
-U+00A5 Symbol: Currency symbol, Common, Other
-U+00A6 Symbol: Other symbol, Common, Other
-U+00A7 Punctuation: Other punctuation, Common, Other
-U+00A8 Symbol: Modifier symbol, Common, Other
-U+00A9 Symbol: Other symbol, Common, Extended Pictographic
-U+00AA Letter: Other letter, Latin, Other
-U+00AB Punctuation: Initial punctuation, Common, Other
-U+00AC Symbol: Mathematical symbol, Common, Other
-U+00AD Control: Format, Common, Control
-U+00AE Symbol: Other symbol, Common, Extended Pictographic
-U+00AF Symbol: Modifier symbol, Common, Other
+U+00A0 CS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+00A1 ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A2 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A3 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A4 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A5 ET  Symbol: Currency symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A6 ON  Symbol: Other symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A7 ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00A8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00A9 ON  Symbol: Other symbol, common, Extended Pictographic, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00AA L   Letter: Other letter, latin, Other, [caseignorable, graphemeextend]
+U+00AB ON  Punctuation: Initial punctuation, common, Other, [graphemebase, sentenceterminal, terminalpunctuation]
+U+00AC ON  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00AD BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+00AE ON  Symbol: Other symbol, common, Extended Pictographic, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00AF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
 findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf 
-U+00B0 Symbol: Other symbol, Common, Other
-U+00B1 Symbol: Mathematical symbol, Common, Other
-U+00B2 Number: Other number, Common, Other
-U+00B3 Number: Other number, Common, Other
-U+00B4 Symbol: Modifier symbol, Common, Other
-U+00B5 Letter: Lower case letter, Common, Other, U+03BC, U+039C
-U+00B6 Punctuation: Other punctuation, Common, Other
-U+00B7 Punctuation: Other punctuation, Common, Other
-U+00B8 Symbol: Modifier symbol, Common, Other
-U+00B9 Number: Other number, Common, Other
-U+00BA Letter: Other letter, Latin, Other
-U+00BB Punctuation: Final punctuation, Common, Other
-U+00BC Number: Other number, Common, Other
-U+00BD Number: Other number, Common, Other
-U+00BE Number: Other number, Common, Other
-U+00BF Punctuation: Other punctuation, Common, Other
+U+00B0 ET  Symbol: Other symbol, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00B1 ET  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00B2 EN  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00B3 EN  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00B4 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B5 L   Letter: Lower case letter, common, Other, U+03BC, U+039C, [alphabetic, deprecated, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00B6 ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
+U+00B7 ON  Punctuation: Other punctuation, common, Other, [alphabetic, graphemebase, idcontinue, xidcontinue]
+U+00B8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B9 EN  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BA L   Letter: Other letter, latin, Other, [caseignorable, graphemeextend]
+U+00BB ON  Punctuation: Final punctuation, common, Other, [graphemebase, sentenceterminal, terminalpunctuation]
+U+00BC ON  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BD ON  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BE ON  Number: Other number, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+00BF ON  Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, idcontinue, terminalpunctuation, xidcontinue]
 findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf 
-U+00C0 Letter: Upper case letter, Latin, Other, U+00E0
-U+00C1 Letter: Upper case letter, Latin, Other, U+00E1
-U+00C2 Letter: Upper case letter, Latin, Other, U+00E2
-U+00C3 Letter: Upper case letter, Latin, Other, U+00E3
-U+00C4 Letter: Upper case letter, Latin, Other, U+00E4
-U+00C5 Letter: Upper case letter, Latin, Other, U+00E5, U+212B
-U+00C6 Letter: Upper case letter, Latin, Other, U+00E6
-U+00C7 Letter: Upper case letter, Latin, Other, U+00E7
-U+00C8 Letter: Upper case letter, Latin, Other, U+00E8
-U+00C9 Letter: Upper case letter, Latin, Other, U+00E9
-U+00CA Letter: Upper case letter, Latin, Other, U+00EA
-U+00CB Letter: Upper case letter, Latin, Other, U+00EB
-U+00CC Letter: Upper case letter, Latin, Other, U+00EC
-U+00CD Letter: Upper case letter, Latin, Other, U+00ED
-U+00CE Letter: Upper case letter, Latin, Other, U+00EE
-U+00CF Letter: Upper case letter, Latin, Other, U+00EF
+U+00C0 L   Letter: Upper case letter, latin, Other, U+00E0, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C1 L   Letter: Upper case letter, latin, Other, U+00E1, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C2 L   Letter: Upper case letter, latin, Other, U+00E2, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C3 L   Letter: Upper case letter, latin, Other, U+00E3, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C4 L   Letter: Upper case letter, latin, Other, U+00E4, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C5 L   Letter: Upper case letter, latin, Other, U+00E5, U+212B, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C6 L   Letter: Upper case letter, latin, Other, U+00E6, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C7 L   Letter: Upper case letter, latin, Other, U+00E7, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C8 L   Letter: Upper case letter, latin, Other, U+00E8, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00C9 L   Letter: Upper case letter, latin, Other, U+00E9, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CA L   Letter: Upper case letter, latin, Other, U+00EA, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CB L   Letter: Upper case letter, latin, Other, U+00EB, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CC L   Letter: Upper case letter, latin, Other, U+00EC, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CD L   Letter: Upper case letter, latin, Other, U+00ED, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CE L   Letter: Upper case letter, latin, Other, U+00EE, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00CF L   Letter: Upper case letter, latin, Other, U+00EF, [alphabetic, graphemeextend, idcontinue, xidcontinue]
 findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df 
-U+00D0 Letter: Upper case letter, Latin, Other, U+00F0
-U+00D1 Letter: Upper case letter, Latin, Other, U+00F1
-U+00D2 Letter: Upper case letter, Latin, Other, U+00F2
-U+00D3 Letter: Upper case letter, Latin, Other, U+00F3
-U+00D4 Letter: Upper case letter, Latin, Other, U+00F4
-U+00D5 Letter: Upper case letter, Latin, Other, U+00F5
-U+00D6 Letter: Upper case letter, Latin, Other, U+00F6
-U+00D7 Symbol: Mathematical symbol, Common, Other
-U+00D8 Letter: Upper case letter, Latin, Other, U+00F8
-U+00D9 Letter: Upper case letter, Latin, Other, U+00F9
-U+00DA Letter: Upper case letter, Latin, Other, U+00FA
-U+00DB Letter: Upper case letter, Latin, Other, U+00FB
-U+00DC Letter: Upper case letter, Latin, Other, U+00FC
-U+00DD Letter: Upper case letter, Latin, Other, U+00FD
-U+00DE Letter: Upper case letter, Latin, Other, U+00FE
-U+00DF Letter: Lower case letter, Latin, Other, U+1E9E
+U+00D0 L   Letter: Upper case letter, latin, Other, U+00F0, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D1 L   Letter: Upper case letter, latin, Other, U+00F1, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D2 L   Letter: Upper case letter, latin, Other, U+00F2, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D3 L   Letter: Upper case letter, latin, Other, U+00F3, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D4 L   Letter: Upper case letter, latin, Other, U+00F4, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D5 L   Letter: Upper case letter, latin, Other, U+00F5, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D6 L   Letter: Upper case letter, latin, Other, U+00F6, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D7 ON  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00D8 L   Letter: Upper case letter, latin, Other, U+00F8, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00D9 L   Letter: Upper case letter, latin, Other, U+00F9, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DA L   Letter: Upper case letter, latin, Other, U+00FA, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DB L   Letter: Upper case letter, latin, Other, U+00FB, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DC L   Letter: Upper case letter, latin, Other, U+00FC, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DD L   Letter: Upper case letter, latin, Other, U+00FD, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DE L   Letter: Upper case letter, latin, Other, U+00FE, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+00DF L   Letter: Lower case letter, latin, Other, U+1E9E, [alphabetic, deprecated, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
 findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef 
-U+00E0 Letter: Lower case letter, Latin, Other, U+00C0
-U+00E1 Letter: Lower case letter, Latin, Other, U+00C1
-U+00E2 Letter: Lower case letter, Latin, Other, U+00C2
-U+00E3 Letter: Lower case letter, Latin, Other, U+00C3
-U+00E4 Letter: Lower case letter, Latin, Other, U+00C4
-U+00E5 Letter: Lower case letter, Latin, Other, U+00C5, U+212B
-U+00E6 Letter: Lower case letter, Latin, Other, U+00C6
-U+00E7 Letter: Lower case letter, Latin, Other, U+00C7
-U+00E8 Letter: Lower case letter, Latin, Other, U+00C8
-U+00E9 Letter: Lower case letter, Latin, Other, U+00C9
-U+00EA Letter: Lower case letter, Latin, Other, U+00CA
-U+00EB Letter: Lower case letter, Latin, Other, U+00CB
-U+00EC Letter: Lower case letter, Latin, Other, U+00CC
-U+00ED Letter: Lower case letter, Latin, Other, U+00CD
-U+00EE Letter: Lower case letter, Latin, Other, U+00CE
-U+00EF Letter: Lower case letter, Latin, Other, U+00CF
+U+00E0 L   Letter: Lower case letter, latin, Other, U+00C0, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E1 L   Letter: Lower case letter, latin, Other, U+00C1, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E2 L   Letter: Lower case letter, latin, Other, U+00C2, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E3 L   Letter: Lower case letter, latin, Other, U+00C3, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E4 L   Letter: Lower case letter, latin, Other, U+00C4, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E5 L   Letter: Lower case letter, latin, Other, U+00C5, U+212B, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E6 L   Letter: Lower case letter, latin, Other, U+00C6, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E7 L   Letter: Lower case letter, latin, Other, U+00C7, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E8 L   Letter: Lower case letter, latin, Other, U+00C8, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00E9 L   Letter: Lower case letter, latin, Other, U+00C9, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EA L   Letter: Lower case letter, latin, Other, U+00CA, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EB L   Letter: Lower case letter, latin, Other, U+00CB, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EC L   Letter: Lower case letter, latin, Other, U+00CC, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00ED L   Letter: Lower case letter, latin, Other, U+00CD, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EE L   Letter: Lower case letter, latin, Other, U+00CE, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00EF L   Letter: Lower case letter, latin, Other, U+00CF, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
 findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff 
-U+00F0 Letter: Lower case letter, Latin, Other, U+00D0
-U+00F1 Letter: Lower case letter, Latin, Other, U+00D1
-U+00F2 Letter: Lower case letter, Latin, Other, U+00D2
-U+00F3 Letter: Lower case letter, Latin, Other, U+00D3
-U+00F4 Letter: Lower case letter, Latin, Other, U+00D4
-U+00F5 Letter: Lower case letter, Latin, Other, U+00D5
-U+00F6 Letter: Lower case letter, Latin, Other, U+00D6
-U+00F7 Symbol: Mathematical symbol, Common, Other
-U+00F8 Letter: Lower case letter, Latin, Other, U+00D8
-U+00F9 Letter: Lower case letter, Latin, Other, U+00D9
-U+00FA Letter: Lower case letter, Latin, Other, U+00DA
-U+00FB Letter: Lower case letter, Latin, Other, U+00DB
-U+00FC Letter: Lower case letter, Latin, Other, U+00DC
-U+00FD Letter: Lower case letter, Latin, Other, U+00DD
-U+00FE Letter: Lower case letter, Latin, Other, U+00DE
-U+00FF Letter: Lower case letter, Latin, Other, U+0178
+U+00F0 L   Letter: Lower case letter, latin, Other, U+00D0, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F1 L   Letter: Lower case letter, latin, Other, U+00D1, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F2 L   Letter: Lower case letter, latin, Other, U+00D2, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F3 L   Letter: Lower case letter, latin, Other, U+00D3, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F4 L   Letter: Lower case letter, latin, Other, U+00D4, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F5 L   Letter: Lower case letter, latin, Other, U+00D5, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F6 L   Letter: Lower case letter, latin, Other, U+00D6, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F7 ON  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+00F8 L   Letter: Lower case letter, latin, Other, U+00D8, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00F9 L   Letter: Lower case letter, latin, Other, U+00D9, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FA L   Letter: Lower case letter, latin, Other, U+00DA, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FB L   Letter: Lower case letter, latin, Other, U+00DB, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FC L   Letter: Lower case letter, latin, Other, U+00DC, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FD L   Letter: Lower case letter, latin, Other, U+00DD, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FE L   Letter: Lower case letter, latin, Other, U+00DE, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+00FF L   Letter: Lower case letter, latin, Other, U+0178, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]

 findprop 0100 0101 0102 0103 0104 0105 0106
-U+0100 Letter: Upper case letter, Latin, Other, U+0101
-U+0101 Letter: Lower case letter, Latin, Other, U+0100
-U+0102 Letter: Upper case letter, Latin, Other, U+0103
-U+0103 Letter: Lower case letter, Latin, Other, U+0102
-U+0104 Letter: Upper case letter, Latin, Other, U+0105
-U+0105 Letter: Lower case letter, Latin, Other, U+0104
-U+0106 Letter: Upper case letter, Latin, Other, U+0107
+U+0100 L   Letter: Upper case letter, latin, Other, U+0101, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+0101 L   Letter: Lower case letter, latin, Other, U+0100, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+0102 L   Letter: Upper case letter, latin, Other, U+0103, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+0103 L   Letter: Lower case letter, latin, Other, U+0102, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+0104 L   Letter: Upper case letter, latin, Other, U+0105, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+0105 L   Letter: Lower case letter, latin, Other, U+0104, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+0106 L   Letter: Upper case letter, latin, Other, U+0107, [alphabetic, graphemeextend, idcontinue, xidcontinue]

 findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7 
-U+FFE0 Symbol: Currency symbol, Common, Other
-U+FFE1 Symbol: Currency symbol, Common, Other
-U+FFE2 Symbol: Mathematical symbol, Common, Other
-U+FFE3 Symbol: Modifier symbol, Common, Other
-U+FFE4 Symbol: Other symbol, Common, Other
-U+FFE5 Symbol: Currency symbol, Common, Other
-U+FFE6 Symbol: Currency symbol, Common, Other
-U+FFE7 Control: Unassigned, Unknown, Other
+U+FFE0 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE1 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE2 ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFE3 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+FFE4 ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE5 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE6 ET  Symbol: Currency symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE7 L   Control: Unassigned, unknown, Other
 findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef
-U+FFE8 Symbol: Other symbol, Common, Other
-U+FFE9 Symbol: Mathematical symbol, Common, Other
-U+FFEA Symbol: Mathematical symbol, Common, Other
-U+FFEB Symbol: Mathematical symbol, Common, Other
-U+FFEC Symbol: Mathematical symbol, Common, Other
-U+FFED Symbol: Other symbol, Common, Other
-U+FFEE Symbol: Other symbol, Common, Other
-U+FFEF Control: Unassigned, Unknown, Other
+U+FFE8 ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFE9 ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFEA ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFEB ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFEC ON  Symbol: Mathematical symbol, common, Other, [emoji, extendedpictographic, graphemebase]
+U+FFED ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFEE ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFEF L   Control: Unassigned, unknown, Other
 findprop fff8 fff9 fffa fffb fffc fffd fffe ffff
-U+FFF8 Control: Unassigned, Unknown, Control
-U+FFF9 Control: Format, Common, Control
-U+FFFA Control: Format, Common, Control
-U+FFFB Control: Format, Common, Control
-U+FFFC Symbol: Other symbol, Common, Other
-U+FFFD Symbol: Other symbol, Common, Other
-U+FFFE Control: Unassigned, Unknown, Other
-U+FFFF Control: Unassigned, Unknown, Other
+U+FFF8 BN  Control: Unassigned, unknown, Control, [dash, defaultignorablecodepoint, deprecated, extendedpictographic, joincontrol, lowercase, patternwhitespace, quotationmark, sentenceterminal, softdotted, xidcontinue, xidstart]
+U+FFF9 ON  Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart]
+U+FFFA ON  Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart]
+U+FFFB ON  Control: Format, common, Control, [changeswhenuppercased, deprecated, emojimodifier, emojipresentation, extender, sentenceterminal, xidcontinue, xidstart]
+U+FFFC ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFFD ON  Symbol: Other symbol, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FFFE BN  Control: Unassigned, unknown, Other, [changeswhenuppercased, deprecated, emojicomponent, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FFFF BN  Control: Unassigned, unknown, Other, [changeswhenuppercased, deprecated, emojicomponent, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
 findprop 10000 10001 e01ef f0000 100000
-U+10000 Letter: Other letter, Linear_B, Other
-U+10001 Letter: Other letter, Linear_B, Other
-U+E01EF Mark: Non-spacing mark, Inherited, Extend
-U+F0000 Control: Private use, Unknown, Other
-U+100000 Control: Private use, Unknown, Other
+U+10000 L   Letter: Other letter, linearb, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10001 L   Letter: Other letter, linearb, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+E01EF NSM Mark: Non-spacing mark, inherited, Extend, []
+U+F0000 L   Control: Private use, unknown, Other
+U+100000 L   Control: Private use, unknown, Other

 findprop 1b00 12000 7c0 a840 10900
-U+1B00 Mark: Non-spacing mark, Balinese, Extend
-U+12000 Letter: Other letter, Cuneiform, Other
-U+07C0 Number: Decimal number, Nko, Other
-U+A840 Letter: Other letter, Phags_Pa, Other
-U+10900 Letter: Other letter, Phoenician, Other
+U+1B00 NSM Mark: Non-spacing mark, balinese, Extend, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, uppercase]
+U+12000 L   Letter: Other letter, cuneiform, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+07C0 R   Number: Decimal number, nko, Other, [graphemebase, patternsyntax, terminalpunctuation]
+U+A840 L   Letter: Other letter, phagspa, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10900 R   Letter: Other letter, phoenician, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
 findprop 1d79 a77d
-U+1D79 Letter: Lower case letter, Latin, Other, U+A77D
-U+A77D Letter: Upper case letter, Latin, Other, U+1D79
+U+1D79 L   Letter: Lower case letter, latin, Other, U+A77D, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue]
+U+A77D L   Letter: Upper case letter, latin, Other, U+1D79, [alphabetic, graphemeextend, idcontinue, xidcontinue]

 findprop  0800  083e  a4d0  a4f7  aa80  aadf
-U+0800 Letter: Other letter, Samaritan, Other
-U+083E Punctuation: Other punctuation, Samaritan, Other
-U+A4D0 Letter: Other letter, Lisu, Other
-U+A4F7 Letter: Other letter, Lisu, Other
-U+AA80 Letter: Other letter, Tai_Viet, Other
-U+AADF Punctuation: Other punctuation, Tai_Viet, Other
+U+0800 R   Letter: Other letter, samaritan, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+083E R   Punctuation: Other punctuation, samaritan, Other, [bidimirrored, graphemebase, math, patternsyntax]
+U+A4D0 L   Letter: Other letter, lisu, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+A4F7 L   Letter: Other letter, lisu, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AA80 L   Letter: Other letter, taiviet, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AADF L   Punctuation: Other punctuation, taiviet, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
 findprop 10b00 10b35 13000 1342e 10840 10855
-U+10B00 Letter: Other letter, Avestan, Other
-U+10B35 Letter: Other letter, Avestan, Other
-U+13000 Letter: Other letter, Egyptian_Hieroglyphs, Other
-U+1342E Letter: Other letter, Egyptian_Hieroglyphs, Other
-U+10840 Letter: Other letter, Imperial_Aramaic, Other
-U+10855 Letter: Other letter, Imperial_Aramaic, Other
+U+10B00 R   Letter: Other letter, avestan, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10B35 R   Letter: Other letter, avestan, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+13000 L   Letter: Other letter, egyptianhieroglyphs, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1342E L   Letter: Other letter, egyptianhieroglyphs, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10840 R   Letter: Other letter, imperialaramaic, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10855 R   Letter: Other letter, imperialaramaic, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]

 findprop 11100 1113c 11680 116c0
-U+11100 Mark: Non-spacing mark, Chakma, Extend
-U+1113C Number: Decimal number, Chakma, Other
-U+11680 Letter: Other letter, Takri, Other
-U+116C0 Number: Decimal number, Takri, Other
+U+11100 NSM Mark: Non-spacing mark, chakma, Extend, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, uppercase]
+U+1113C L   Number: Decimal number, chakma, Other, [graphemebase, patternsyntax, terminalpunctuation]
+U+11680 L   Letter: Other letter, takri, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+116C0 L   Number: Decimal number, takri, Other, [graphemebase, patternsyntax, terminalpunctuation]

 findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
-U+000D Control: Control, Common, CR
-U+000A Control: Control, Common, LF
-U+000E Control: Control, Common, Control
-U+0711 Mark: Non-spacing mark, Syriac, Extend
-U+1B04 Mark: Spacing mark, Balinese, SpacingMark
-U+1111 Letter: Other letter, Hangul, Hangul syllable type L
-U+1169 Letter: Other letter, Hangul, Hangul syllable type V
-U+11FE Letter: Other letter, Hangul, Hangul syllable type T
-U+AE4C Letter: Other letter, Hangul, Hangul syllable type LV
-U+AD89 Letter: Other letter, Hangul, Hangul syllable type LVT
+U+000D B   Control: Control, common, CR, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000A B   Control: Control, common, LF, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000E BN  Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+U+0711 NSM Mark: Non-spacing mark, syriac, Extend, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, uppercase]
+U+1B04 L   Mark: Spacing mark, balinese, SpacingMark, [dash, emoji, extendedpictographic, graphemebase, patternsyntax]
+U+1111 L   Letter: Other letter, hangul, Hangul syllable type L, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1169 L   Letter: Other letter, hangul, Hangul syllable type V, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+11FE L   Letter: Other letter, hangul, Hangul syllable type T, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE4C L   Letter: Other letter, hangul, Hangul syllable type LV, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD89 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]

 findprop 118a0 11ac7 16ad0
-U+118A0 Letter: Upper case letter, Warang_Citi, Other, U+118C0
-U+11AC7 Letter: Other letter, Pau_Cin_Hau, Other
-U+16AD0 Letter: Other letter, Bassa_Vah, Other
+U+118A0 L   Letter: Upper case letter, warangciti, Other, U+118C0, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+11AC7 L   Letter: Other letter, paucinhau, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+16AD0 L   Letter: Other letter, bassavah, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]

 findprop 11700 14400 108e0 11280 1d800
-U+11700 Letter: Other letter, Ahom, Other
-U+14400 Letter: Other letter, Anatolian_Hieroglyphs, Other
-U+108E0 Letter: Other letter, Hatran, Other
-U+11280 Letter: Other letter, Multani, Other
-U+1D800 Symbol: Other symbol, SignWriting, Other
+U+11700 L   Letter: Other letter, ahom, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+14400 L   Letter: Other letter, anatolianhieroglyphs, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+108E0 R   Letter: Other letter, hatran, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+11280 L   Letter: Other letter, multani, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1D800 L   Symbol: Other symbol, signwriting, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]

 findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
-U+11800 Letter: Other letter, Dogra, Other
-U+1E903 Letter: Upper case letter, Adlam, Other, U+1E925
-U+11DA9 Number: Decimal number, Gunjala_Gondi, Other
-U+10D27 Mark: Non-spacing mark, Hanifi_Rohingya, Extend
-U+11EE0 Letter: Other letter, Makasar, Other
-U+16E48 Letter: Upper case letter, Medefaidrin, Other, U+16E68
-U+10F27 Letter: Other letter, Old_Sogdian, Other
-U+10F30 Letter: Other letter, Sogdian, Other
+U+11800 L   Letter: Other letter, dogra, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+1E903 R   Letter: Upper case letter, adlam, Other, U+1E925, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+11DA9 L   Number: Decimal number, gunjalagondi, Other, [graphemebase, patternsyntax, terminalpunctuation]
+U+10D27 NSM Mark: Non-spacing mark, hanifirohingya, Extend, [extendedpictographic, graphemebase, patternsyntax]
+U+11EE0 L   Letter: Other letter, makasar, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+16E48 L   Letter: Upper case letter, medefaidrin, Other, U+16E68, [alphabetic, graphemeextend, idcontinue, xidcontinue]
+U+10F27 R   Letter: Other letter, oldsogdian, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10F30 AL  Letter: Other letter, sogdian, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]

 findprop  a836  a833  1cf4  20f0  1cd0
-U+A836 Symbol: Other symbol, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta]
-U+A833 Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta]
-U+1CF4 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada]
-U+20F0 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin]
-U+1CD0 Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada]
+U+A836 L   Symbol: Other symbol, common, Other, [devanagari, gurmukhi, gujarati, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra], [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+A833 L   Number: Other number, common, Other, [devanagari, gurmukhi, gujarati, kannada, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra, nandinagari], [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+1CF4 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, kannada, grantha], [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+20F0 NSM Mark: Non-spacing mark, inherited, Extend, [latin, devanagari, grantha], [caseignorable, graphemebase, patternsyntax, quotationmark]
+U+1CD0 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, bengali, kannada, grantha], [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]

 findprop 32ff
-U+32FF Symbol: Other symbol, Common, Other, [Han]
+U+32FF L   Symbol: Other symbol, common, Other, [han], [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]

 findprop 1f16d
-U+1F16D Symbol: Other symbol, Common, Extended Pictographic
+U+1F16D ON  Symbol: Other symbol, common, Extended Pictographic, [ascii, sentenceterminal, unifiedideograph, whitespace, xidcontinue]

 findprop U+10e93 U+10eaa
-U+10E93 Letter: Other letter, Yezidi, Other
-U+10EAA Control: Unassigned, Unknown, Other
+U+10E93 R   Letter: Other letter, yezidi, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10EAA R   Control: Unassigned, unknown, Other
+
+findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
+U+0602 AN  Control: Format, arabic, Prepend, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, lowercase]
+U+202A LRE Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202B RLE Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202C PDF Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+2068 FSI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+2069 PDI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202D LRO Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+202E RLO Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+U+2067 RLI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
--- a/maint/ucptestdata/testoutput2
+++ b/maint/ucptestdata/testoutput2
@ -1,188 +1,298 @@
 find script Han
-U+2E80..U+2E99 Symbol: Other symbol, Han, Other
-U+2E9B..U+2EF3 Symbol: Other symbol, Han, Other
-U+2F00..U+2FD5 Symbol: Other symbol, Han, Other
-        U+3005 Letter: Modifier letter, Han, Other
-        U+3007 Number: Letter number, Han, Other
-U+3021..U+3029 Number: Letter number, Han, Other
-U+3038..U+303A Number: Letter number, Han, Other
-        U+303B Letter: Modifier letter, Han, Other
-U+3400..U+4DBF Letter: Other letter, Han, Other
-U+4E00..U+9FFC Letter: Other letter, Han, Other
-U+F900..U+FA6D Letter: Other letter, Han, Other
-U+FA70..U+FAD9 Letter: Other letter, Han, Other
-U+16FF0..U+16FF1 Mark: Spacing mark, Han, SpacingMark
-U+20000..U+2A6DD Letter: Other letter, Han, Other
-U+2A700..U+2B734 Letter: Other letter, Han, Other
-U+2B740..U+2B81D Letter: Other letter, Han, Other
-U+2B820..U+2CEA1 Letter: Other letter, Han, Other
-U+2CEB0..U+2EBE0 Letter: Other letter, Han, Other
-U+2F800..U+2FA1D Letter: Other letter, Han, Other
-U+30000..U+3134A Letter: Other letter, Han, Other
+U+2E80..U+2E99 ON  Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart]
+U+2E9B..U+2EF3 ON  Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart]
+U+2F00..U+2FD5 ON  Symbol: Other symbol, han, Other, [ascii, sentenceterminal, unifiedideograph, whitespace, xidstart]
+        U+3005 L   Letter: Modifier letter, han, Other, [emoji, emojimodifierbase, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+3007 L   Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+3021..U+3029 L   Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+3038..U+303A L   Number: Letter number, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+303B L   Letter: Modifier letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
+U+3400..U+4DBF L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+4E00..U+9FFF L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+F900..U+FA0D L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA0E..U+FA0F L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA10 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+FA11 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA12 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA13..U+FA14 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FA15..U+FA1E L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+FA1F L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA20 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+FA21 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+        U+FA22 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA23..U+FA24 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FA25..U+FA26 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA27..U+FA29 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FA2A..U+FA6D L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+FA70..U+FAD9 L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+        U+16FE2 ON  Punctuation: Other punctuation, han, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+       U+16FE3 L   Letter: Modifier letter, han, Other, [emoji, emojimodifierbase, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+16FF0..U+16FF1 L   Mark: Spacing mark, han, SpacingMark, [caseignorable, graphemeextend, idcontinue, ideographic, xidcontinue]
+U+20000..U+2A6DF L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2A700..U+2B738 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2B740..U+2B81D L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2B820..U+2CEA1 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2CEB0..U+2EBE0 L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+2F800..U+2FA1D L   Letter: Other letter, han, Other, [sentenceterminal, unifiedideograph, xidcontinue, xidstart]
+U+30000..U+3134A L   Letter: Other letter, han, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
 find type Pe script Common scriptx Hangul
-U+3009 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+300B Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+300D Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+300F Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+3011 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+3015 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+3017 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+3019 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+301B Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
-U+301E..U+301F Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
-        U+FF63 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
+U+3009 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+300B ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+300D ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [graphemebase, sentenceterminal, terminalpunctuation]
+U+300F ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [graphemebase, sentenceterminal, terminalpunctuation]
+U+3011 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+3015 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+3017 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+3019 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+301B ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, dash, emojimodifier, emojimodifierbase]
+U+301E..U+301F ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [softdotted, terminalpunctuation, unifiedideograph, xidcontinue, xidstart]
+        U+FF63 ON  Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, emojimodifier, emojimodifierbase]
 find type Sk
-U+005E Symbol: Modifier symbol, Common, Other
-U+0060 Symbol: Modifier symbol, Common, Other
-U+00A8 Symbol: Modifier symbol, Common, Other
-U+00AF Symbol: Modifier symbol, Common, Other
-U+00B4 Symbol: Modifier symbol, Common, Other
-U+00B8 Symbol: Modifier symbol, Common, Other
-U+02C2..U+02C5 Symbol: Modifier symbol, Common, Other
-U+02D2..U+02DF Symbol: Modifier symbol, Common, Other
-U+02E5..U+02E9 Symbol: Modifier symbol, Common, Other
-U+02EA..U+02EB Symbol: Modifier symbol, Bopomofo, Other
-        U+02ED Symbol: Modifier symbol, Common, Other
-U+02EF..U+02FF Symbol: Modifier symbol, Common, Other
-        U+0375 Symbol: Modifier symbol, Greek, Other
-        U+0384 Symbol: Modifier symbol, Greek, Other
-        U+0385 Symbol: Modifier symbol, Common, Other
-        U+1FBD Symbol: Modifier symbol, Greek, Other
-U+1FBF..U+1FC1 Symbol: Modifier symbol, Greek, Other
-U+1FCD..U+1FCF Symbol: Modifier symbol, Greek, Other
-U+1FDD..U+1FDF Symbol: Modifier symbol, Greek, Other
-U+1FED..U+1FEF Symbol: Modifier symbol, Greek, Other
-U+1FFD..U+1FFE Symbol: Modifier symbol, Greek, Other
-U+309B..U+309C Symbol: Modifier symbol, Common, Other, [Hiragana, Katakana]
-U+A700..U+A707 Symbol: Modifier symbol, Common, Other, [Han, Latin]
-U+A708..U+A716 Symbol: Modifier symbol, Common, Other
-U+A720..U+A721 Symbol: Modifier symbol, Common, Other
-U+A789..U+A78A Symbol: Modifier symbol, Common, Other
-        U+AB5B Symbol: Modifier symbol, Common, Other
-U+AB6A..U+AB6B Symbol: Modifier symbol, Common, Other
-U+FBB2..U+FBC1 Symbol: Modifier symbol, Arabic, Other
-        U+FF3E Symbol: Modifier symbol, Common, Other
-        U+FF40 Symbol: Modifier symbol, Common, Other
-        U+FFE3 Symbol: Modifier symbol, Common, Other
-U+1F3FB..U+1F3FF Symbol: Modifier symbol, Common, Extend
+U+005E ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+0060 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, changeswhentitlecased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
+U+00A8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00AF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B4 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+00B8 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02C2..U+02C5 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02D2..U+02DF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02E5..U+02E9 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02EA..U+02EB ON  Symbol: Modifier symbol, bopomofo, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+02ED ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+02EF..U+02FF ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0375 ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0384 ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0385 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+0888 AL  Symbol: Modifier symbol, arabic, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, math, softdotted, xidcontinue, xidstart]
+        U+1FBD ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FBF..U+1FC1 ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FCD..U+1FCF ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FDD..U+1FDF ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FED..U+1FEF ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1FFD..U+1FFE ON  Symbol: Modifier symbol, greek, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+309B..U+309C ON  Symbol: Modifier symbol, common, Other, [hiragana, katakana], [alphabetic, bidimirrored, caseignorable, cased, changeswhencasefolded, changeswhenlowercased, changeswhentitlecased, changeswhenuppercased, dash, defaultignorablecodepoint, deprecated, diacritic, emoji, emojicomponent, emojimodifier, emojimodifierbase, emojipresentation, extendedpictographic, extender, graphemebase, graphemeextend, graphemelink, hexdigit, idsbinaryoperator, idstrinaryoperator, idcontinue, idstart, ideographic, sentenceterminal, unifiedideograph, whitespace, xidcontinue]
+U+A700..U+A707 ON  Symbol: Modifier symbol, common, Other, [latin, han], [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+A708..U+A716 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+A720..U+A721 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+A789..U+A78A L   Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+AB5B L   Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+AB6A..U+AB6B ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+FBB2..U+FBC2 AL  Symbol: Modifier symbol, arabic, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, math, softdotted, xidcontinue, xidstart]
+        U+FF3E ON  Symbol: Modifier symbol, common, Other, [asciihexdigit, bidicontrol, bidimirrored, cased, changeswhencasefolded, sentenceterminal, unifiedideograph, whitespace, xidstart]
+        U+FF40 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+        U+FFE3 ON  Symbol: Modifier symbol, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+1F3FB..U+1F3FF ON  Symbol: Modifier symbol, common, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternsyntax, radical, sentenceterminal, terminalpunctuation]
 find type Pd
-U+002D Punctuation: Dash punctuation, Common, Other
-U+058A Punctuation: Dash punctuation, Armenian, Other
-U+05BE Punctuation: Dash punctuation, Hebrew, Other
-U+1400 Punctuation: Dash punctuation, Canadian_Aboriginal, Other
-U+1806 Punctuation: Dash punctuation, Mongolian, Other
-U+2010..U+2015 Punctuation: Dash punctuation, Common, Other
-        U+2E17 Punctuation: Dash punctuation, Common, Other
-        U+2E1A Punctuation: Dash punctuation, Common, Other
-U+2E3A..U+2E3B Punctuation: Dash punctuation, Common, Other
-        U+2E40 Punctuation: Dash punctuation, Common, Other
-        U+301C Punctuation: Dash punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
-        U+3030 Punctuation: Dash punctuation, Common, Extended Pictographic, [Bopomofo, Hangul, Han, Hiragana, Katakana]
-        U+30A0 Punctuation: Dash punctuation, Common, Other, [Hiragana, Katakana]
-U+FE31..U+FE32 Punctuation: Dash punctuation, Common, Other
-        U+FE58 Punctuation: Dash punctuation, Common, Other
-        U+FE63 Punctuation: Dash punctuation, Common, Other
-        U+FF0D Punctuation: Dash punctuation, Common, Other
-        U+10EAD Punctuation: Dash punctuation, Yezidi, Other
+U+002D ES  Punctuation: Dash punctuation, common, Other, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+058A ON  Punctuation: Dash punctuation, armenian, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+05BE R   Punctuation: Dash punctuation, hebrew, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+1400 ON  Punctuation: Dash punctuation, canadianaboriginal, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+1806 ON  Punctuation: Dash punctuation, mongolian, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+2010..U+2015 ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E17 ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E1A ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+U+2E3A..U+2E3B ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E40 ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+2E5D ON  Punctuation: Dash punctuation, common, Other, [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+301C ON  Punctuation: Dash punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [dash, defaultignorablecodepoint, deprecated, emojipresentation, joincontrol, lowercase, patternwhitespace, radical, regionalindicator, softdotted, xidcontinue, xidstart]
+        U+3030 ON  Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+        U+30A0 ON  Punctuation: Dash punctuation, common, Other, [hiragana, katakana], [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+U+FE31..U+FE32 ON  Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+FE58 ON  Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+FE63 ES  Punctuation: Dash punctuation, common, Other, [caseignorable, sentenceterminal, unifiedideograph, xidcontinue]
+        U+FF0D ES  Punctuation: Dash punctuation, common, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
+        U+10EAD R   Punctuation: Dash punctuation, yezidi, Other, [emoji, emojipresentation, extendedpictographic, graphemebase, patternsyntax]
 find gbreak LVT
-U+AC01..U+AC1B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AC1D..U+AC37 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AC39..U+AC53 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AC55..U+AC6F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AC71..U+AC8B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AC8D..U+ACA7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ACA9..U+ACC3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ACC5..U+ACDF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ACE1..U+ACFB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ACFD..U+AD17 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AD19..U+AD33 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AD35..U+AD4F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AD51..U+AD6B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AD6D..U+AD87 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AD89..U+ADA3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ADA5..U+ADBF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ADC1..U+ADDB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ADDD..U+ADF7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+ADF9..U+AE13 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AE15..U+AE2F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AE31..U+AE4B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AE4D..U+AE67 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AE69..U+AE83 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AE85..U+AE9F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AEA1..U+AEBB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AEBD..U+AED7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AED9..U+AEF3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AEF5..U+AF0F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AF11..U+AF2B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AF2D..U+AF47 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AF49..U+AF63 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AF65..U+AF7F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AF81..U+AF9B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AF9D..U+AFB7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AFB9..U+AFD3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AFD5..U+AFEF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+AFF1..U+B00B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B00D..U+B027 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B029..U+B043 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B045..U+B05F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B061..U+B07B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B07D..U+B097 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B099..U+B0B3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B0B5..U+B0CF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B0D1..U+B0EB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B0ED..U+B107 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B109..U+B123 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B125..U+B13F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B141..U+B15B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B15D..U+B177 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B179..U+B193 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B195..U+B1AF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B1B1..U+B1CB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B1CD..U+B1E7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B1E9..U+B203 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B205..U+B21F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B221..U+B23B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B23D..U+B257 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B259..U+B273 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B275..U+B28F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B291..U+B2AB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B2AD..U+B2C7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B2C9..U+B2E3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B2E5..U+B2FF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B301..U+B31B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B31D..U+B337 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B339..U+B353 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B355..U+B36F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B371..U+B38B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B38D..U+B3A7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B3A9..U+B3C3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B3C5..U+B3DF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B3E1..U+B3FB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B3FD..U+B417 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B419..U+B433 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B435..U+B44F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B451..U+B46B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B46D..U+B487 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B489..U+B4A3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B4A5..U+B4BF Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B4C1..U+B4DB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B4DD..U+B4F7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B4F9..U+B513 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B515..U+B52F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B531..U+B54B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B54D..U+B567 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B569..U+B583 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B585..U+B59F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B5A1..U+B5BB Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B5BD..U+B5D7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B5D9..U+B5F3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B5F5..U+B60F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B611..U+B62B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B62D..U+B647 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B649..U+B663 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B665..U+B67F Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B681..U+B69B Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B69D..U+B6B7 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B6B9..U+B6D3 Letter: Other letter, Hangul, Hangul syllable type LVT
-U+B6D5..U+B6EF Letter: Other letter, Hangul, Hangul syllable type LVT
+U+AC01..U+AC1B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC1D..U+AC37 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC39..U+AC53 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC55..U+AC6F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC71..U+AC8B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AC8D..U+ACA7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACA9..U+ACC3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACC5..U+ACDF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACE1..U+ACFB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ACFD..U+AD17 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD19..U+AD33 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD35..U+AD4F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD51..U+AD6B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD6D..U+AD87 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AD89..U+ADA3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADA5..U+ADBF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADC1..U+ADDB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADDD..U+ADF7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+ADF9..U+AE13 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE15..U+AE2F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE31..U+AE4B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE4D..U+AE67 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE69..U+AE83 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AE85..U+AE9F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AEA1..U+AEBB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AEBD..U+AED7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AED9..U+AEF3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AEF5..U+AF0F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF11..U+AF2B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF2D..U+AF47 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF49..U+AF63 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF65..U+AF7F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF81..U+AF9B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AF9D..U+AFB7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AFB9..U+AFD3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AFD5..U+AFEF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+AFF1..U+B00B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B00D..U+B027 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B029..U+B043 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B045..U+B05F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B061..U+B07B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B07D..U+B097 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B099..U+B0B3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B0B5..U+B0CF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B0D1..U+B0EB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B0ED..U+B107 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B109..U+B123 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B125..U+B13F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B141..U+B15B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B15D..U+B177 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B179..U+B193 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B195..U+B1AF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B1B1..U+B1CB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B1CD..U+B1E7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B1E9..U+B203 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B205..U+B21F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B221..U+B23B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B23D..U+B257 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B259..U+B273 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B275..U+B28F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B291..U+B2AB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B2AD..U+B2C7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B2C9..U+B2E3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B2E5..U+B2FF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B301..U+B31B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B31D..U+B337 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B339..U+B353 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B355..U+B36F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B371..U+B38B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B38D..U+B3A7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3A9..U+B3C3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3C5..U+B3DF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3E1..U+B3FB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B3FD..U+B417 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B419..U+B433 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B435..U+B44F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B451..U+B46B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B46D..U+B487 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B489..U+B4A3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4A5..U+B4BF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4C1..U+B4DB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4DD..U+B4F7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B4F9..U+B513 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B515..U+B52F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B531..U+B54B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B54D..U+B567 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B569..U+B583 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B585..U+B59F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5A1..U+B5BB L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5BD..U+B5D7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5D9..U+B5F3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B5F5..U+B60F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B611..U+B62B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B62D..U+B647 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B649..U+B663 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B665..U+B67F L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B681..U+B69B L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B69D..U+B6B7 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B6B9..U+B6D3 L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+B6D5..U+B6EF L   Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
 ...
+find script Old_Uyghur
+U+10F70..U+10F81 R   Letter: Other letter, olduyghur, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+10F82..U+10F85 NSM Mark: Non-spacing mark, olduyghur, Extend, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
+U+10F86..U+10F89 R   Punctuation: Other punctuation, olduyghur, Other, [bidimirrored, graphemebase, math, patternsyntax]
+find bidi PDF
+U+202C PDF Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi CS
+U+002C CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+002E CS  Punctuation: Other punctuation, common, Other, [graphemebase, whitespace]
+U+002F CS  Punctuation: Other punctuation, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
+U+003A CS  Punctuation: Other punctuation, common, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
+U+00A0 CS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+060C CS  Punctuation: Other punctuation, common, Other, [arabic, syriac, thaana, nko, hanifirohingya, yezidi], [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+202F CS  Separator: Space separator, common, Other, [latin, mongolian], [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+2044 CS  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+U+FE50 CS  Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+FE52 CS  Punctuation: Other punctuation, common, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FE55 CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FF0C CS  Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+FF0E CS  Punctuation: Other punctuation, common, Other, [changeswhenuppercased, deprecated, emojimodifier, emojimodifierbase, extender, quotationmark, sentenceterminal, xidcontinue, xidstart]
+U+FF0F CS  Punctuation: Other punctuation, common, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
+U+FF1A CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+find bidi CS type Sm
+U+2044 CS  Symbol: Mathematical symbol, common, Other, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
+find bidi B
+U+000A B   Control: Control, common, LF, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000D B   Control: Control, common, CR, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+001C..U+001E B   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+        U+0085 B   Control: Control, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+        U+2029 B   Separator: Paragraph separator, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+find bidi FSI
+U+2068 FSI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi PDI
+U+2069 PDI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi RLI
+U+2067 RLI Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi RLO
+U+202E RLO Control: Format, common, Control, [extendedpictographic, graphemebase, math, patternsyntax]
+find bidi S
+U+0009 S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+000B S   Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+001F S   Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
+find bidi WS
+U+000C WS  Control: Control, common, Control, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
+U+0020 WS  Separator: Space separator, common, Other, [ascii, emoji, emojicomponent, graphemebase, patternsyntax]
+U+1680 WS  Separator: Space separator, ogham, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+U+2000..U+200A WS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+        U+2028 WS  Separator: Line separator, common, Control, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, xidcontinue]
+        U+205F WS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+        U+3000 WS  Separator: Space separator, common, Other, [alphabetic, caseignorable, cased, diacritic, graphemebase, idcontinue, idstart, lowercase]
+find script bopo
+U+02EA..U+02EB ON  Symbol: Modifier symbol, bopomofo, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, math, uppercase, xidcontinue, xidstart]
+U+3105..U+312F L   Letter: Other letter, bopomofo, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+U+31A0..U+31BF L   Letter: Other letter, bopomofo, Other, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
+find bool prependedconcatenationmark
+U+00AD BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+180E BN  Control: Format, mongolian, Control, [caseignorable, prependedconcatenationmark]
+U+200B BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2060 BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2118 ON  Symbol: Mathematical symbol, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+3030 ON  Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+AAC0 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+AAC2 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+FE0F NSM Mark: Non-spacing mark, inherited, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark]
+U+FE55 CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FEFF BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+FF1A CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FF21..U+FF26 L   Letter: Upper case letter, latin, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+10D22..U+10D23 AL  Letter: Other letter, hanifirohingya, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+       U+1135D L   Letter: Other letter, grantha, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+1BCA0..U+1BCA3 BN  Control: Format, common, Control, [duployan], [caseignorable, prependedconcatenationmark]
+U+1D173..U+1D17A BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+1F1E6..U+1F1FF L   Symbol: Other symbol, common, Regional Indicator, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+find bool pcm
+U+00AD BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+180E BN  Control: Format, mongolian, Control, [caseignorable, prependedconcatenationmark]
+U+200B BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2060 BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+2118 ON  Symbol: Mathematical symbol, common, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+3030 ON  Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, lowercase, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+AAC0 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+AAC2 L   Letter: Other letter, taiviet, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+U+FE0F NSM Mark: Non-spacing mark, inherited, Extend, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, logicalorderexception, math, patternwhitespace, prependedconcatenationmark]
+U+FE55 CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FEFF BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+FF1A CS  Punctuation: Other punctuation, common, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, joincontrol, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+FF21..U+FF26 L   Letter: Upper case letter, latin, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, noncharactercodepoint, patternwhitespace, prependedconcatenationmark]
+U+10D22..U+10D23 AL  Letter: Other letter, hanifirohingya, Other, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, logicalorderexception, lowercase, math, patternwhitespace, prependedconcatenationmark]
+       U+1135D L   Letter: Other letter, grantha, Other, [changeswhencasemapped, changeswhentitlecased, emojimodifier, emojimodifierbase, graphemeextend, hexdigit, logicalorderexception, lowercase, math, noncharactercodepoint, patternsyntax, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
+U+1BCA0..U+1BCA3 BN  Control: Format, common, Control, [duployan], [caseignorable, prependedconcatenationmark]
+U+1D173..U+1D17A BN  Control: Format, common, Control, [caseignorable, prependedconcatenationmark]
+U+1F1E6..U+1F1FF L   Symbol: Other symbol, common, Regional Indicator, [changeswhencasemapped, changeswhenuppercased, emojimodifier, emojimodifierbase, math, patternwhitespace, prependedconcatenationmark, quotationmark, radical, regionalindicator, sentenceterminal, softdotted, terminalpunctuation, unifiedideograph, uppercase, variationselector, whitespace, xidcontinue, xidstart]
--- a/src/config.h.generic
+++ b/src/config.h.generic
@ -97,6 +97,9 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Have PTHREAD_PRIO_INHERIT. */
 /* #undef HAVE_PTHREAD_PRIO_INHERIT */

+/* Define to 1 if you have the <readline.h> header file. */
+/* #undef HAVE_READLINE_H */
+
 /* Define to 1 if you have the <readline/history.h> header file. */
 /* #undef HAVE_READLINE_HISTORY_H */

@ -233,7 +236,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.38-RC1"
+#define PACKAGE_STRING "PCRE2 10.40"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@ -242,7 +245,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.38-RC1"
+#define PACKAGE_VERSION "10.40"

 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
   parentheses (of any kind) in a pattern. This limits the amount of system
@ -435,7 +438,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif

 /* Version number of package */
-#define VERSION "10.38-RC1"
+#define VERSION "10.40"

 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
--- a/src/config.h.in
+++ b/src/config.h.in
@ -97,6 +97,9 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Have PTHREAD_PRIO_INHERIT. */
 #undef HAVE_PTHREAD_PRIO_INHERIT

+/* Define to 1 if you have the <readline.h> header file. */
+#undef HAVE_READLINE_H
+
 /* Define to 1 if you have the <readline/history.h> header file. */
 #undef HAVE_READLINE_HISTORY_H

--- a/src/pcre2.h.generic
+++ b/src/pcre2.h.generic
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */

 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           38
-#define PCRE2_PRERELEASE      -RC1
-#define PCRE2_DATE            2021-08-31
+#define PCRE2_MINOR           40
+#define PCRE2_PRERELEASE      
+#define PCRE2_DATE            2022-04-14

 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
@ -84,8 +84,8 @@ set, we ensure here that it has no effect. */
 /* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
 uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
 not have stdint.h, which is why we use inttypes.h, which according to the C
-standard is a superset of stdint.h. If none of these headers are available,
-the relevant values must be provided by some other means. */
+standard is a superset of stdint.h. If inttypes.h is not available the build
+will break and the relevant values must be provided by some other means. */

 #include <limits.h>
 #include <stdlib.h>
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@ -84,8 +84,8 @@ set, we ensure here that it has no effect. */
 /* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
 uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
 not have stdint.h, which is why we use inttypes.h, which according to the C
-standard is a superset of stdint.h. If none of these headers are available,
-the relevant values must be provided by some other means. */
+standard is a superset of stdint.h. If inttypes.h is not available the build
+will break and the relevant values must be provided by some other means. */

 #include <limits.h>
 #include <stdlib.h>
--- a/src/pcre2_auto_possess.c
+++ b/src/pcre2_auto_possess.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -123,18 +123,21 @@ opcode is used to select the column. The values are as follows:
 */

 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
-/* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
-  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
-  { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
-  { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
-  { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
-  { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
-  { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
-  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
-  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
-  { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
-  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
-  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
+/* ANY LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_ANY */
+  { 0,  3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
+  { 0,  0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
+  { 0,  0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
+  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
+  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
+  { 0,  3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
+  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
+  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
+  { 0,  0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
 };

 /* This table is used to check whether auto-possessification is possible
@ -196,6 +199,7 @@ static BOOL
 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
  BOOL negated)
 {
+BOOL ok;
 const uint32_t *p;
 const ucd_record *prop = GET_UCD(c);

@ -215,6 +219,11 @@ switch(ptype)
  case PT_SC:
  return (pdata == prop->script) == negated;

+  case PT_SCX:
+  ok = (pdata == prop->script
+        || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
+  return ok == negated;
+
  /* These are specials */

  case PT_ALNUM:
@ -251,6 +260,14 @@ switch(ptype)
    if (c == *p++) return negated;
    }
  break;  /* Control never reaches here */
+
+  /* Haven't yet thought these through. */
+
+  case PT_BIDICL:
+  return FALSE;
+
+  case PT_BOOL:
+  return FALSE;
  }

 return FALSE;
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -124,7 +124,7 @@ static unsigned int

 static int
  compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
-    uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *,
+    uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
    compile_block *, PCRE2_SIZE *);

 static int
@ -137,7 +137,7 @@ static BOOL

 static int
  check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *,
-    compile_block *);
+    compile_block *, int *);


 /*************************************************
@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */

 #define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))

-/* Private flags added to firstcu and reqcu. */
+/* Values and flags for the unsigned xxcuflags variables that accompany xxcu
+variables, which are concerned with first and required code units. A value
+greater than or equal to REQ_NONE means "no code unit set"; otherwise the
+matching xxcu variable is set, and the low valued bits are relevant. */

-#define REQ_CASELESS    (1u << 0)       /* Indicates caselessness */
-#define REQ_VARY        (1u << 1)       /* reqcu followed non-literal item */
-/* Negative values for the firstcu and reqcu flags */
-#define REQ_UNSET       (-2)            /* Not yet found anything */
-#define REQ_NONE        (-1)            /* Found not fixed char */
+#define REQ_UNSET     0xffffffffu  /* Not yet found anything */
+#define REQ_NONE      0xfffffffeu  /* Found not fixed character */
+#define REQ_CASELESS  0x00000001u  /* Code unit in xxcu is caseless */
+#define REQ_VARY      0x00000002u  /* Code unit is followed by non-literal */

 /* These flags are used in the groupinfo vector. */

@ -1264,8 +1266,10 @@ PCRE2_SIZE* ref_count;

 if (code != NULL)
  {
+#ifdef SUPPORT_JIT   
  if (code->executable_jit != NULL)
    PRIV(jit_free)(code->executable_jit, &code->memctl);
+#endif

  if ((code->flags & PCRE2_DEREF_TABLES) != 0)
    {
@ -2088,7 +2092,9 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr,
 PCRE2_UCHAR c;
 PCRE2_SIZE i, bot, top;
 PCRE2_SPTR ptr = *ptrptr;
-PCRE2_UCHAR name[32];
+PCRE2_UCHAR name[50];
+PCRE2_UCHAR *vptr = NULL;
+uint16_t ptscript = PT_NOTSCRIPT;

 if (ptr >= cb->end_pattern) goto ERROR_RETURN;
 c = *ptr++;
@ -2100,36 +2106,95 @@ negation. */
 if (c == CHAR_LEFT_CURLY_BRACKET)
  {
  if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+
  if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
    {
    *negptr = TRUE;
    ptr++;
    }
+
  for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++)
    {
    if (ptr >= cb->end_pattern) goto ERROR_RETURN;
    c = *ptr++;
+    while (c == '_' || c == '-' || isspace(c))
+      {
+      if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+      c = *ptr++;
+      }
    if (c == CHAR_NUL) goto ERROR_RETURN;
    if (c == CHAR_RIGHT_CURLY_BRACKET) break;
-    name[i] = c;
+    name[i] = tolower(c);
+    if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i;
    }
+
  if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
  name[i] = 0;
  }

-/* Otherwise there is just one following character, which must be an ASCII
-letter. */
+/* If { doesn't follow \p or \P there is just one following character, which
+must be an ASCII letter. */

 else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0)
  {
-  name[0] = c;
+  name[0] = tolower(c);
  name[1] = 0;
  }
 else goto ERROR_RETURN;

 *ptrptr = ptr;

-/* Search for a recognized property name using binary chop. */
+/* If the property contains ':' or '=', the class name and its value are
+specified separately. The following are supported:
+
+  . Bidi_Class (synonym bc), for which the property names are "bidi<name>".
+  . Script (synonym sc), for which the property name is the script name.
+  . Script_Extensions (synonym scx), ditto.
+
+As this is a small number, we currently just check the names directly. If this
+grows, a sorted table and a switch will be neater.
+
+For both the script properties, set a PT_xxx value so that (1) they can be
+distinguished and (2) invalid script names that happen to be the name of
+another property can be diagnosed. */
+
+if (vptr != NULL)
+  {
+  int offset = 0;
+  PCRE2_UCHAR sname[8];
+
+  *vptr = 0;   /* Terminate property name */
+  if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 ||
+      PRIV(strcmp_c8)(name, STRING_bc) == 0)
+    {
+    offset = 4;
+    sname[0] = CHAR_b;
+    sname[1] = CHAR_i;  /* There is no strcpy_c8 function */
+    sname[2] = CHAR_d;
+    sname[3] = CHAR_i;
+    }
+
+  else if (PRIV(strcmp_c8)(name, STRING_script) == 0 ||
+           PRIV(strcmp_c8)(name, STRING_sc) == 0)
+    ptscript = PT_SC;
+
+  else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 ||
+           PRIV(strcmp_c8)(name, STRING_scx) == 0)
+    ptscript = PT_SCX;
+
+  else
+    {
+    *errorcodeptr = ERR47;
+    return FALSE;
+    }
+
+  /* Adjust the string in name[] as needed */
+
+  memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR));
+  if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR));
+  }
+
+/* Search for a recognized property using binary chop. */

 bot = 0;
 top = PRIV(utt_size);
@ -2139,15 +2204,37 @@ while (bot < top)
  int r;
  i = (bot + top) >> 1;
  r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
+
+  /* When a matching property is found, some extra checking is needed when the
+  \p{xx:yy} syntax is used and xx is either sc or scx. */
+
  if (r == 0)
    {
-    *ptypeptr = PRIV(utt)[i].type;
    *pdataptr = PRIV(utt)[i].value;
-    return TRUE;
+    if (vptr == NULL || ptscript == PT_NOTSCRIPT)
+      {
+      *ptypeptr = PRIV(utt)[i].type;
+      return TRUE;
+      }
+
+    switch (PRIV(utt)[i].type)
+      {
+      case PT_SC:
+      *ptypeptr = PT_SC;
+      return TRUE;
+
+      case PT_SCX:
+      *ptypeptr = ptscript;
+      return TRUE;
+      }
+
+    break;  /* Non-script found */
    }
+
  if (r > 0) bot = i + 1; else top = i;
  }
-*errorcodeptr = ERR47;   /* Unrecognized name */
+
+*errorcodeptr = ERR47;   /* Unrecognized property */
 return FALSE;

 ERROR_RETURN:            /* Malformed \P or \p */
@ -5285,9 +5372,9 @@ Arguments:
  pptrptr           points to the current parsed pattern pointer
  errorcodeptr      points to error code variable
  firstcuptr        place to put the first required code unit
-  firstcuflagsptr   place to put the first code unit flags, or a negative number
+  firstcuflagsptr   place to put the first code unit flags
  reqcuptr          place to put the last required code unit
-  reqcuflagsptr     place to put the last required code unit flags, or a negative number
+  reqcuflagsptr     place to put the last required code unit flags
  bcptr             points to current branch chain
  cb                contains pointers to tables etc.
  lengthptr         NULL during the real compile phase
@ -5300,8 +5387,8 @@ Returns:            0 There's been an error, *errorcodeptr is non-zero

 static int
 compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
-  int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
-  uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
+  int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr,
+  uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr,
  compile_block *cb, PCRE2_SIZE *lengthptr)
 {
 int bravalue = 0;
@ -5316,9 +5403,9 @@ uint32_t zeroreqcu, zerofirstcu;
 uint32_t escape;
 uint32_t *pptr = *pptrptr;
 uint32_t meta, meta_arg;
-int32_t firstcuflags, reqcuflags;
-int32_t zeroreqcuflags, zerofirstcuflags;
-int32_t req_caseopt, reqvary, tempreqvary;
+uint32_t firstcuflags, reqcuflags;
+uint32_t zeroreqcuflags, zerofirstcuflags;
+uint32_t req_caseopt, reqvary, tempreqvary;
 PCRE2_SIZE offset = 0;
 PCRE2_SIZE length_prevgroup = 0;
 PCRE2_UCHAR *code = *codeptr;
@ -5374,13 +5461,13 @@ item types that can be repeated set these backoff variables appropriately. */
 firstcu = reqcu = zerofirstcu = zeroreqcu = 0;
 firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;

-/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
+/* The variable req_caseopt contains either the REQ_CASELESS bit or zero,
 according to the current setting of the caseless flag. The REQ_CASELESS value
 leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables
 to record the case status of the value. This is used only for ASCII characters.
 */

-req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
+req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;

 /* Switch on next META item until the end of the branch */

@ -5395,13 +5482,12 @@ for (;; pptr++)
  BOOL possessive_quantifier;
  BOOL note_group_empty;
  int class_has_8bitchar;
-  int i;
  uint32_t mclength;
  uint32_t skipunits;
  uint32_t subreqcu, subfirstcu;
  uint32_t groupnumber;
  uint32_t verbarglen, verbculen;
-  int32_t subreqcuflags, subfirstcuflags;  /* Must be signed */
+  uint32_t subreqcuflags, subfirstcuflags;
  open_capitem *oc;
  PCRE2_UCHAR mcbuffer[8];

@ -5770,9 +5856,9 @@ for (;; pptr++)
        if (taboffset >= 0)
          {
          if (tabopt >= 0)
-            for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
+            for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
          else
-            for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
+            for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
          }

        /* Now see if we need to remove any special characters. An option
@ -5786,9 +5872,9 @@ for (;; pptr++)
        being built and we are done. */

        if (local_negate)
-          for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i];
+          for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]);
        else
-          for (i = 0; i < 32; i++) classbits[i] |= pbits[i];
+          for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];

        /* Every class contains at least one < 256 character. */

@ -5827,21 +5913,23 @@ for (;; pptr++)
        switch(escape)
          {
          case ESC_d:
-          for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
+          for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
          break;

          case ESC_D:
          should_flip_negation = TRUE;
-          for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit];
+          for (int i = 0; i < 32; i++)
+            classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
          break;

          case ESC_w:
-          for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
+          for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
          break;

          case ESC_W:
          should_flip_negation = TRUE;
-          for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word];
+          for (int i = 0; i < 32; i++)
+            classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
          break;

          /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
@ -5852,12 +5940,13 @@ for (;; pptr++)
          longer treat \s and \S specially. */

          case ESC_s:
-          for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
+          for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
          break;

          case ESC_S:
          should_flip_negation = TRUE;
-          for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
+          for (int i = 0; i < 32; i++)
+            classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
          break;

          /* When adding the horizontal or vertical space lists to a class, or
@ -6098,7 +6187,7 @@ for (;; pptr++)
        if (negate_class && !xclass_has_prop)
          {
          /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
-          for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
+          for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
          }
        memcpy(code, classbits, 32);
        code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
@ -6124,7 +6213,7 @@ for (;; pptr++)
      if (negate_class)
        {
       /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
-       for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
+       for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
       }
      memcpy(code, classbits, 32);
      }
@ -6198,7 +6287,7 @@ for (;; pptr++)
    verbarglen = *(++pptr);
    verbculen = 0;
    tempcode = code++;
-    for (i = 0; i < (int)verbarglen; i++)
+    for (int i = 0; i < (int)verbarglen; i++)
      {
      meta = *(++pptr);
 #ifdef SUPPORT_UNICODE
@ -6247,6 +6336,7 @@ for (;; pptr++)
    bravalue = OP_COND;
      {
      int count, index;
+      unsigned int i;
      PCRE2_SPTR name;
      named_group *ng = cb->named_groups;
      uint32_t length = *(++pptr);
@ -6286,7 +6376,7 @@ for (;; pptr++)
        groupnumber = 0;
        if (meta == META_COND_RNUMBER)
          {
-          for (i = 1; i < (int)length; i++)
+          for (i = 1; i < length; i++)
            {
            groupnumber = groupnumber * 10 + name[i] - CHAR_0;
            if (groupnumber > MAX_GROUP_NUMBER)
@ -6608,7 +6698,7 @@ for (;; pptr++)

      if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
        {
-        if (subfirstcuflags >= 0)
+        if (subfirstcuflags < REQ_NONE)
          {
          firstcu = subfirstcu;
          firstcuflags = subfirstcuflags;
@ -6622,7 +6712,7 @@ for (;; pptr++)
      into reqcu if there wasn't one, using the vary flag that was in
      existence beforehand. */

-      else if (subfirstcuflags >= 0 && subreqcuflags < 0)
+      else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE)
        {
        subreqcu = subfirstcu;
        subreqcuflags = subfirstcuflags | tempreqvary;
@ -6631,7 +6721,7 @@ for (;; pptr++)
      /* If the subpattern set a required code unit (or set a first code unit
      that isn't really the first code unit - see above), set it. */

-      if (subreqcuflags >= 0)
+      if (subreqcuflags < REQ_NONE)
        {
        reqcu = subreqcu;
        reqcuflags = subreqcuflags;
@ -6650,7 +6740,7 @@ for (;; pptr++)
    in that example, 'X' ends up set for both. */

    else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) &&
-             subreqcuflags >= 0 && subfirstcuflags >= 0)
+             subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE)
      {
      reqcu = subreqcu;
      reqcuflags = subreqcuflags;
@ -6680,7 +6770,7 @@ for (;; pptr++)
      this name is duplicated. */

      groupnumber = 0;
-      for (i = 0; i < cb->names_found; i++, ng++)
+      for (unsigned int i = 0; i < cb->names_found; i++, ng++)
        {
        if (length == ng->length &&
            PRIV(strncmp)(name, ng->name, length) == 0)
@ -6935,14 +7025,19 @@ for (;; pptr++)
 #endif  /* MAYBE_UTF_MULTI */

      /* Handle the case of a single code unit - either with no UTF support, or
-      with UTF disabled, or for a single-code-unit UTF character. */
+      with UTF disabled, or for a single-code-unit UTF character. In the latter
+      case, for a repeated positive match, get the caseless flag for the
+      required code unit from the previous character, because a class like [Aa]
+      sets a caseless A but by now the req_caseopt flag has been reset. */
+
        {
        mcbuffer[0] = code[-1];
        mclength = 1;
        if (op_previous <= OP_CHARI && repeat_min > 1)
          {
          reqcu = mcbuffer[0];
-          reqcuflags = req_caseopt | cb->req_varyopt;
+          reqcuflags = cb->req_varyopt;
+          if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS;
          }
        }
      goto OUTPUT_SINGLE_REPEAT;  /* Code shared with single character types */
@ -7034,7 +7129,7 @@ for (;; pptr++)
          *lengthptr += delta;
          }

-        else for (i = 0; i < replicate; i++)
+        else for (int i = 0; i < replicate; i++)
          {
          memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
          previous = code;
@ -7210,12 +7305,12 @@ for (;; pptr++)

            else
              {
-              if (groupsetfirstcu && reqcuflags < 0)
+              if (groupsetfirstcu && reqcuflags >= REQ_NONE)
                {
                reqcu = firstcu;
                reqcuflags = firstcuflags;
                }
-              for (i = 1; (uint32_t)i < repeat_min; i++)
+              for (uint32_t i = 1; i < repeat_min; i++)
                {
                memcpy(code, previous, CU2BYTES(len));
                code += len;
@ -7259,14 +7354,14 @@ for (;; pptr++)

          /* This is compiling for real */

-          else for (i = repeat_max - 1; i >= 0; i--)
+          else for (uint32_t i = repeat_max; i >= 1; i--)
            {
            *code++ = OP_BRAZERO + repeat_type;

            /* All but the final copy start a new nesting, maintaining the
            chain of brackets outstanding. */

-            if (i != 0)
+            if (i != 1)
              {
              int linkoffset;
              *code++ = OP_BRA;
@ -7985,9 +8080,9 @@ Arguments:
  errorcodeptr      -> pointer to error code variable
  skipunits         skip this many code units at start (for brackets and OP_COND)
  firstcuptr        place to put the first required code unit
-  firstcuflagsptr   place to put the first code unit flags, or a negative number
+  firstcuflagsptr   place to put the first code unit flags
  reqcuptr          place to put the last required code unit
-  reqcuflagsptr     place to put the last required code unit flags, or a negative number
+  reqcuflagsptr     place to put the last required code unit flags
  bcptr             pointer to the chain of currently open branches
  cb                points to the data block with tables pointers etc.
  lengthptr         NULL during the real compile phase
@ -8001,7 +8096,7 @@ Returns:            0 There has been an error
 static int
 compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
  int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
-  int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr,
+  uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr,
  branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
 {
 PCRE2_UCHAR *code = *codeptr;
@ -8014,9 +8109,9 @@ int okreturn = 1;
 uint32_t *pptr = *pptrptr;
 uint32_t firstcu, reqcu;
 uint32_t lookbehindlength;
-int32_t firstcuflags, reqcuflags;
+uint32_t firstcuflags, reqcuflags;
 uint32_t branchfirstcu, branchreqcu;
-int32_t branchfirstcuflags, branchreqcuflags;
+uint32_t branchfirstcuflags, branchreqcuflags;
 PCRE2_SIZE length;
 branch_chain bc;

@ -8135,9 +8230,9 @@ for (;;)

      if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
        {
-        if (firstcuflags >= 0)
+        if (firstcuflags < REQ_NONE)
          {
-          if (reqcuflags < 0)
+          if (reqcuflags >= REQ_NONE)
            {
            reqcu = firstcu;
            reqcuflags = firstcuflags;
@ -8149,8 +8244,8 @@ for (;;)
      /* If we (now or from before) have no firstcu, a firstcu from the
      branch becomes a reqcu if there isn't a branch reqcu. */

-      if (firstcuflags < 0 && branchfirstcuflags >= 0 &&
-          branchreqcuflags < 0)
+      if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE &&
+          branchreqcuflags >= REQ_NONE)
        {
        branchreqcu = branchfirstcu;
        branchreqcuflags = branchfirstcuflags;
@ -8298,7 +8393,7 @@ Returns:     TRUE or FALSE
 */

 static BOOL
-is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
+is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb,
  int atomcount, BOOL inassert)
 {
 do {
@ -8321,7 +8416,7 @@ do {
            op == OP_SCBRA || op == OP_SCBRAPOS)
     {
     int n = GET2(scode, 1+LINK_SIZE);
-     int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
+     uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1);
     if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
     }

@ -8681,15 +8776,15 @@ Returns:     the fixed first code unit, or 0 with REQ_NONE in flags
 */

 static uint32_t
-find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
+find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert)
 {
 uint32_t c = 0;
-int cflags = REQ_NONE;
+uint32_t cflags = REQ_NONE;

 *flags = REQ_NONE;
 do {
   uint32_t d;
-   int dflags;
+   uint32_t dflags;
   int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
             *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
   PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
@ -8712,9 +8807,8 @@ do {
     case OP_SCRIPT_RUN:
     d = find_firstassertedcu(scode, &dflags, inassert +
       ((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0));
-     if (dflags < 0)
-       return 0;
-     if (cflags < 0) { c = d; cflags = dflags; }
+     if (dflags >= REQ_NONE) return 0;
+     if (cflags >= REQ_NONE) { c = d; cflags = dflags; }
       else if (c != d || cflags != dflags) return 0;
     break;

@ -8727,7 +8821,7 @@ do {
     case OP_MINPLUS:
     case OP_POSPLUS:
     if (inassert == 0) return 0;
-     if (cflags < 0) { c = scode[1]; cflags = 0; }
+     if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; }
       else if (c != scode[1]) return 0;
     break;

@ -8753,7 +8847,7 @@ do {
 #endif
 #endif

-     if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
+     if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; }
       else if (c != scode[1]) return 0;
     break;
     }
@ -9161,7 +9255,7 @@ for (;; pptr++)
    case META_LOOKAHEAD:
    case META_LOOKAHEADNOT:
    case META_LOOKAHEAD_NA:
-    *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb);
+    *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb, lcptr);
    if (*errcodeptr != 0) return -1;

    /* Ignore any qualifiers that follow a lookahead assertion. */
@ -9501,16 +9595,16 @@ Arguments
  retptr    if not NULL, return the ket pointer here
  recurses  chain of recurse_check to catch mutual recursion
  cb        points to the compile block
+  lcptr     points to the loop counter, which is owned by the caller

 Returns:    0 on success, or an errorcode (cb->erroroffset will be set)
 */

 static int
 check_lookbehinds(uint32_t *pptr, uint32_t **retptr,
-  parsed_recurse_check *recurses, compile_block *cb)
+  parsed_recurse_check *recurses, compile_block *cb, int *lcptr)
 {
 int errorcode = 0;
-int loopcount = 0;
 int nestlevel = 0;

 cb->erroroffset = PCRE2_UNSET;
@ -9636,7 +9730,7 @@ for (; *pptr != META_END; pptr++)
    case META_LOOKBEHIND:
    case META_LOOKBEHINDNOT:
    case META_LOOKBEHIND_NA:
-    if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, recurses, cb))
+    if (!set_lookbehind_lengths(&pptr, &errorcode, lcptr, recurses, cb))
      return errorcode;
    break;
    }
@ -9689,7 +9783,7 @@ PCRE2_SIZE re_blocksize;              /* Size of memory block */
 PCRE2_SIZE big32count = 0;            /* 32-bit literals >= 0x80000000 */
 PCRE2_SIZE parsed_size_needed;        /* Needed for parsed pattern */

-int32_t firstcuflags, reqcuflags;     /* Type of first/req code unit */
+uint32_t firstcuflags, reqcuflags;    /* Type of first/req code unit */
 uint32_t firstcu, reqcu;              /* Value of first/req code unit */
 uint32_t setflags = 0;                /* NL and BSR set flags */

@ -10091,7 +10185,8 @@ lengths. */

 if (has_lookbehind)
  {
-  errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb);
+  int loopcount = 0;
+  errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb, &loopcount);
  if (errorcode != 0) goto HAD_CB_ERROR;
  }

@ -10368,13 +10463,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  (these are not saved during the compile because they can cause conflicts with
  actual literals that follow). */

-  if (firstcuflags < 0)
+  if (firstcuflags >= REQ_NONE)
    firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);

  /* Save the data for a first code unit. The existence of one means the
  minimum length must be at least 1. */

-  if (firstcuflags >= 0)
+  if (firstcuflags < REQ_NONE)
    {
    re->first_codeunit = firstcu;
    re->flags |= PCRE2_FIRSTSET;
@ -10421,16 +10516,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  different character and not a non-starting code unit of the first character,
  because the minimum length count is in characters, not code units. */

-  if (reqcuflags >= 0)
+  if (reqcuflags < REQ_NONE)
    {
 #if PCRE2_CODE_UNIT_WIDTH == 16
    if ((re->overall_options & PCRE2_UTF) == 0 ||   /* Not UTF */
-        firstcuflags < 0 ||                         /* First not set */
+        firstcuflags >= REQ_NONE ||                 /* First not set */
        (firstcu & 0xf800) != 0xd800 ||             /* First not surrogate */
        (reqcu & 0xfc00) != 0xdc00)                 /* Req not low surrogate */
 #elif PCRE2_CODE_UNIT_WIDTH == 8
    if ((re->overall_options & PCRE2_UTF) == 0 ||   /* Not UTF */
-        firstcuflags < 0 ||                         /* First not set */
+        firstcuflags >= REQ_NONE ||                 /* First not set */
        (firstcu & 0x80) == 0 ||                    /* First is ASCII */
        (reqcu & 0x80) == 0)                        /* Req is ASCII */
 #endif
@ -10527,4 +10622,10 @@ re = NULL;
 goto EXIT;
 }

+/* These #undefs are here to enable unity builds with CMake. */
+
+#undef NLBLOCK /* Block containing newline information */
+#undef PSSTART /* Field containing processed string start */
+#undef PSEND   /* Field containing processed string end */
+
 /* End of pcre2_compile.c */
--- a/Show More
+++ b/Show More