Cleanup of Makefile.os4, added release rule and a README file for this release

Implement -Z in pcre2grep and update documentation
Added some special heap tests
2022-07-31 20:34:33 +01:00 · 2022-07-30 17:41:49 +01:00 · 2022-07-28 17:58:19 +01:00 · 2022-07-27 18:00:40 +01:00 · 2022-07-27 17:44:55 +01:00 · 2022-07-15 17:18:11 +01:00
249 changed files with 72567 additions and 37316 deletions
--- a/.bazelrc
+++ b/.bazelrc
@ -0,0 +1,3 @@
+common --experimental_enable_bzlmod
+build --incompatible_enable_cc_toolchain_resolution
+build --incompatible_strict_action_env
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -0,0 +1,77 @@
+
+name: Build
+on: [push, pull_request]
+
+jobs:
+  linux:  # Autotools build on the Ubuntu runner, followed by the three test drivers.
+    name: Linux
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4  # v2 is deprecated (retired Node runtime)
+
+      - name: Autogen
+        run: ./autogen.sh
+
+      - name: Configure
+        run: ./configure --enable-jit --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32
+
+      - name: Build
+        run: make
+
+      - name: Test (main test script)
+        run: ./RunTest
+
+      - name: Test (JIT test program)
+        run: ./pcre2_jit_test
+
+      - name: Test (pcre2grep test script)
+        run: ./RunGrepTest
+    
+  alpine:  # Same autotools build and tests, run inside an Alpine container.
+    name: alpine
+    runs-on: ubuntu-latest
+    container: alpine
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4  # v2 is deprecated (retired Node runtime)
+
+      - name: Autotools
+        run: apk add --no-cache automake autoconf gcc libtool make musl-dev
+
+      - name: Autogen
+        run: ./autogen.sh
+
+      - name: Configure
+        run: ./configure --enable-jit --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32
+
+      - name: Build
+        run: make
+
+      - name: Test (main test script)
+        run: ./RunTest
+
+      - name: Test (JIT test program)
+        run: ./pcre2_jit_test
+
+      - name: Test (pcre2grep test script)
+        run: ./RunGrepTest
+        
+  windows:  # CMake build for the Win32 platform, then RunTest.bat from build\Debug.
+    name: 32bit Windows
+    runs-on: windows-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4  # v2 is deprecated (retired Node runtime)
+
+      - name: Configure
+        run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -B build -A Win32
+
+      - name: Build
+        run: cmake --build build
+
+      - name: Test
+        run: |
+          cd build\Debug
+          ..\..\RunTest.bat
+           
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@ -0,0 +1,73 @@
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+#
+# You may wish to alter this file to override the set of languages analyzed,
+# or to provide custom queries or build logic.
+#
+# ******** NOTE ********
+# We have attempted to detect the languages in your repository. Please check
+# the `language` matrix defined below to confirm you have the correct set of
+# supported CodeQL languages.
+#
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: [ master ]
+  schedule:
+    - cron: '27 6 * * 4'
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+  analyze:  # One CodeQL run per entry in the language matrix below.
+    name: Analyze
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'cpp', 'python' ]
+        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        # Learn more about CodeQL language support at https://git.io/codeql-language-support
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # Initializes the CodeQL tools for scanning.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v3
+      with:
+        languages: ${{ matrix.language }}
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+        # queries: ./path/to/local/query, your-org/your-repo/queries@main
+
+    # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
+    # If this step fails, then you should remove it and run the build manually (see below)
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v3
+
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 https://git.io/JvXDl
+
+    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
+    #    and modify them (or add more) to build your code if your project
+    #    uses a compiled language
+
+    #- run: |
+    #   make bootstrap
+    #   make release
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v3
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@ -0,0 +1,55 @@
+name: Scorecards supply-chain security
+on:
+  # Only the default branch is supported.
+  branch_protection_rule:
+  schedule:
+    - cron: '23 17 * * 1'
+  push:
+    branches: [ master ]
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+  analysis:
+    name: Scorecards analysis
+    runs-on: ubuntu-latest
+    permissions:
+      # Needed to upload the results to code-scanning dashboard.
+      security-events: write
+      actions: read
+      contents: read
+
+    steps:
+      - name: "Checkout code"
+        uses: actions/checkout@ec3a7ce113134d7a93b817d10a8272cb61118579 # v2.4.0
+        with:
+          persist-credentials: false
+
+      - name: "Run analysis"
+        uses: ossf/scorecard-action@c1aec4ac820532bab364f02a81873c555a0ba3a1 # v1.0.4
+        with:
+          results_file: results.sarif
+          results_format: sarif
+          # Read-only PAT token. To create it,
+          # follow the steps in https://github.com/ossf/scorecard-action#pat-token-creation.
+          repo_token: ${{ secrets.SCORECARD_READ_TOKEN }}
+          # Publish the results to enable scorecard badges. For more details, see
+          # https://github.com/ossf/scorecard-action#publishing-results.
+          # For private repositories, `publish_results` will automatically be set to `false`,
+          # regardless of the value entered here.
+          publish_results: true
+
+      # Upload the results as artifacts (optional).
+      - name: "Upload artifact"
+        uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2.3.1
+        with:
+          name: SARIF file
+          path: results.sarif
+          retention-days: 5
+
+      # Upload the results to GitHub's code scanning dashboard.
+      - name: "Upload to code-scanning"
+        uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # v1.0.26
+        with:
+          sarif_file: results.sarif
--- a/.gitignore
+++ b/.gitignore
@ -1,47 +1,82 @@
-INSTALL
-Makefile.in
-aclocal.m4
-ar-lib
-compile
-config.guess
-config.sub
-configure
-depcomp
-install-sh
-ltmain.sh
-m4/
-missing
-test-driver
+# Public .gitignore file for PCRE2

-Makefile
-config.log
-config.status
-libpcre2-*.pc
-libtool
-pcre2-config
-src/.deps
-src/config.h
-src/pcre2.h
-src/stamp-h1
-
-.libs
-*.o
-*.lo
 *.a
+*.lo
 *.la
-src/.dirstamp
-src/pcre2_chartables.c
+*.pc
+*.o
+*~
+*.lha

-pcre2grep
-pcre2test
-pcre2_jit_test
+__pycache__
+.deps
+.libs

+INSTALL
+Makefile
+Makefile.in
 RunGrepTest.log
 RunGrepTest.trs
 RunTest.log
 RunTest.trs
+
+aclocal.m4
+ar-lib
+compile
+config.guess
+config.log
+config.status
+config.sub
+configure
+depcomp
+install-sh
+libtool
+ltmain.sh
+missing
+pcre2-config
+pcre2_dftables
+pcre2_jit_test
 pcre2_jit_test.log
 pcre2_jit_test.trs
+pcre2demo
+pcre2fuzzcheck
+pcre2grep
+pcre2test
+test-driver
 test-suite.log
+test3input
+test3output
+testNinput
+testNinputgrep
+teststderr
+teststderrM
+teststderrgrep
+teststdout
+teststdoutM
+testtemp1
+testtemp1grep
+testtemp2
+testtemp2grep
+testtry
+testtrygrep
+
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+
+maint/ucptest
+maint/utf8
+
+src/.deps
+src/.dirstamp
+src/config.h
+src/pcre2.h
+src/pcre2_chartables.c
+src/stamp-h1
+
+/bazel-*
+
+# End

-*~
--- a/12
+++ b/12
@ -2,13 +2,13 @@ THE MAIN PCRE2 LIBRARY CODE
 ---------------------------

 Written by:       Philip Hazel
-Email local part: ph10
-Email domain:     cam.ac.uk
+Email local part: Philip.Hazel
+Email domain:     gmail.com

-University of Cambridge Computing Service,
+Retired from University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved


@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.


@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu

-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.

 ####
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -0,0 +1,72 @@
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")  # cc_binary is needed by the pcre2demo target
+load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
+
+copy_file(
+    name = "config_h_generic",
+    src = "src/config.h.generic",
+    out = "src/config.h",
+)
+
+copy_file(
+    name = "pcre2_h_generic",
+    src = "src/pcre2.h.generic",
+    out = "src/pcre2.h",
+)
+
+copy_file(
+    name = "pcre2_chartables_c",
+    src = "src/pcre2_chartables.c.dist",
+    out = "src/pcre2_chartables.c",
+)
+
+cc_library(
+    name = "pcre2",
+    srcs = [
+        "src/pcre2_auto_possess.c",
+        "src/pcre2_compile.c",
+        "src/pcre2_config.c",
+        "src/pcre2_context.c",
+        "src/pcre2_convert.c",
+        "src/pcre2_dfa_match.c",
+        "src/pcre2_error.c",
+        "src/pcre2_extuni.c",
+        "src/pcre2_find_bracket.c",
+        "src/pcre2_maketables.c",
+        "src/pcre2_match.c",
+        "src/pcre2_match_data.c",
+        "src/pcre2_newline.c",
+        "src/pcre2_ord2utf.c",
+        "src/pcre2_pattern_info.c",
+        "src/pcre2_script_run.c",
+        "src/pcre2_serialize.c",
+        "src/pcre2_string_utils.c",
+        "src/pcre2_study.c",
+        "src/pcre2_substitute.c",
+        "src/pcre2_substring.c",
+        "src/pcre2_tables.c",
+        "src/pcre2_ucd.c",
+        "src/pcre2_ucptables.c",
+        "src/pcre2_valid_utf.c",
+        "src/pcre2_xclass.c",
+        ":pcre2_chartables_c",
+    ],
+    hdrs = glob(["src/*.h"]) + [
+        ":config_h_generic",
+        ":pcre2_h_generic",
+    ],
+    defines = [
+        "HAVE_CONFIG_H",
+        "PCRE2_CODE_UNIT_WIDTH=8",
+        "PCRE2_STATIC",
+    ],
+    includes = ["src"],
+    strip_include_prefix = "src",
+    visibility = ["//visibility:public"],
+)
+
+cc_binary(
+    name = "pcre2demo",
+    srcs = ["src/pcre2demo.c"],
+    visibility = ["//visibility:public"],
+    deps = [":pcre2"],
+)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,5 @@
 # CMakeLists.txt
 #
-#
 # This file enables PCRE2 to be built with the CMake configuration and build
 # tool. Download CMake in source or binary form from http://www.cmake.org/
 # Converted to support PCRE2 from the original PCRE file, August 2014.
@ -85,19 +84,44 @@
 # 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
 # 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
 # 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
+# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
+# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
+# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
+# 2020-04-16 enh added check for __attribute__((uninitialized))
+# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
+#            library versioning.
+# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
+# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
+# 2020-05-25 PH added a check for Intel CET
+# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel
+# 2021-06-29 JWSB added the option to build static library with PIC.
+# 2021-07-05 JWSB modified so that both the static and shared library can be
+#            built in one go.
+# 2021-08-28 PH increased minimum version
+# 2021-08-28 PH added test for realpath()

 PROJECT(PCRE2 C)

-# Increased minimum to 2.8.0 to support newer add_test features.
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
+# Increased minimum to 2.8.5 to support GNUInstallDirs.
+# Increased minimum to 3.1 to support imported targets.
+CMAKE_MINIMUM_REQUIRED(VERSION 3.1)

 # Set policy CMP0026 to avoid warnings for the use of LOCATION in
 # GET_TARGET_PROPERTY. This should no longer be required.
 # CMAKE_POLICY(SET CMP0026 OLD)

-SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
+# With a recent cmake, you can provide a rootdir to look for non
+# standard installed library dependencies, but to do so, the policy
+# needs to be set to new (by uncommenting the following)
+# CMAKE_POLICY(SET CMP0074 NEW)

-SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
+# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
+# on the command line.
+# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)

 # external packages
 FIND_PACKAGE( BZip2 )
@ -107,29 +131,66 @@ FIND_PACKAGE( Editline )

 # Configuration checks

-INCLUDE(CheckIncludeFile)
+INCLUDE(CheckCSourceCompiles)
 INCLUDE(CheckFunctionExists)
+INCLUDE(CheckSymbolExists)
+INCLUDE(CheckIncludeFile)
 INCLUDE(CheckTypeSize)
+INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR

 CHECK_INCLUDE_FILE(dirent.h     HAVE_DIRENT_H)
-CHECK_INCLUDE_FILE(stdint.h     HAVE_STDINT_H)
-CHECK_INCLUDE_FILE(inttypes.h   HAVE_INTTYPES_H)
 CHECK_INCLUDE_FILE(sys/stat.h   HAVE_SYS_STAT_H)
 CHECK_INCLUDE_FILE(sys/types.h  HAVE_SYS_TYPES_H)
 CHECK_INCLUDE_FILE(unistd.h     HAVE_UNISTD_H)
 CHECK_INCLUDE_FILE(windows.h    HAVE_WINDOWS_H)

-CHECK_FUNCTION_EXISTS(bcopy     HAVE_BCOPY)
-CHECK_FUNCTION_EXISTS(memmove   HAVE_MEMMOVE)
-CHECK_FUNCTION_EXISTS(strerror  HAVE_STRERROR)
+CHECK_SYMBOL_EXISTS(bcopy         "strings.h"  HAVE_BCOPY)
+CHECK_SYMBOL_EXISTS(memfd_create  "sys/mman.h" HAVE_MEMFD_CREATE)
+CHECK_SYMBOL_EXISTS(memmove       "string.h"   HAVE_MEMMOVE)
+CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h"   HAVE_SECURE_GETENV)
+CHECK_SYMBOL_EXISTS(strerror      "string.h"   HAVE_STRERROR)
+
+CHECK_C_SOURCE_COMPILES(
+  "#include <stdlib.h>
+   #include <limits.h>
+   int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[1], buf); return 0; }"
+  HAVE_REALPATH
+)
+
+set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
+CHECK_C_SOURCE_COMPILES(
+  "int main() { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
+  HAVE_ATTRIBUTE_UNINITIALIZED
+)
+set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
+
+# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
+# code was written by PH, trying to imitate the logic from the autotools
+# configuration.
+
+CHECK_C_SOURCE_COMPILES(
+  "#ifndef __CET__
+   #error CET is not enabled
+   #endif
+   int main() { return 0; }"
+  INTEL_CET_ENABLED
+)
+
+IF (INTEL_CET_ENABLED)
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
+ENDIF(INTEL_CET_ENABLED)
+
+

 # User-configurable options
 #
 # Note: CMakeSetup displays these in alphabetical order, regardless of
 # the order we use here.

-SET(BUILD_SHARED_LIBS OFF CACHE BOOL
-    "Build shared libraries instead of static ones.")
+SET(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.")
+
+OPTION(BUILD_STATIC_LIBS "Build static libraries." ON)

 OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)

@ -137,6 +198,8 @@ OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)

 OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)

+OPTION(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)
+
 OPTION(PCRE2_DEBUG "Include debugging code" OFF)

 OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF)
@ -177,8 +240,12 @@ SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
 SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
    "Enable support for Just-in-time compiling.")

-SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
-    "Enable SELinux compatible execmem allocator in JIT (experimental).")
+IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+    SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
+        "Enable SELinux compatible execmem allocator in JIT (experimental).")
+ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+    SET(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
+ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)

 SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL
    "Enable use of Just-in-time compiling in pcre2grep.")
@ -244,9 +311,19 @@ ENDIF(PCRE2_SUPPORT_LIBZ)
 IF(EDITLINE_FOUND)
  OPTION (PCRE2_SUPPORT_LIBEDIT  "Enable support for linking pcre2test with libedit." OFF)
 ENDIF(EDITLINE_FOUND)
-IF(PCRE2_SUPPORT_LIBEDIT)
-  INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
-ENDIF(PCRE2_SUPPORT_LIBEDIT)
+IF(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ELSE(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    MESSAGE(FATAL_ERROR
+      " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n"
+      " or set Editline_ROOT to a full libedit installed tree, as needed\n"
+      " Might need to enable policy CMP0074 in CMakeLists.txt"
+    )
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ENDIF(EDITLINE_FOUND)

 # readline lib
 IF(READLINE_FOUND)
@ -258,9 +335,9 @@ ENDIF(PCRE2_SUPPORT_LIBREADLINE)

 # Prepare build configuration

-IF(NOT BUILD_SHARED_LIBS)
-        SET(PCRE2_STATIC 1)
-ENDIF(NOT BUILD_SHARED_LIBS)
+IF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS)
+        MESSAGE(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.")
+ENDIF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS)

 IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
        MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled")
@ -284,7 +361,12 @@ IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
 ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)

 IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
-        MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
+        IF(READLINE_FOUND)
+                MESSAGE(FATAL_ERROR
+                  " Only one of the readline compatible libraries can be enabled.\n"
+                  " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF"
+                )
+        ENDIF(READLINE_FOUND)
 ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)

 IF(PCRE2_SUPPORT_BSR_ANYCRLF)
@ -300,11 +382,29 @@ IF(PCRE2_SUPPORT_UNICODE)
 ENDIF(PCRE2_SUPPORT_UNICODE)

 IF(PCRE2_SUPPORT_JIT)
-        SET(SUPPORT_JIT 1)
+	SET(SUPPORT_JIT 1)
+	IF(UNIX)
+		FIND_PACKAGE(Threads REQUIRED)
+		IF(CMAKE_USE_PTHREADS_INIT)
+			SET(REQUIRE_PTHREAD 1)
+		ENDIF(CMAKE_USE_PTHREADS_INIT)
+	ENDIF(UNIX)
 ENDIF(PCRE2_SUPPORT_JIT)

 IF(PCRE2_SUPPORT_JIT_SEALLOC)
-        SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
+        SET(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
+	CHECK_SYMBOL_EXISTS(mkostemp stdlib.h REQUIRED)
+        UNSET(CMAKE_REQUIRED_DEFINITIONS)
+        IF(${REQUIRED})
+                IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+                        ADD_DEFINITIONS(-D_GNU_SOURCE)
+                        SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
+                ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+                        MESSAGE(FATAL_ERROR "Your configuration is not supported")
+                ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+        ELSE(${REQUIRED})
+                SET(PCRE2_SUPPORT_JIT_SEALLOC OFF)
+        ENDIF(${REQUIRED})
 ENDIF(PCRE2_SUPPORT_JIT_SEALLOC)

 IF(PCRE2GREP_SUPPORT_JIT)
@ -400,12 +500,13 @@ file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
  LIMIT_COUNT 50 # Read only the first 50 lines of the file
 )

-set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
+set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date"
+  "libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version")
 foreach(configure_line ${configure_lines})
    foreach(_substitution_variable ${SEARCHED_VARIABLES})
        string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
        if (NOT ${_substitution_variable_upper})
-            string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
+            string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
            if (CMAKE_MATCH_1)
                set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
            endif()
@ -413,21 +514,83 @@ foreach(configure_line ${configure_lines})
    endforeach()
 endforeach()

+macro(PARSE_LIB_VERSION VARIABLE_PREFIX)  # Splits the "current:revision:age" string in <prefix>_VERSION and derives the version variables below.
+  string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST ${${VARIABLE_PREFIX}_VERSION})  # "c:r:a" -> list c;r;a
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 ${VARIABLE_PREFIX}_VERSION_CURRENT)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE)
+
+  math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}")  # current - age
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")  # current + 1
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+  set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}")  # "<current+1>.<revision>"; a stray "}" used to be appended here
+  set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}")  # overwrites the input with "<soversion>.<age>.<revision>"
+endmacro()
+
+PARSE_LIB_VERSION(LIBPCRE2_POSIX)
+PARSE_LIB_VERSION(LIBPCRE2_8)
+PARSE_LIB_VERSION(LIBPCRE2_16)
+PARSE_LIB_VERSION(LIBPCRE2_32)
+
 CONFIGURE_FILE(src/pcre2.h.in
               ${PROJECT_BINARY_DIR}/pcre2.h
               @ONLY)

-# What about pcre2-config and libpcre2.pc?
+# Make sure to not link debug libs
+# against release libs and vice versa
+IF(WIN32)
+  SET(CMAKE_DEBUG_POSTFIX "d")
+ENDIF(WIN32)
+
+# Generate pkg-config files
+
+SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
+SET(prefix ${CMAKE_INSTALL_PREFIX})
+
+SET(exec_prefix "\${prefix}")
+SET(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+SET(includedir "\${prefix}/include")
+IF(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
+  SET(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
+ENDIF()
+CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
+SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
+
+IF(PCRE2_BUILD_PCRE2_8)
+  CONFIGURE_FILE(libpcre2-8.pc.in libpcre2-8.pc @ONLY)
+  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc")
+  SET(enable_pcre2_8 "yes")
+ELSE()
+  SET(enable_pcre2_8 "no")
+ENDIF()
+
+IF(PCRE2_BUILD_PCRE2_16)
+  CONFIGURE_FILE(libpcre2-16.pc.in libpcre2-16.pc @ONLY)
+  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc")
+  SET(enable_pcre2_16 "yes")
+ELSE()
+  SET(enable_pcre2_16 "no")
+ENDIF()
+
+IF(PCRE2_BUILD_PCRE2_32)
+  CONFIGURE_FILE(libpcre2-32.pc.in libpcre2-32.pc @ONLY)
+  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc")
+  SET(enable_pcre2_32 "yes")
+ELSE()
+  SET(enable_pcre2_32 "no")
+ENDIF()
+
+CONFIGURE_FILE(pcre2-config.in pcre2-config @ONLY)

 # Character table generation

 OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
 IF(PCRE2_REBUILD_CHARTABLES)
-  ADD_EXECUTABLE(dftables src/dftables.c)
+  ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c)
  ADD_CUSTOM_COMMAND(
    COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
-    DEPENDS dftables
-    COMMAND dftables
+    DEPENDS pcre2_dftables
+    COMMAND pcre2_dftables
    ARGS        ${PROJECT_BINARY_DIR}/pcre2_chartables.c
    OUTPUT      ${PROJECT_BINARY_DIR}/pcre2_chartables.c
  )
@ -474,39 +637,37 @@ SET(PCRE2_SOURCES
 SET(PCRE2POSIX_HEADERS src/pcre2posix.h)
 SET(PCRE2POSIX_SOURCES src/pcre2posix.c)

-IF(MINGW AND NOT PCRE2_STATIC)
-IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
-ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
-PRE-LINK
-COMMAND windres ARGS pcre2.rc pcre2.o
-WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-COMMENT Using pcre2 coff info in mingw build)
-SET(PCRE2_SOURCES
-  ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o
-)
-ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
-IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
-ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
-PRE-LINK
-COMMAND windres ARGS pcre2posix.rc pcre2posix.o
-WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-COMMENT Using pcre2posix coff info in mingw build)
-SET(PCRE2POSIX_SOURCES
-  ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o
-)
-ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+IF(MINGW AND BUILD_SHARED_LIBS)  # MinGW shared builds: compile the .rc resource files with windres and add the objects to the sources.
+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
+    ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
+      DEPENDS ${PROJECT_SOURCE_DIR}/pcre2.rc  # was "PRE-LINK", which is not a keyword here and was parsed as an extra output file
+      COMMAND windres ARGS pcre2.rc pcre2.o
+      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+      COMMENT "Using pcre2 coff info in mingw build")
+    SET(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o)
+  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)

+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+    ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
+      DEPENDS ${PROJECT_SOURCE_DIR}/pcre2posix.rc  # was "PRE-LINK", which is not a keyword here and was parsed as an extra output file
+      COMMAND windres ARGS pcre2posix.rc pcre2posix.o
+      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+      COMMENT "Using pcre2posix coff info in mingw build")
+    SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o)
+  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+ENDIF(MINGW AND BUILD_SHARED_LIBS)
+
+IF(MSVC AND BUILD_SHARED_LIBS)
+  SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-posix.pdb ${dll_pdb_files})
+  SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb ${dll_pdb_debug_files})
+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
+    SET(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc)
+  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
+
+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+    SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc)
+  ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+ENDIF(MSVC AND BUILD_SHARED_LIBS)

 # Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681
 # This code was taken from the CMake wiki, not from WebM.
@ -529,71 +690,219 @@ IF(MSVC)
 ENDIF(MSVC)

 SET(CMAKE_INCLUDE_CURRENT_DIR 1)
-# needed to make sure to not link debug libs
-# against release libs and vice versa
-IF(WIN32)
-  SET(CMAKE_DEBUG_POSTFIX "d")
-ENDIF(WIN32)

 SET(targets)

 # 8-bit library

 IF(PCRE2_BUILD_PCRE2_8)
-ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-8
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
-SET(targets ${targets} pcre2-8)
-ADD_LIBRARY(pcre2-posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
-SET_PROPERTY(TARGET pcre2-posix
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
-SET(targets ${targets} pcre2-posix)
-TARGET_LINK_LIBRARIES(pcre2-posix pcre2-8)
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_8_VERSION}
+      SOVERSION ${LIBPCRE2_8_SOVERSION})
+    TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR})
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-8-static)
+    ADD_LIBRARY(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
+    SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_POSIX_VERSION}
+      SOVERSION ${LIBPCRE2_POSIX_SOVERSION})
+    TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static)
+    TARGET_COMPILE_DEFINITIONS(pcre2-posix-static PUBLIC PCRE2_STATIC)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET(targets ${targets} pcre2-posix-static)

-IF(MINGW AND NOT PCRE2_STATIC)
-  IF(NON_STANDARD_LIB_PREFIX)
-    SET_TARGET_PROPERTIES(pcre2-8 pcre2-posix PROPERTIES PREFIX "")
-  ENDIF(NON_STANDARD_LIB_PREFIX)
-  IF(NON_STANDARD_LIB_SUFFIX)
-    SET_TARGET_PROPERTIES(pcre2-8 pcre2-posix PROPERTIES SUFFIX "-0.dll")
-  ENDIF(NON_STANDARD_LIB_SUFFIX)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+    IF(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static)
+      SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static)
+    ELSE(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8)
+      SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix)
+    ENDIF(MSVC)
+    IF(PCRE2_STATIC_PIC)
+      SET_TARGET_PROPERTIES(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
+    ENDIF(PCRE2_STATIC_PIC)
+  ENDIF(BUILD_STATIC_LIBS)
+
+  IF(BUILD_SHARED_LIBS)
+    ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_8_VERSION}
+      SOVERSION ${LIBPCRE2_8_SOVERSION}
+      OUTPUT_NAME pcre2-8)
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-8-shared)
+    ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_POSIX_VERSION}
+      SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
+      OUTPUT_NAME pcre2-posix)
+    TARGET_LINK_LIBRARIES(pcre2-posix-shared pcre2-8-shared)
+    SET(targets ${targets} pcre2-posix-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-8.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-8d.pdb ${dll_pdb_debug_files})
+
+    IF(MINGW)
+      IF(NON_STANDARD_LIB_PREFIX)
+        SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "")
+      ENDIF(NON_STANDARD_LIB_PREFIX)
+      IF(NON_STANDARD_LIB_SUFFIX)
+        SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll")
+      ENDIF(NON_STANDARD_LIB_SUFFIX)
+    ENDIF(MINGW)
+  ENDIF(BUILD_SHARED_LIBS)
+
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-static)
+    ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-static)
+  ELSE(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-shared)
+    ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-shared)
+  ENDIF(BUILD_STATIC_LIBS)
 ENDIF(PCRE2_BUILD_PCRE2_8)

 # 16-bit library

 IF(PCRE2_BUILD_PCRE2_16)
-ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-16
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
-SET(targets ${targets} pcre2-16)
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_16_VERSION}
+      SOVERSION ${LIBPCRE2_16_SOVERSION})
+    TARGET_COMPILE_DEFINITIONS(pcre2-16-static PUBLIC PCRE2_STATIC)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-16-static)

-IF(MINGW AND NOT PCRE2_STATIC)
-  IF(NON_STANDARD_LIB_PREFIX)
-    SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES PREFIX "")
-  ENDIF(NON_STANDARD_LIB_PREFIX)
-  IF(NON_STANDARD_LIB_SUFFIX)
-    SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES SUFFIX "-0.dll")
-  ENDIF(NON_STANDARD_LIB_SUFFIX)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+    IF(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static)
+    ELSE(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16)
+    ENDIF(MSVC)
+    IF(PCRE2_STATIC_PIC)
+      SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
+    ENDIF(PCRE2_STATIC_PIC)
+  ENDIF(BUILD_STATIC_LIBS)
+
+  IF(BUILD_SHARED_LIBS)
+    ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_16_VERSION}
+      SOVERSION ${LIBPCRE2_16_SOVERSION}
+      OUTPUT_NAME pcre2-16)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-16-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-16.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-16d.pdb ${dll_pdb_debug_files})
+
+    IF(MINGW)
+      IF(NON_STANDARD_LIB_PREFIX)
+        SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES PREFIX "")
+      ENDIF(NON_STANDARD_LIB_PREFIX)
+      IF(NON_STANDARD_LIB_SUFFIX)
+        SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES SUFFIX "-0.dll")
+      ENDIF(NON_STANDARD_LIB_SUFFIX)
+    ENDIF(MINGW)
+  ENDIF(BUILD_SHARED_LIBS)
+
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-static)
+  ELSE(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-shared)
+  ENDIF(BUILD_STATIC_LIBS)
 ENDIF(PCRE2_BUILD_PCRE2_16)

 # 32-bit library

 IF(PCRE2_BUILD_PCRE2_32)
-ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-32
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
-SET(targets ${targets} pcre2-32)
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_32_VERSION}
+      SOVERSION ${LIBPCRE2_32_SOVERSION})
+    TARGET_COMPILE_DEFINITIONS(pcre2-32-static PUBLIC PCRE2_STATIC)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-32-static)

-IF(MINGW AND NOT PCRE2_STATIC)
-  IF(NON_STANDARD_LIB_PREFIX)
-    SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES PREFIX "")
-  ENDIF(NON_STANDARD_LIB_PREFIX)
-  IF(NON_STANDARD_LIB_SUFFIX)
-    SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES SUFFIX "-0.dll")
-  ENDIF(NON_STANDARD_LIB_SUFFIX)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+    IF(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static)
+    ELSE(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32)
+    ENDIF(MSVC)
+    IF(PCRE2_STATIC_PIC)
+      SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
+    ENDIF(PCRE2_STATIC_PIC)
+  ENDIF(BUILD_STATIC_LIBS)
+
+  IF(BUILD_SHARED_LIBS)
+    ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_32_VERSION}
+      SOVERSION ${LIBPCRE2_32_SOVERSION}
+      OUTPUT_NAME pcre2-32)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-32-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-32.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-32d.pdb ${dll_pdb_debug_files})
+
+    IF(MINGW)
+      IF(NON_STANDARD_LIB_PREFIX)
+        SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES PREFIX "")
+      ENDIF(NON_STANDARD_LIB_PREFIX)
+      IF(NON_STANDARD_LIB_SUFFIX)
+        SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES SUFFIX "-0.dll")
+      ENDIF(NON_STANDARD_LIB_SUFFIX)
+    ENDIF(MINGW)
+  ENDIF(BUILD_SHARED_LIBS)
+
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-static)
+  ELSE(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-shared)
+  ENDIF(BUILD_STATIC_LIBS)
 ENDIF(PCRE2_BUILD_PCRE2_32)

 # Executables
@ -718,7 +1027,9 @@ if test \"$?\" != \"0\"; then exit 1; fi
 \@echo off
 setlocal
 SET srcdir=\"${winsrc}\"
-SET pcre2test=\"${winexe}\"
+# The next line was replaced by the following one after a user comment.
+# SET pcre2test=\"${winexe}\"
+SET pcre2test=\"${winbin}\\pcre2test.exe\"
 if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
 call %srcdir%\\RunTest.Bat
 if errorlevel 1 exit /b 1
@ -754,42 +1065,44 @@ SET(CMAKE_INSTALL_ALWAYS 1)

 INSTALL(TARGETS ${targets}
        RUNTIME DESTINATION bin
-        LIBRARY DESTINATION lib
-        ARCHIVE DESTINATION lib)
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
+  DESTINATION bin
+  # Set 0755 permissions
+  PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)

 INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)

+# CMake config files.
+set(PCRE2_CONFIG_IN  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in)
+set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake)
+configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY)
+set(PCRE2_CONFIG_VERSION_IN  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in)
+set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake)
+configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY)
+install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION cmake)
+
 FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
 FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
 FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)

-FOREACH(man ${man3})
-        GET_FILENAME_COMPONENT(man_tmp ${man} NAME)
-        SET(man3_new ${man3} ${man})
-ENDFOREACH(man ${man3})
-SET(man3 ${man3_new})
-
 INSTALL(FILES ${man1} DESTINATION man/man1)
 INSTALL(FILES ${man3} DESTINATION man/man3)
 INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html)

 IF(MSVC AND INSTALL_MSVC_PDB)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posix.pdb
-            DESTINATION bin
-            CONFIGURATIONS RelWithDebInfo)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posixd.pdb
-            DESTINATION bin
-            CONFIGURATIONS Debug)
+ INSTALL(FILES ${dll_pdb_files} DESTINATION bin CONFIGURATIONS RelWithDebInfo)
+ INSTALL(FILES ${dll_pdb_debug_files} DESTINATION bin CONFIGURATIONS Debug)
 ENDIF(MSVC AND INSTALL_MSVC_PDB)

 # Help, only for nice output
-IF(BUILD_SHARED_LIBS)
-  SET(BUILD_STATIC_LIBS OFF)
-ELSE(BUILD_SHARED_LIBS)
+IF(BUILD_STATIC_LIBS)
  SET(BUILD_STATIC_LIBS ON)
-ENDIF(BUILD_SHARED_LIBS)
+ELSE(BUILD_STATIC_LIBS)
+  SET(BUILD_STATIC_LIBS OFF)
+ENDIF(BUILD_STATIC_LIBS)

 IF(PCRE2_HEAP_MATCH_RECURSE)
  MESSAGE(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.")
@ -802,7 +1115,7 @@ IF(PCRE2_SHOW_REPORT)
  ENDIF(CMAKE_C_FLAGS)
  MESSAGE(STATUS "")
  MESSAGE(STATUS "")
-  MESSAGE(STATUS "PCRE2 configuration summary:")
+  MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:")
  MESSAGE(STATUS "")
  MESSAGE(STATUS "  Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
  MESSAGE(STATUS "  C compiler ...................... : ${CMAKE_C_COMPILER}")
@ -827,6 +1140,7 @@ IF(PCRE2_SHOW_REPORT)
  MESSAGE(STATUS "  Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
  MESSAGE(STATUS "  Build shared libs ............... : ${BUILD_SHARED_LIBS}")
  MESSAGE(STATUS "  Build static libs ............... : ${BUILD_STATIC_LIBS}")
+  MESSAGE(STATUS "     with PIC enabled ............. : ${PCRE2_STATIC_PIC}")
  MESSAGE(STATUS "  Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
  MESSAGE(STATUS "  Enable JIT in pcre2grep ......... : ${PCRE2GREP_SUPPORT_JIT}")
  MESSAGE(STATUS "  Enable callouts in pcre2grep .... : ${PCRE2GREP_SUPPORT_CALLOUT}")
@ -861,10 +1175,10 @@ IF(PCRE2_SHOW_REPORT)
    MESSAGE(STATUS "  Use %zu and %td ..................: AUTO" )
  ENDIF(PCRE2_DISABLE_PERCENT_ZT)

-  IF(MINGW AND NOT PCRE2_STATIC)
+  IF(MINGW AND BUILD_SHARED_LIBS)
    MESSAGE(STATUS "  Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
    MESSAGE(STATUS "  Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
-  ENDIF(MINGW AND NOT PCRE2_STATIC)
+  ENDIF(MINGW AND BUILD_SHARED_LIBS)

  IF(MSVC)
    MESSAGE(STATUS "  Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
--- a/563
+++ b/563
@ -1,5 +1,562 @@
-Change Log for PCRE2
--------------------
+Change Log for PCRE2 - see also the Git log
+-------------------------------------------
+
+
+Version 10.41 xx-xxx-2022
+-------------------------
+
+1. Add fflush() before and after a fork callout in pcre2grep to get its output
+to be the same on all systems. (There were previously ordering differences in
+Alpine Linux).
+
+2. Merged patch from @carenas (GitHub #110) for pthreads support in CMake.
+
+3. SSF scorecards grumbled about possible overflow in an expression in
+pcre2test. It never would have overflowed in practice, but some casts have been
+added and at the same time there's been some tidying of fprints that output
+size_t values.
+
+4. PR #94 showed up an unused enum in pcre2_convert.c, which is now removed.
+
+5. Minor code re-arrangement to remove gcc warning about realloc() in
+pcre2test.
+
+6. Change a number of int variables that hold buffer and line lengths in
+pcre2grep to PCRE2_SIZE (aka size_t).
+
+7. Added an #ifdef to cut out a call to PRIV(jit_free) when JIT is not
+supported (even though that function would do nothing in that case) at the
+request of a user who doesn't even want to link with pcre2_jit_compile.o. Also
+tidied up an untidy #ifdef arrangement in pcre2test.
+
+8. Fixed an issue in the backtracking optimization of character repeats in
+JIT. Furthermore optimize star repetitions, not just plus repetitions.
+
+9. Removed the use of an initial backtracking frames vector on the system stack 
+in pcre2_match() so that it now always uses the heap. (In a multi-thread 
+environment with very small stacks there had been an issue.) This also is 
+tidier for JIT matching, which didn't need that vector. The heap vector is now 
+remembered in the match data block and re-used if that block itself is re-used. 
+It is freed with the match data block.
+
+10. Adjusted the find_limits code in pcre2test to work with change 9 above.
+
+11. Added find_limits_noheap to pcre2test, because the heap limits are now 
+different in different environments and so cannot be included in the standard 
+tests.
+
+12. Created a test for pcre2_match() heap processing that is not part of the 
+tests run by 'make check', but can be run manually. The current output is from 
+a 64-bit system.
+
+13. Implemented -Z aka --null in pcre2grep.
+
+
+Version 10.40 15-April-2022
+---------------------------
+
+1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect
+handling of multiple passes.
+
+2. Merged patch from @carenas (GitHub #36, dae47509) to fix portability issue
+in pcre2grep with buffered fseek(stdin).
+
+3. Merged patch from @carenas (GitHub #37, acc520924) to fix tests when -S is
+not supported.
+
+4. Revert an unintended change in JIT repeat detection.
+
+5. Merged patch from @carenas (GitHub #52, b037bfa1) to fix build on GNU Hurd.
+
+6. Merged documentation and comments patches from @carenas (GitHub #47).
+
+7. Merged patch from @carenas (GitHub #49) to remove obsolete JFriedl test code
+from pcre2grep.
+
+8. Merged patch from @carenas (GitHub #48) to fix CMake install issue #46.
+
+9. Merged patch from @carenas (GitHub #53) fixing NULL checks in matching and
+substituting.
+
+10. Add null_subject and null_replacement modifiers to pcre2test.
+
+11. Add check for NULL subject to POSIX regexec() function.
+
+12. Add check for NULL replacement to pcre2_substitute().
+
+13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and
+pcre2_substitute(), and the replacement argument of the latter, if the pointer
+is NULL and the length is zero, treat as an empty string. Apparently a number
+of applications treat NULL/0 in this way.
+
+14. Added support for Bidi_Class and a number of binary Unicode properties,
+including Bidi_Control.
+
+15. Fix some minor issues raised by clang sanitize.
+
+16. Very minor code speed up for maximizing character property matches.
+
+17. A number of changes to script matching for \p and \P:
+
+    (a) Script extensions for a character are now coded as a bitmap instead of
+        a list of script numbers, which should be faster and does not need a
+        loop.
+
+    (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
+        sc and scx).
+
+    (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
+        the same as \p{scx:scriptname} because this change happened in Perl at
+        release 5.26.
+
+    (d) The standard Unicode 4-letter abbreviations for script names are now
+        recognized.
+
+    (e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
+        hyphens, and underscores are ignored in property names, which are then
+        matched independent of case.
+
+18. The Python scripts in the maint directory have been refactored. There are
+now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c
+(which is #included by pcre2_tables.c). The data lists that used to be
+duplicated are now held in a single common Python module.
+
+19. On CHERI, and thus Arm's Morello prototype, pointers are represented as
+hardware capabilities, which consist of both an integer address and additional
+metadata, meaning they are twice the size of the platform's size_t type, i.e.
+16 bytes on a 64-bit system. The ovector member of heapframe happens to only be
+8 byte aligned, and so computing frame_size ended up with a multiple of 8 but
+not 16. Whilst the first frame was always suitably aligned, this then
+misaligned the frame that follows, resulting in an alignment fault when storing
+a pointer to Fecode at the start of match. Patch to fix this issue by Jessica
+Clarke PR#72.
+
+20. Added -LP and -LS listing options to pcre2test.
+
+21. A user discovered that the library names in CMakeLists.txt for MSVC
+debugger (PDB) files were incorrect - perhaps never tried for PCRE2?
+
+22. An item such as [Aa] is optimized into a caseless single character match.
+When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a
+pattern, the optimizing "must be present for a match" character check was not
+being flagged as caseless, causing some matches that should have succeeded to
+fail.
+
+23. Fixed a unicode property matching issue in JIT. The character was not
+fully read in caseless matching.
+
+24. Fixed an issue affecting recursions in JIT caused by duplicated data
+transfers.
+
+25. Merged patch from @carenas (GitHub #96) which fixes some problems with
+pcre2test and readline/readedit:
+
+  * Use the right header for libedit in FreeBSD with autoconf
+  * Really allow libedit with cmake
+  * Avoid using readline headers with libedit
+
+
+Version 10.39 29-October-2021
+-----------------------------
+
+1. Fix incorrect detection of alternatives in first character search in JIT.
+
+2. Merged patch from @carenas (GitHub #28):
+
+  Visual Studio 2013 includes support for %zu and %td, so let newer
+  versions of it avoid the fallback, and while at it, make sure that
+  the first check is for DISABLE_PERCENT_ZT so it will be always
+  honoured if chosen.
+
+  ptrdiff_t is signed, so use a signed type instead, and make sure
+  that an appropriate width is chosen if pointers are 64bit wide and
+  long is not (ex: Windows 64bit).
+
+  IMHO removing the cast (and therefore the possibility of truncation)
+  make the code cleaner and the fallback is likely portable enough
+  with all 64-bit POSIX systems doing LP64 except for Windows.
+
+3. Merged patch from @carenas (GitHub #29) to update to Unicode 14.0.0.
+
+4. Merged patch from @carenas (GitHub #30):
+
+  * Cleanup: remove references to no longer used stdint.h
+
+  Since 19c50b9d (Unconditionally use inttypes.h instead of trying for stdint.h
+  (simplification) and remove the now unnecessary inclusion in
+  pcre2_internal.h., 2018-11-14), stdint.h is no longer used.
+
+  Remove checks for it in autotools and CMake and document better the expected
+  build failures for systems that might have stdint.h (C99) and not inttypes.h
+  (from POSIX), like old Windows.
+
+  * Cleanup: remove detection for inttypes.h which is a hard dependency
+
+  CMake checks for standard headers are not meant to be used for hard
+  dependencies, so will prevent a possible fallback to work.
+
+  Alternatively, the header could be checked to make the configuration fail
+  instead of breaking the build, but that was punted, as it was missing anyway
+  from autotools.
+
+5. Merged patch from @carenas (GitHub #32):
+
+  * jit: allow building with ancient MSVC versions
+
+  Visual Studio older than 2013 fails to build with JIT enabled, because it is
+  unable to parse non C89 compatible syntax, with mixed declarations and code.
+  While most recent compilers wouldn't even report this as a warning since it
+  is valid C99, it could be also made visible by adding to gcc/clang the
+  -Wdeclaration-after-statement flag at build time.
+
+  Move the code below the affected definitions.
+
+  * pcre2grep: avoid mixing declarations with code
+
+  Since d5a61ee8 (Patch to detect (and ignore) symlink loops in pcre2grep,
+  2021-08-28), code will fail to build in a strict C89 compiler.
+
+  Reformat slightly to make it C89 compatible again.
+
+
+Version 10.38 01-October-2021
+-----------------------------
+
+1. Fix invalid single character repetition issues in JIT when the repetition
+is inside a capturing bracket and the bracket is preceded by character
+literals.
+
+2. Installed revised CMake configuration files provided by Jan-Willem Blokland.
+This extends the CMake build system to build both static and shared libraries
+in one go, builds the static library with PIC, and exposes PCRE2 libraries
+using the CMake config files. JWB provided these notes:
+
+- Introduced CMake variable BUILD_STATIC_LIBS to build the static library.
+
+- Make a small modification to config-cmake.h.in by removing the PCRE2_STATIC
+  variable. Added PCRE2_STATIC variable to the static build using the
+  target_compile_definitions() function.
+
+- Extended the CMake config files.
+
+  - Introduced CMake variable PCRE2_USE_STATIC_LIBS to easily switch between
+    the static and shared libraries.
+
+  - Added the PCRE2_STATIC variable to the target compile definitions for the
+    import of the static library.
+
+Building static and shared libraries using MSVC results in a name clash of
+the libraries. Both static and shared library builds create, for example, the
+file pcre2-8.lib. Therefore, I decided to change the static library names by
+adding "-static". For example, pcre2-8.lib has become pcre2-8-static.lib.
+[Comment by PH: this is MSVC-specific. It doesn't happen on Linux.]
+
+3. Increased the minimum release number for CMake to 3.0.0 because older than
+2.8.12 is deprecated (it was set to 2.8.5) and causes warnings. Even 3.0.0 is
+quite old; it was released in 2014.
+
+4. Implemented a modified version of Thomas Tempelmann's pcre2grep patch for
+detecting symlink loops. This is dependent on the availability of realpath(),
+which is now tested for in ./configure and CMakeLists.txt.
+
+5. Implemented a modified version of Thomas Tempelmann's patch for faster
+case-independent "first code unit" searches for unanchored patterns in 8-bit
+mode in the interpreters. Instead of just remembering whether one case matched
+or not, it remembers the position of a previous match so as to avoid
+unnecessary repeated searching.
+
+6. Perl now locks out \K in lookarounds, so PCRE2 now does the same by default.
+However, just in case anybody was relying on the old behaviour, there is an
+option called PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK that enables the old behaviour.
+An option has also been added to pcre2grep to enable this.
+
+7. Re-enable a JIT optimization which was unintentionally disabled in 10.35.
+
+8. There is a loop counter to catch excessively crazy patterns when checking
+the lengths of lookbehinds at compile time. This was incorrectly getting reset
+whenever a lookahead was processed, leading to some fuzzer-generated patterns
+taking a very long time to compile when (?|) was present in the pattern,
+because (?|) disables caching of group lengths.
+
+
+Version 10.37 26-May-2021
+-------------------------
+
+1. Change RunGrepTest to use tr instead of sed when testing with binary
+zero bytes, because sed varies a lot from system to system and has problems
+with binary zeros. This is from Bugzilla #2681. Patch from Jeremie
+Courreges-Anglas via Nam Nguyen. This fixes RunGrepTest for OpenBSD. Later:
+it broke it for at least one version of Solaris, where tr can't handle binary
+zeros. However, that system had /usr/xpg4/bin/tr installed, which works OK, so
+RunGrepTest now checks for that command and uses it if found.
+
+2. Compiling with gcc 10.2's -fanalyzer option showed up a hypothetical problem
+with a NULL dereference. I don't think this case could ever occur in practice,
+but I have put in a check in order to get rid of the compiler error.
+
+3. An alternative patch for CMakeLists.txt because 10.36 #4 breaks CMake on
+Windows. Patch from email@cs-ware.de fixes bugzilla #2688.
+
+4. Two bugs related to over-large numbers have been fixed so the behaviour is
+now the same as Perl.
+
+  (a) A pattern such as /\214748364/ gave an overflow error instead of being
+  treated as the octal number \214 followed by literal digits.
+
+  (b) A sequence such as {65536 that has no terminating } so is not a
+  quantifier was nevertheless complaining that a quantifier number was too big.
+
+5. A run of autoconf suggested that configure.ac was out-of-date with respect
+to the latest autoconf. Running autoupdate made some valid changes, some valid
+suggestions, and also some invalid changes, which were fixed by hand. Autoconf
+now runs clean and the resulting "configure" seems to work, so I hope nothing
+is broken. Later: the requirement for autoconf 2.70 broke some automatic test
+robots. It doesn't seem to be necessary: trying a reduction to 2.60.
+
+6. The pattern /a\K.(?0)*/ when matched against "abac" by the interpreter gave
+the answer "bac", whereas Perl and JIT both yield "c". This was because the
+effect of \K was not propagating back from the full pattern recursion. Other
+recursions such as /(a\K.(?1)*)/ did not have this problem.
+
+7. Restore single character repetition optimization in JIT. Currently fewer
+character repetitions are optimized than in 10.34.
+
+8. When the names of the functions in the POSIX wrapper were changed to
+pcre2_regcomp() etc. (see change 10.33 #4 below), functions with the original
+names were left in the library so that pre-compiled programs would still work.
+However, this has proved troublesome when programs link with several libraries,
+some of which use PCRE2 via the POSIX interface while others use a native POSIX
+library. For this reason, the POSIX function names are removed in this release.
+The macros in pcre2posix.h should ensure that re-compiling fixes any programs
+that haven't been compiled since before 10.33.
+
+
+Version 10.36 04-December-2020
+------------------------------
+
+1. Add CET_CFLAGS so that when Intel CET is enabled, pass -mshstk to
+compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for
+Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt
+invented by PH.
+
+2. Fix infinite loop when a single byte newline is searched in JIT when
+invalid utf8 mode is enabled.
+
+3. Updated CMakeLists.txt with patch from Wolfgang Stöggl (Bugzilla #2584):
+
+  - Include GNUInstallDirs and use ${CMAKE_INSTALL_LIBDIR} instead of hardcoded
+    lib. This allows differentiation between lib and lib64.
+    CMAKE_INSTALL_LIBDIR is used for installation of libraries and also for
+    pkgconfig file generation.
+
+  - Add the version of PCRE2 to the configuration summary like ./configure
+    does.
+
+  - Fix typo: MACTHED_STRING->MATCHED_STRING
+
+4. Updated CMakeLists.txt with another patch from Wolfgang Stöggl (Bugzilla
+#2588):
+
+  - Add escaped double quotes around include directory in CMakeLists.txt to
+    allow spaces in directory names.
+
+  - This fixes a cmake error, if the path of the pcre2 source contains a space.
+
+5. Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's
+documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXISTS.
+Moreover, these functions come from specific header files, which need to be
+specified (and, thankfully, are the same on both the Linux and WinXX
+platforms.)
+
+6. Added a (uint32_t) cast to prevent a compiler warning in pcre2_compile.c.
+
+7. Applied a patch from Wolfgang Stöggl (Bugzilla #2600) to fix postfix for
+debug Windows builds using CMake. This also updated configure so that it
+generates *.pc files and pcre2-config with the same content, as in the past.
+
+8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a
+single digit, the code unit beyond d was being read (i.e. there was a read
+buffer overflow). Fixes ClusterFuzz 23779.
+
+9. After the rework in r1235, certain character ranges were incorrectly
+handled by an optimization in JIT. Furthermore a wrong offset was used to
+read a value from a buffer which could lead to memory overread.
+
+10. Unnoticed for many years was the fact that delimiters other than / in the
+testinput1 and testinput4 files could cause incorrect behaviour when these
+files were processed by perltest.sh. There were several tests that used quotes
+as delimiters, and it was just luck that they didn't go wrong with perltest.sh.
+All the patterns in testinput1 and testinput4 now use / as their delimiter.
+This fixes Bugzilla #2641.
+
+11. Perl has started to give an error for \K within lookarounds (though there
+are cases where it doesn't). PCRE2 still allows this, so the tests that include
+this case have been moved from test 1 to test 2.
+
+12. Further to 10 above, pcre2test has been updated to detect and grumble if a
+delimiter other than / is used after #perltest.
+
+13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS
+was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding
+the start of a match was not resetting correctly after a failed match on the
+first valid fragment of the subject, possibly causing incorrect "no match"
+returns on subsequent fragments. For example, the pattern /A/ failed to match
+the subject \xe5A. Fixes Bugzilla #2642.
+
+14. Fixed a bug in character set matching when JIT is enabled and both unicode
+scripts and unicode classes are present at the same time.
+
+15. Added GNU grep's -m (aka --max-count) option to pcre2grep.
+
+16. Refactored substitution processing in pcre2grep strings, both for the -O
+option and when dealing with callouts. There is now a single function that
+handles $ expansion in all cases (instead of multiple copies of almost
+identical code). This means that the same escape sequences are available
+everywhere, which was not previously the case. At the same time, the escape
+sequences $x{...} and $o{...} have been introduced, to allow for characters
+whose code points are greater than 255 in Unicode mode.
+
+17. Applied the patch from Bugzilla #2628 to RunGrepTest. This does an explicit
+test for a version of sed that can handle binary zero, instead of assuming that
+any Linux version will work. Later: replaced $(...) by `...` because not all
+shells recognize the former.
+
+18. Fixed a word boundary check bug in JIT when partial matching is enabled.
+
+19. Fix ARM64 compilation warning in JIT. Patch by Carlo.
+
+20. A bug in the RunTest script meant that if the first part of test 2 failed,
+the failure was not reported.
+
+21. Test 2 was failing when run from a directory other than the source
+directory. This failure was previously missed in RunTest because of 20 above.
+Fixes added to both RunTest and RunTest.bat.
+
+22. Patch to CMakeLists.txt from Daniel to fix problem with testing under
+Windows.
+
+
+Version 10.35 09-May-2020
+---------------------------
+
+1. Use PCRE2_MATCH_EMPTY flag to detect empty matches in JIT.
+
+2. Fix ARMv5 JIT improper handling of labels right after a constant pool.
+
+3. Fixed a JIT bug that allowed the fields of the compiled pattern to be read
+before its existence was checked.
+
+4. Back in the PCRE1 day, capturing groups that contained recursive back
+references to themselves were made atomic (version 8.01, change 18) because
+after the end of a repeated group, the captured substrings had their values from
+the final repetition, not from an earlier repetition that might be the
+destination of a backtrack. This feature was documented, and was carried over
+into PCRE2. However, it has now been realized that the major refactoring that
+was done for 10.30 has made this atomicizing unnecessary, and it is confusing
+when users are unaware of it, making some patterns appear not to be working as
+expected. Capture values of recursive back references in repeated groups are
+now correctly backtracked, so this unnecessary restriction has been removed.
+
+5. Added PCRE2_SUBSTITUTE_LITERAL.
+
+6. Avoid some VS compiler warnings.
+
+7. Added PCRE2_SUBSTITUTE_MATCHED.
+
+8. Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another
+regex engine. The Perl regex folks are aware of this usage and have made a note
+about it.
+
+9. When an assertion is repeated, PCRE2 used to limit the maximum repetition to
+1, believing that repeating an assertion is pointless. However, if a positive
+assertion contains capturing groups, repetition can be useful. In any case, an
+assertion could always be wrapped in a repeated group. The only restriction
+that is now imposed is that an unlimited maximum is changed to one more than
+the minimum.
+
+10. Fix *THEN verbs in lookahead assertions in JIT.
+
+11. Added PCRE2_SUBSTITUTE_REPLACEMENT_ONLY.
+
+12. The JIT stack should be freed when the low-level stack allocation fails.
+
+13. In pcre2grep, if the final line in a scanned file is output but does not
+end with a newline sequence, add a newline according to the --newline setting.
+
+14. (?(DEFINE)...) groups were not being handled correctly when checking for
+the fixed length of a lookbehind assertion. Such a group within a lookbehind
+should be skipped, as it does not contribute to the length of the group.
+Instead, the (DEFINE) group was being processed, and if at the end of the
+lookbehind, that end was not correctly recognized. Errors such as "lookbehind
+assertion is not fixed length" and also "internal error: bad code value in
+parsed_skip()" could result.
+
+15. Put a limit of 1000 on recursive calls in pcre2_study() when searching
+nested groups for starting code units, in order to avoid stack overflow issues.
+If the limit is reached, it just gives up trying for this optimization.
+
+16. The control verb chain list must always be restored when exiting from a
+recurse function in JIT.
+
+17. Fix a crash which occurs when the character type of an invalid UTF
+character is decoded in JIT.
+
+18. Changes in many areas of the code so that when Unicode is supported and
+PCRE2_UCP is set without PCRE2_UTF, Unicode character properties are used for
+upper/lower case computations on characters whose code points are greater than
+127.
+
+19. The function for checking UTF-16 validity was returning an incorrect offset
+for the start of the error when a high surrogate was not followed by a valid
+low surrogate. This caused incorrect behaviour, for example when
+PCRE2_MATCH_INVALID_UTF was set and a match started immediately following the
+invalid high surrogate, such as /aa/ matching "\x{d800}aa".
+
+20. If a DEFINE group immediately preceded a lookbehind assertion, the pattern
+could be mis-compiled and therefore not match correctly. This is the example
+that found this: /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ which failed to
+match "word" because the "move back" value was set to zero.
+
+21. Following a request from a user, some extensions and tidies to the
+character tables handling have been done:
+
+  (a) The dftables auxiliary program is renamed pcre2_dftables, but it is still
+  not installed for public use.
+
+  (b) There is now a -b option for pcre2_dftables, which causes the tables to
+  be written in binary. There is also a -help option.
+
+  (c) PCRE2_CONFIG_TABLES_LENGTH is added to pcre2_config() so that an
+  application that wants to save tables in binary knows how long they are.
+
+22. Changed setting of CMAKE_MODULE_PATH in CMakeLists.txt from SET to
+LIST(APPEND...) to allow a setting from the command line to be included.
+
+23. Updated to Unicode 13.0.0.
+
+24. CMake build now checks for secure_getenv() and strerror(). Patch by Carlo.
+
+25. Avoid using [-1] as a suffix in pcre2test because it can provoke a compiler
+warning.
+
+26. Added tests for __attribute__((uninitialized)) to both the configure and
+CMake build files, and then applied this attribute to the variable called
+stack_frames_vector[] in pcre2_match(). When implemented, this disables
+automatic initialization (a facility in clang), which can take time on big
+variables.
+
+27. Updated CMakeLists.txt (patches by Uwe Korn) to add support for
+pcre2-config, the libpcre*.pc files, SOVERSION, VERSION and the
+MACHO_*_VERSIONS settings for CMake builds.
+
+28. Another patch to CMakeLists.txt to check for mkostemp (configure already
+does). Patch by Carlo Marcelo Arenas Belon.
+
+29. Check for the existence of memfd_create in both CMake and configure
+configurations. Patch by Carlo Marcelo Arenas Belon.
+
+30. Restrict the configuration setting for the SELinux compatible execmem
+allocator (change 10.30/44) to Linux and NetBSD.


 Version 10.34 21-November-2019
@ -337,7 +894,7 @@ Patch by Guillem Jover.
 warnings were reported.

 38. Using the clang compiler with sanitizing options causes runtime complaints
-about truncation for statments such as x = ~x when x is an 8-bit value; it
+about truncation for statements such as x = ~x when x is an 8-bit value; it
 seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x
 gets rid of the warnings. There were also two missing casts in pcre2test.

--- a/25
+++ b/25
@ -16,6 +16,7 @@ while (scalar(@ARGV) > 0)
  
  while (<IN>)
    {  
+    $count = 0;
    $line++; 
    if (/^\s*$/)
      {
@ -50,14 +51,24 @@ while (scalar(@ARGV) > 0)
        $yield = 1;
        }
      }
-    else
+    elsif (/\\[^ef]|\\f[^IBP]/)
      {
-      if (/\\[^ef]|\\f[^IBP]/)
-        {
-        printf "Bad backslash in line $line of $file\n";  
-        $yield = 1; 
-        } 
-      }   
+      printf "Bad backslash in line $line of $file\n";
+      $yield = 1;
+      }
+    while (/\\f[BI]/g)
+      {
+      $count++;
+      }
+    while (/\\fP/g)
+      {
+      $count--;
+      }
+    if ($count != 0)
+      {
+      printf "Mismatching formatting in line $line of $file\n";
+      $yield = 1;
+      }
    }
     
  close(IN);   
--- a/64
+++ b/64
@ -8,8 +8,8 @@ library is referred to as PCRE1 below. For information about testing PCRE2, see
 the pcre2test documentation and the comment at the head of the RunTest file.

 PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix
-releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid
-confusion with PCRE1.
+releases carried on the 8.xx series, up to the final 8.45 release. PCRE2
+releases started at 10.00 to avoid confusion with PCRE1.


 Historical note 1
@ -38,8 +38,8 @@ Historical note 2
 By contrast, the code originally written by Henry Spencer (which was
 subsequently heavily modified for Perl) compiles the expression twice: once in
 a dummy mode in order to find out how much store will be needed, and then for
-real. (The Perl version probably doesn't do this any more; I'm talking about
-the original library.) The execution function operates by backtracking and
+real. (The Perl version may or may not still do this; I'm talking about the
+original library.) The execution function operates by backtracking and
 maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
 matches individual wild portions of the pattern. This is an "NFA algorithm" in
 Friedl's terminology.
@ -151,8 +151,8 @@ of code units in the item itself. The exception is the aforementioned large
 advance to check for such values. When auto-callouts are enabled, the generous
 assumption is made that there will be a callout for each pattern code unit
 (which of course is only actually true if all code units are literals) plus one
-at the end. There is a default parsed pattern vector on the system stack, but
-if this is not big enough, heap memory is used.
+at the end. A default parsed pattern vector is defined on the system stack, to
+minimize memory handling, but if this is not big enough, heap memory is used.

 As before, the actual compiling function is run twice, the first time to
 determine the amount of memory needed for the final compiled pattern. It
@ -187,7 +187,7 @@ META_CLASS_EMPTY      [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS
 META_CLASS_EMPTY_NOT  [^] negative empty class - ditto
 META_CLASS_END        ] end of non-empty class
 META_CLASS_NOT        [^ start non-empty negative class
-META_COMMIT           (*COMMIT)
+META_COMMIT           (*COMMIT) - no argument (see below for with argument)
 META_COND_ASSERT      (?(?assertion)
 META_DOLLAR           $ metacharacter
 META_DOT              . metacharacter
@ -201,18 +201,18 @@ META_NOCAPTURE        (?: no capture parens
 META_PLUS             +
 META_PLUS_PLUS        ++
 META_PLUS_QUERY       +?
-META_PRUNE            (*PRUNE) - no argument
+META_PRUNE            (*PRUNE) - no argument (see below for with argument)
 META_QUERY            ?
 META_QUERY_PLUS       ?+
 META_QUERY_QUERY      ??
 META_RANGE_ESCAPED    hyphen in class range with at least one escape
 META_RANGE_LITERAL    hyphen in class range defined literally
-META_SKIP             (*SKIP) - no argument
-META_THEN             (*THEN) - no argument
+META_SKIP             (*SKIP) - no argument (see below for with argument)
+META_THEN             (*THEN) - no argument (see below for with argument)

 The two RANGE values occur only in character classes. They are positioned
 between two literals that define the start and end of the range. In an EBCDIC
-evironment it is necessary to know whether either of the range values was
+environment it is necessary to know whether either of the range values was
 specified as an escape. In an ASCII/Unicode environment the distinction is not
 relevant.

@ -229,17 +229,16 @@ If the data for META_ALT is non-zero, it is inside a lookbehind, and the data
 is the length of its branch, for which OP_REVERSE must be generated.

 META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as
-their data in the lower 16 bits of the element.
+their data in the lower 16 bits of the element. META_RECURSE is followed by an
+offset, for use in error messages.

 META_BACKREF is followed by an offset if the back reference group number is 10
-or more. The offsets of the first ocurrences of references to groups whose
+or more. The offsets of the first occurrences of references to groups whose
 numbers are less than 10 are put in cb->small_ref_offset[] (only the first
 occurrence is useful). On 64-bit systems this avoids using more than two parsed
 pattern elements for items such as \3. The offset is used when an error occurs
 because the reference is to a non-existent group.

-META_RECURSE is always followed by an offset, for use in error messages.
-
 META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
 element contains the 16-bit type and data property values, packed together.
 ESC_g and ESC_k are used only for named references - numerical ones are turned
@ -291,9 +290,9 @@ META_LOOKBEHIND       (?<=      start of lookbehind
 META_LOOKBEHIND_NA    (*naplb:  start of non-atomic lookbehind
 META_LOOKBEHINDNOT    (?<!      start of negative lookbehind

-The following are followed by two elements, the minimum and maximum. Repeat
-values are limited to 65535 (MAX_REPEAT). A maximum value of "unlimited" is
-represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:
+The following are followed by two elements, the minimum and maximum. The
+maximum value is limited to 65535 (MAX_REPEAT). A maximum value of "unlimited"
+is represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:

 META_MINMAX           {n,m}  repeat
 META_MINMAX_PLUS      {n,m}+ repeat
@ -347,11 +346,11 @@ support is not available for this kind of matching.
 Changeable options
 ------------------

-The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
-others) may be changed in the middle of patterns by items such as (?i). Their
-processing is handled entirely at compile time by generating different opcodes
-for the different settings. The runtime functions do not need to keep track of
-an option's state.
+The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and
+some others may be changed in the middle of patterns by items such as (?i).
+Their processing is handled entirely at compile time by generating different
+opcodes for the different settings. The runtime functions do not need to keep
+track of an option's state.

 PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
 are tracked and processed during the parsing pre-pass. The others are handled
@ -437,7 +436,7 @@ Backtracking control verbs
 --------------------------

 Verbs with no arguments generate opcodes with no following data (as listed
-in the section above). 
+in the section above).

 (*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
 length in one code unit, and followed by a binary zero. The name length is
@ -468,8 +467,8 @@ Caseless matching (positive or negative) of characters that have more than two
 case-equivalent code points (which is possible only in UTF mode) is handled by
 compiling a Unicode property item (see below), with the pseudo-property
 PT_CLIST. The value of this property is an offset in a vector called
-"ucd_caseless_sets" which identifies the start of a short list of equivalent
-characters, terminated by the value NOTACHAR (0xffffffff).
+"ucd_caseless_sets" which identifies the start of a short list of
+case-equivalent characters, terminated by the value NOTACHAR (0xffffffff).


 Repeating single characters
@ -546,8 +545,9 @@ Each is followed by two code units that encode the desired property as a type
 and a value. The types are a set of #defines of the form PT_xxx, and the values
 are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
 The value is relevant only for PT_GC (General Category), PT_PC (Particular
-Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
-identify a list of case-equivalent characters when there are three or more.
+Category), PT_SC (Script), PT_BIDICL (Bidi Class), PT_BOOL (Boolean property),
+and the pseudo-property PT_CLIST, which is used to identify a list of
+case-equivalent characters when there are three or more (see above).

 Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
 three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
@ -665,9 +665,9 @@ a count that immediately follows the offset.
 There are several opcodes that mark the end of a subpattern group. OP_KET is
 used for subpatterns that do not repeat indefinitely, OP_KETRMIN and
 OP_KETRMAX are used for indefinite repetitions, minimally or maximally
-respectively, and OP_KETRPOS for possessive repetitions (see below for more 
+respectively, and OP_KETRPOS for possessive repetitions (see below for more
 details). All four are followed by a LINK_SIZE value giving (as a positive
-number) the offset back to the matching bracket opcode.
+number) the offset back to the matching opening bracket opcode.

 If a subpattern is quantified such that it is permitted to match zero times, it
 is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
@ -718,7 +718,7 @@ Assertions

 Forward assertions are also just like other subpatterns, but starting with one
 of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or
-OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK, 
+OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK,
 OP_ASSERTBACK_NA, and OP_ASSERTBACK_NOT, and the first opcode inside the
 assertion is OP_REVERSE, followed by a count of the number of characters to
 move back the pointer in the subject string. In ASCII or UTF-32 mode, the count
@ -827,4 +827,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
 opcode are the correct length, in order to catch updating errors.

 Philip Hazel
-12 July 2019
+April 2022
--- a/12
+++ b/12
@ -20,13 +20,13 @@ THE BASIC LIBRARY FUNCTIONS
 ---------------------------

 Written by:       Philip Hazel
-Email local part: ph10
-Email domain:     cam.ac.uk
+Email local part: Philip.Hazel
+Email domain:     gmail.com

-University of Cambridge Computing Service,
+Retired from University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved.


@ -37,7 +37,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.


@ -48,7 +48,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu

-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.


--- a/MODULE.bazel
+++ b/MODULE.bazel
@ -0,0 +1,8 @@
+module(
+    name = "pcre2",
+    version = "10.40",
+    compatibility_level = 1,
+)
+
+bazel_dep(name = "rules_cc", version = "0.0.1")
+bazel_dep(name = "bazel_skylib", version = "1.2.1")
--- a/Makefile.am
+++ b/Makefile.am
@ -325,18 +325,18 @@ include_HEADERS = src/pcre2posix.h
 bin_SCRIPTS = pcre2-config

 ## ---------------------------------------------------------------
-## The dftables program is used to rebuild character tables before compiling
-## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
-## program. The default (when --enable-rebuild-chartables is not specified) is
-## to copy a distributed set of tables that are defined for ASCII code. In this
-## case, dftables is not needed.
+## The pcre2_dftables program is used to rebuild character tables before
+## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an
+## installed program. The default (when --enable-rebuild-chartables is not
+## specified) is to copy a distributed set of tables that are defined for ASCII
+## code. In this case, pcre2_dftables is not needed.

 if WITH_REBUILD_CHARTABLES
-noinst_PROGRAMS += dftables
-dftables_SOURCES = src/dftables.c
-src/pcre2_chartables.c: dftables$(EXEEXT)
+noinst_PROGRAMS += pcre2_dftables
+pcre2_dftables_SOURCES = src/pcre2_dftables.c
+src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
 	rm -f $@
-	./dftables$(EXEEXT) $@
+	./pcre2_dftables$(EXEEXT) $@
 else
 src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
 	rm -f $@
@ -382,6 +382,10 @@ COMMON_SOURCES = \
  src/pcre2_valid_utf.c \
  src/pcre2_xclass.c

+# The pcre2_ucptables.c file is #included by pcre2_tables.c
+
+EXTRA_DIST += src/pcre2_ucptables.c
+
 if WITH_PCRE2_8
 lib_LTLIBRARIES += libpcre2-8.la
 libpcre2_8_la_SOURCES = \
@ -391,6 +395,7 @@ nodist_libpcre2_8_la_SOURCES = \
 libpcre2_8_la_CFLAGS = \
  -DPCRE2_CODE_UNIT_WIDTH=8 \
  $(VISIBILITY_CFLAGS) \
+  $(CET_CFLAGS) \
  $(AM_CFLAGS)
 libpcre2_8_la_LIBADD =
 endif # WITH_PCRE2_8
@ -404,6 +409,7 @@ nodist_libpcre2_16_la_SOURCES = \
 libpcre2_16_la_CFLAGS = \
  -DPCRE2_CODE_UNIT_WIDTH=16 \
  $(VISIBILITY_CFLAGS) \
+  $(CET_CFLAGS) \
  $(AM_CFLAGS)
 libpcre2_16_la_LIBADD =
 endif # WITH_PCRE2_16
@ -417,6 +423,7 @@ nodist_libpcre2_32_la_SOURCES = \
 libpcre2_32_la_CFLAGS = \
  -DPCRE2_CODE_UNIT_WIDTH=32 \
  $(VISIBILITY_CFLAGS) \
+  $(CET_CFLAGS) \
  $(AM_CFLAGS)
 libpcre2_32_la_LIBADD =
 endif # WITH_PCRE2_32
@ -445,15 +452,16 @@ EXTRA_DIST += \
  src/sljit/sljitNativePPC_32.c \
  src/sljit/sljitNativePPC_64.c \
  src/sljit/sljitNativePPC_common.c \
-  src/sljit/sljitNativeSPARC_32.c \
-  src/sljit/sljitNativeSPARC_common.c \
-  src/sljit/sljitNativeTILEGX-encoder.c \
-  src/sljit/sljitNativeTILEGX_64.c \
+  src/sljit/sljitNativeRISCV_32.c \
+  src/sljit/sljitNativeRISCV_64.c \
+  src/sljit/sljitNativeRISCV_common.c \
+  src/sljit/sljitNativeS390X.c \
  src/sljit/sljitNativeX86_32.c \
  src/sljit/sljitNativeX86_64.c \
  src/sljit/sljitNativeX86_common.c \
  src/sljit/sljitProtExecAllocator.c \
-  src/sljit/sljitUtils.c
+  src/sljit/sljitUtils.c \
+  src/sljit/sljitWXExecAllocator.c

 # Some of the JIT sources are also in separate files that are #included.

@ -634,6 +642,7 @@ EXTRA_DIST += \
  testdata/grepoutputCN \
  testdata/grepoutputN \
  testdata/greppatN4 \
+  testdata/testbtables \
  testdata/testinput1 \
  testdata/testinput2 \
  testdata/testinput3 \
@ -659,6 +668,7 @@ EXTRA_DIST += \
  testdata/testinput23 \
  testdata/testinput24 \
  testdata/testinput25 \
+  testdata/testinput26 \
  testdata/testinputEBC \
  testdata/testoutput1 \
  testdata/testoutput2 \
@ -701,6 +711,7 @@ EXTRA_DIST += \
  testdata/testoutput23 \
  testdata/testoutput24 \
  testdata/testoutput25 \
+  testdata/testoutput26 \
  testdata/testoutputEBC \
  testdata/valgrind-jit.supp \
  testdata/wintestinput3 \
@ -855,9 +866,11 @@ endif # WITH_GCOV

 EXTRA_DIST += \
  cmake/COPYING-CMAKE-SCRIPTS \
+  cmake/FindEditline.cmake \
  cmake/FindPackageHandleStandardArgs.cmake \
  cmake/FindReadline.cmake \
-  cmake/FindEditline.cmake \
+  cmake/pcre2-config-version.cmake.in \
+  cmake/pcre2-config.cmake.in \
  CMakeLists.txt \
  config-cmake.h.in

--- a/Makefile.os4
+++ b/Makefile.os4
@ -0,0 +1,271 @@
+#
+# Project: pcre2
+#
+# Created on: 10-01-2022 22:01:46
+#
+# commands to use:
+# make -f Makefile.os4 libpcre2.a
+# make -f Makefile.os4 libpcre2-posix.a
+# make -f Makefile.os4 pcre2test
+# sh RunTest
+# make -f Makefile.os4 clean
+#
+
+###################################################################
+##
+##////  Objects
+##
+###################################################################
+
+libpcre2_OBJ := \
+	 src/pcre2_chartables.o src/pcre2_auto_possess.o src/pcre2_compile.o \
+	 src/pcre2_config.o src/pcre2_context.o src/pcre2_convert.o \
+	 src/pcre2_dfa_match.o src/pcre2_error.o src/pcre2_extuni.o \
+	 src/pcre2_find_bracket.o src/pcre2_jit_compile.o src/pcre2_maketables.o \
+	 src/pcre2_match.o src/pcre2_match_data.o src/pcre2_newline.o \
+	 src/pcre2_ord2utf.o src/pcre2_pattern_info.o src/pcre2_script_run.o \
+	 src/pcre2_serialize.o src/pcre2_string_utils.o src/pcre2_study.o \
+	 src/pcre2_substitute.o src/pcre2_substring.o src/pcre2_tables.o \
+	 src/pcre2_ucd.o src/pcre2_valid_utf.o src/pcre2_xclass.o \
+	
+
+
+pcre2posix_OBJ := \
+	 src/pcre2posix.o
+
+
+pcre2test_OBJ := \
+	 src/pcre2test.o
+
+
+pcre2grep_OBJ := \
+	 src/pcre2grep.o
+
+###################################################################
+##
+##////  Variables and Environment
+##
+###################################################################
+
+# C runtime selection: newlib unless the caller passes USE_CLIB2=yes, in which
+# case clib2 is used instead. The flag is given to both compile and link steps.
+MCRT := -mcrt=newlib
+ifeq ($(USE_CLIB2), yes)
+MCRT := -mcrt=clib2
+endif
+
+# Compiler driver used by the .c.o rule.
+CC := gcc:bin/gcc
+
+# Header search path: the top-level directory and src/.
+INCPATH := -I. -Isrc
+
+# Flags used by the .c.o rule for every object in this makefile (libraries,
+# pcre2test and pcre2grep alike); only the 8-bit code unit width is built.
+CFLAGS := $(MCRT) $(INCPATH) -O2 -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8
+
+###################################################################
+##
+##////  General rules
+##
+###################################################################
+
+# Targets in this section that never correspond to a file on disk.
+# clean-custom has no recipe in this file; it is only a hook for local
+# additions, and declaring it phony keeps make from looking for such a file.
+.PHONY: all all-before all-after tests clean clean-custom cleanall
+
+# Default goal: build both static libraries, bracketed by the two hooks.
+all: all-before libpcre2.a libpcre2-posix.a all-after
+
+all-before:
+#	You can add rules here to execute before the project is built
+
+all-after:
+#	You can add rules here to execute after the project is built
+
+# Build the pcre2test and pcre2grep programs.
+tests: pcre2test pcre2grep
+
+# Remove every object file this makefile can produce, including pcre2grep's.
+clean: clean-custom
+	@echo "Cleaning compiler objects..."
+	@rm -f  $(libpcre2_OBJ) $(pcre2posix_OBJ) $(pcre2test_OBJ) $(pcre2grep_OBJ)
+
+# Remove the objects plus the libraries and programs built from them. The
+# archive names must match the libpcre2.a / libpcre2-posix.a targets exactly.
+cleanall: clean
+	@echo "Cleaning compiler targets..."
+	@rm -f  libpcre2.a libpcre2-posix.a pcre2test pcre2grep
+
+###################################################################
+##
+##////  Targets
+##
+###################################################################
+
+# Static archive of the main library objects (compiled for 8-bit code units,
+# see CFLAGS).
+libpcre2.a: $(libpcre2_OBJ)
+	ar -rcs libpcre2.a $(libpcre2_OBJ)
+	ranlib libpcre2.a
+
+# Static archive of the POSIX wrapper.
+libpcre2-posix.a: $(pcre2posix_OBJ)
+	ar -rcs libpcre2-posix.a $(pcre2posix_OBJ)
+	ranlib libpcre2-posix.a
+
+# pcre2test links against both archives. libpcre2-posix is named before
+# libpcre2 because static archives are searched once, in command-line order,
+# and the POSIX wrapper calls into the main library. $(CC) is used so the
+# link step follows the same compiler setting as the compile step.
+pcre2test: libpcre2.a libpcre2-posix.a $(pcre2test_OBJ)
+	@echo "Linking pcre2test"
+	@$(CC) $(MCRT) -o pcre2test $(pcre2test_OBJ) -L. -lauto -lpcre2-posix -lpcre2
+	@echo "Removing stale debug target: pcre2test"
+	@rm -f pcre2test.debug
+
+# pcre2grep needs only the main library.
+pcre2grep: libpcre2.a $(pcre2grep_OBJ)
+	@echo "Linking pcre2grep"
+	@$(CC) $(MCRT) -o pcre2grep $(pcre2grep_OBJ) -L . -lauto -lpcre2
+	@echo "Removing stale debug target: pcre2grep"
+	@rm -f pcre2grep.debug
+
+
+###################################################################
+##
+##////  Standard rules
+##
+###################################################################
+
+# Suffix rule that compiles a .c file into the .o of the same name. The
+# compiler command is hidden with @, so a short "Compiling ..." line is echoed
+# in its place. The per-object entries further down carry no recipe of their
+# own; they only add prerequisites.
+
+.c.o:
+	@echo "Compiling $<"
+	@$(CC) -c $< -o $*.o $(CFLAGS)
+
+src/pcre2_chartables.o: src/pcre2_chartables.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_auto_possess.o: src/pcre2_auto_possess.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_compile.o: src/pcre2_compile.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h src/pcre2_intmodedep.h \
+	
+
+src/pcre2_config.o: src/pcre2_config.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_context.o: src/pcre2_context.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_convert.o: src/pcre2_convert.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_dfa_match.o: src/pcre2_dfa_match.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_error.o: src/pcre2_error.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_extuni.o: src/pcre2_extuni.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_find_bracket.o: src/pcre2_find_bracket.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_jit_compile.o: src/pcre2_jit_compile.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h src/pcre2_intmodedep.h \
+	 src/sljit/sljitLir.c src/sljit/sljitLir.h src/sljit/sljitConfig.h \
+	 src/sljit/sljitConfigInternal.h src/sljit/sljitUtils.c src/sljit/sljitProtExecAllocator.c \
+	 src/sljit/sljitWXExecAllocator.c src/sljit/sljitExecAllocator.c src/pcre2_jit_simd_inc.h \
+	 src/pcre2_jit_neon_inc.h src/pcre2_jit_match.c
+
+src/pcre2_maketables.o: src/pcre2_maketables.c
+
+src/pcre2_match.o: src/pcre2_match.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_match_data.o: src/pcre2_match_data.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_newline.o: src/pcre2_newline.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_ord2utf.o: src/pcre2_ord2utf.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_pattern_info.o: src/pcre2_pattern_info.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_script_run.o: src/pcre2_script_run.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_serialize.o: src/pcre2_serialize.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2test.o: src/pcre2test.c src/config.h src/pcre2.h \
+	 src/pcre2posix.h src/pcre2_internal.h src/pcre2_ucp.h \
+	 src/pcre2_intmodedep.h src/pcre2_tables.c src/pcre2_ucptables.c \
+	 src/pcre2_ucd.c src/pcre2_printint.c
+
+src/pcre2_string_utils.o: src/pcre2_string_utils.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_study.o: src/pcre2_study.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_substitute.o: src/pcre2_substitute.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_substring.o: src/pcre2_substring.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2posix.o: src/pcre2posix.c src/config.h src/pcre2.h \
+	
+
+# pcre2_tables.c #includes pcre2_ucptables.c, so a change to the latter must
+# also trigger a rebuild of this object.
+src/pcre2_tables.o: src/pcre2_tables.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h src/pcre2_intmodedep.h \
+	 src/pcre2_ucptables.c
+
+src/pcre2_ucd.o: src/pcre2_ucd.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_valid_utf.o: src/pcre2_valid_utf.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_xclass.o: src/pcre2_xclass.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+
+src/pcre2grep.o: src/pcre2grep.c src/config.h
+
+###################################################################
+##
+##////  Custom rules
+##
+###################################################################
+
+# Neither target names a real build product. "release" in particular creates
+# a directory of the same name, so without this declaration a leftover
+# directory from a failed run would make the target look up to date.
+.PHONY: runtests release
+
+# Build everything needed and run both test scripts.
+runtests: libpcre2.a libpcre2-posix.a tests
+	sh RunTest
+	sh RunGrepTest
+
+# Build the libraries once per C runtime (newlib, then clib2), collect them
+# with the public headers and documentation under release/, pack the tree
+# into pcre2.lha and remove the staging directory. Sub-builds go through
+# $(MAKE) so they use the same make program and inherit its options.
+release:
+	@echo "Create release folders..."
+	@mkdir -p release/local/newlib/lib release/local/clib2/lib release/local/Documentation/pcre2 release/local/common/include
+	
+	@echo "Building newlib based libraries..."
+	@$(MAKE) -f Makefile.os4 all
+	@cp libpcre2.a release/local/newlib/lib/
+	@cp libpcre2-posix.a release/local/newlib/lib/
+	
+	@echo "Clean build and libraries files..."
+	@$(MAKE) -f Makefile.os4 cleanall
+	
+	@echo "Building clib2 based libraries..."
+	@$(MAKE) -f Makefile.os4 all USE_CLIB2=yes
+	@cp libpcre2.a release/local/clib2/lib/
+	@cp libpcre2-posix.a release/local/clib2/lib/
+
+	@echo "Copy the necessary files..."
+	@cp src/pcre2.h release/local/common/include/
+	@cp src/pcre2posix.h release/local/common/include/
+	@cp COPYING release/local/Documentation/pcre2/
+	@cp HACKING release/local/Documentation/pcre2/
+	@cp LICENCE release/local/Documentation/pcre2/
+	@cp README release/local/Documentation/pcre2/
+	@cp README-OS4.md release/local/Documentation/pcre2/
+	
+	@echo "Clean build and libraries files..."
+	@$(MAKE) -f Makefile.os4 cleanall
+	
+	@echo "Creating the lha release file..."
+	@rm -f pcre2.lha
+	@lha -aeqr3 a pcre2.lha release/
+	
+	@rm -rf release
+
+###################################################################
+
--- a/100
+++ b/100
@ -2,6 +2,106 @@ News about PCRE2 releases
 -------------------------


+Version 10.40 15-April-2022
+---------------------------
+
+This is mostly a bug-fixing and code-tidying release. However, there are some
+extensions to Unicode property handling:
+
+* Added support for Bidi_Class and a number of binary Unicode properties,
+including Bidi_Control.
+
+* A number of changes to script matching for \p and \P:
+
+  (a) Script extensions for a character are now coded as a bitmap instead of
+      a list of script numbers, which should be faster and does not need a
+      loop.
+
+  (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
+      sc and scx).
+
+  (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
+      the same as \p{scx:scriptname} because this change happened in Perl at
+      release 5.26.
+
+  (d) The standard Unicode 4-letter abbreviations for script names are now
+      recognized.
+
+  (e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
+      hyphens, and underscores are ignored in property names, which are then
+      matched independent of case.
+
+As always, see ChangeLog for a list of all changes (also the Git log).
+
+
+Version 10.39 29-October-2021
+-----------------------------
+
+This release is happening soon after 10.38 because the bug fix is important.
+
+1. Fix incorrect detection of alternatives in first character search in JIT.
+
+2. Update to Unicode 14.0.0.
+
+3. Some code cleanups (see ChangeLog).
+
+
+Version 10.38 01-October-2021
+-----------------------------
+
+As well as some bug fixes and tidies (as always, see ChangeLog for details),
+the documentation is updated to list the new URLs, following the move of the
+source repository to GitHub and the mailing list to Google Groups.
+
+* The CMake build system can now build both static and shared libraries in one
+go.
+
+* Following Perl's lead, \K is now locked out in lookaround assertions by
+default, but an option is provided to re-enable the previous behaviour.
+
+
+Version 10.37 26-May-2021
+-------------------------
+
+A few more bug fixes and tidies. The only change of real note is the removal of
+the actual POSIX names regcomp etc. from the POSIX wrapper library because
+these have caused issues for some applications (see 10.33 #2 below).
+
+
+Version 10.36 04-December-2020
+------------------------------
+
+Again, mainly bug fixes and tidies. The only enhancements are the addition of
+GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the
+handling of substitution strings for both -O and callouts in pcre2grep, with
+the addition of $x{...} and $o{...} to allow for characters whose code points
+are greater than 255 in Unicode mode.
+
+NOTE: there is an outstanding issue with JIT support for MacOS on arm64
+hardware. For details, please see Bugzilla issue #2618.
+
+
+Version 10.35 15-April-2020
+---------------------------
+
+Bugfixes, tidies, and a few new enhancements.
+
+1. Capturing groups that contain recursive backreferences to themselves are no
+longer automatically atomic, because the restriction is no longer necessary
+as a result of the 10.30 restructuring.
+
+2. Several new options for pcre2_substitute().
+
+3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode
+character properties are used for upper/lower case computations on characters
+whose code points are greater than 127.
+
+4. The character tables (for low-valued characters) can now more easily be
+saved and restored in binary.
+
+5. Updated to Unicode 13.0.0.
+
+
 Version 10.34 21-November-2019
 ------------------------------

--- a/57
+++ b/57
@ -40,7 +40,11 @@ GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY

 The following are generic instructions for building the PCRE2 C library "by
 hand". If you are going to use CMake, this section does not apply to you; you
-can skip ahead to the CMake section.
+can skip ahead to the CMake section. Note that the settings concerned with
+8-bit, 16-bit, and 32-bit code units relate to the type of data string that
+PCRE2 processes. They are NOT referring to the underlying operating system bit
+width. You do not have to do anything special to compile in a 64-bit
+environment, for example.

 (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
     macro settings that it contains to whatever is appropriate for your
@ -74,23 +78,23 @@ can skip ahead to the CMake section.
       src/pcre2_chartables.c.

     OR:
-       Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
-       if you have set up src/config.h), and then run it with the single
-       argument "src/pcre2_chartables.c". This generates a set of standard
-       character tables and writes them to that file. The tables are generated
-       using the default C locale for your system. If you want to use a locale
-       that is specified by LC_xxx environment variables, add the -L option to
-       the dftables command. You must use this method if you are building on a
-       system that uses EBCDIC code.
+       Compile src/pcre2_dftables.c as a stand-alone program (using
+       -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
+       the single argument "src/pcre2_chartables.c". This generates a set of
+       standard character tables and writes them to that file. The tables are
+       generated using the default C locale for your system. If you want to use
+       a locale that is specified by LC_xxx environment variables, add the -L
+       option to the pcre2_dftables command. You must use this method if you
+       are building on a system that uses EBCDIC code.

     The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
     specify alternative tables at run time.

- (4) For an 8-bit library, compile the following source files from the src
-     directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
-     set -DHAVE_CONFIG_H if you have set up src/config.h with your
-     configuration, or else use other -D settings to change the configuration
-     as required.
+ (4) For a library that supports 8-bit code units in the character strings that
+     it processes, compile the following source files from the src directory,
+     setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set
+     -DHAVE_CONFIG_H if you have set up src/config.h with your configuration,
+     or else use other -D settings to change the configuration as required.

       pcre2_auto_possess.c
       pcre2_chartables.c
@ -117,6 +121,7 @@ can skip ahead to the CMake section.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -142,9 +147,9 @@ can skip ahead to the CMake section.
     If your system has static and shared libraries, you may have to do this
     once for each type.

- (6) If you want to build a 16-bit library or 32-bit library (as well as, or
-     instead of the 8-bit library) just supply 16 or 32 as the value of
-     -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
+ (6) If you want to build a library that supports 16-bit or 32-bit code units
+     (as well as, or instead of, the 8-bit library), just supply 16 or 32 as the
+     value of -DPCRE2_CODE_UNIT_WIDTH when you are compiling.

 (7) If you want to build the POSIX wrapper functions (which apply only to the
     8-bit library), ensure that you have the src/pcre2posix.h file and then
@ -302,7 +307,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
    source dir. For example, C:\pcre2\pcre2-xx\build.

-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
    to start Cmake from the Windows Start menu, as this can lead to errors.

@ -339,10 +344,10 @@ cache can be deleted by selecting "File > Delete Cache".

 BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO

-The code currently cannot be compiled without a stdint.h header, which is
-available only in relatively recent versions of Visual Studio. However, this
-portable and permissively-licensed implementation of the header worked without
-issue:
+The code currently cannot be compiled without an inttypes.h header, which is
+available only with Visual Studio 2013 or newer. However, this portable and
+permissively-licensed implementation of the stdint.h header could be used as an
+alternative:

  http://www.azillionmonkeys.com/qed/pstdint.h

@ -369,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (wherein which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
@ -401,6 +406,6 @@ Everything in that location, source and executable, is in EBCDIC and native
 z/OS file formats. The port provides an API for LE languages such as COBOL and
 for the z/OS and z/VM versions of the Rexx languages.

-==============================
-Last Updated: 14 November 2018
-==============================
+===========================
+Last Updated: 28 April 2021
+===========================
--- a/2
+++ b/2
@ -190,7 +190,7 @@ files="\
  libpcre2-16.pc.in \
  libpcre2-32.pc.in \
  libpcre2-posix.pc.in \
-  src/dftables.c \
+  src/pcre2_dftables.c \
  src/pcre2.h.in \
  src/pcre2_auto_possess.c \
  src/pcre2_compile.c \
--- a/154
+++ b/154
@ -4,18 +4,20 @@ README file for PCRE2 (Perl-compatible regular expression library)
 PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
 API. Since its initial release in 2015, there has been further development of
 the code and it now differs from PCRE1 in more than just the API. There are new
-features and the internals have been improved. The latest release of PCRE2 is
-available in three alternative formats from:
+features, and the internals have been improved. The original PCRE1 library is
+now obsolete and no longer maintained. The latest release of PCRE2 is available
+in .tar.gz, .tar.bz2, or .zip form from this GitHub repository:

-https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.gz
-https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.bz2
-https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.zip
+https://github.com/PCRE2Project/pcre2/releases

-There is a mailing list for discussion about the development of PCRE (both the
-original and new APIs) at pcre-dev@exim.org. You can access the archives and
-subscribe or manage your subscription here:
+There is a mailing list for discussion about the development of PCRE2 at
+pcre2-dev@googlegroups.com. You can subscribe by sending an email to
+pcre2-dev+subscribe@googlegroups.com.

-   https://lists.exim.org/mailman/listinfo/pcre-dev
+You can access the archives and also subscribe or manage your subscription
+here:
+
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -112,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.

-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.

+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@ -186,10 +194,10 @@ library. They are also documented in the pcre2build man page.

  As well as supporting UTF strings, Unicode support includes support for the
  \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).

 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
  of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@ -269,9 +277,9 @@ library. They are also documented in the pcre2build man page.

  --enable-rebuild-chartables

-  a program called dftables is compiled and run in the default C locale when
-  you obey "make". It builds a source file called pcre2_chartables.c. If you do
-  not specify this option, pcre2_chartables.c is created as a copy of
+  a program called pcre2_dftables is compiled and run in the default C locale
+  when you obey "make". It builds a source file called pcre2_chartables.c. If
+  you do not specify this option, pcre2_chartables.c is created as a copy of
  pcre2_chartables.c.dist. See "Character tables" below for further
  information.

@ -297,8 +305,8 @@ library. They are also documented in the pcre2build man page.
  unaddressable. This allows it to detect invalid memory accesses, and is
  mostly useful for debugging PCRE2 itself.

-. In environments where the gcc compiler is used and lcov version 1.6 or above
-  is installed, if you specify
+. In environments where the gcc compiler is used and lcov is installed, if you
+  specify

  --enable-coverage

@ -367,19 +375,20 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.

 . The C99 standard defines formatting modifiers z and t for size_t and
  ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-  environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-  defined and has a value greater than or equal to 199901L (indicating C99).
-  However, there is at least one environment that claims to be C99 but does not
-  support these modifiers. If --disable-percent-zt is specified, no use is made
-  of the z or t modifiers. Instead or %td or %zu, %lu is used, with a cast for
-  size_t values.
+  environments other than Microsoft Visual Studio versions earlier than 2013
+  when __STDC_VERSION__ is defined and has a value greater than or equal to
+  199901L (indicating C99). However, there is at least one environment that
+  claims to be C99 but does not support these modifiers. If
+  --disable-percent-zt is specified, no use is made of the z or t modifiers.
+  Instead of %td or %zu, %lu is used, with a cast for size_t values.

 . There is a special option called --enable-fuzz-support for use by people who
  want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
@ -392,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -409,7 +418,7 @@ The "configure" script builds the following files for the basic C library:
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                         that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@ -548,11 +557,11 @@ Cross-compiling using autotools

 You can specify CC and CFLAGS in the normal way to the "configure" command, in
 order to cross-compile PCRE2 for some other host. However, you should NOT
-specify --enable-rebuild-chartables, because if you do, the dftables.c source
-file is compiled and run on the local host, in order to generate the inbuilt
-character tables (the pcre2_chartables.c file). This will probably not work,
-because dftables.c needs to be compiled with the local compiler, not the cross
-compiler.
+specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
+source file is compiled and run on the local host, in order to generate the
+inbuilt character tables (the pcre2_chartables.c file). This will probably not
+work, because pcre2_dftables.c needs to be compiled with the local compiler,
+not the cross compiler.

 When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
 created by making a copy of pcre2_chartables.c.dist, which is a default set of
@ -560,9 +569,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
 not be a problem.

 If you need to modify the character tables when cross-compiling, you should
-move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
-and run it on the local host to make a new version of pcre2_chartables.c.dist.
-Then when you cross-compile PCRE2 this new version of the tables will be used.
+move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
+hand and run it on the local host to make a new version of
+pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
+at build time" for more details.


 Making new tarballs
@ -599,13 +609,13 @@ is available. RunTest outputs a comment when it skips a test.

 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.

 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.

-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:

@ -686,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
@ -721,8 +731,8 @@ compile context.
 The source file called pcre2_chartables.c contains the default set of tables.
 By default, this is created as a copy of pcre2_chartables.c.dist, which
 contains tables for ASCII coding. However, if --enable-rebuild-chartables is
-specified for ./configure, a different version of pcre2_chartables.c is built
-by the program dftables (compiled from dftables.c), which uses the ANSI C
+specified for ./configure, a new version of pcre2_chartables.c is built by the
+program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
 character handling functions such as isalnum(), isalpha(), isupper(),
 islower(), etc. to build the table sources. This means that the default C
 locale that is set for your system will control the contents of these default
@ -732,32 +742,40 @@ file does not get automatically re-generated. The best way to do this is to
 move pcre2_chartables.c.dist out of the way and replace it with your customized
 tables.

-When the dftables program is run as a result of --enable-rebuild-chartables,
-it uses the default C locale that is set on your system. It does not pay
-attention to the LC_xxx environment variables. In other words, it uses the
-system's default locale rather than whatever the compiling user happens to have
-set. If you really do want to build a source set of character tables in a
-locale that is specified by the LC_xxx variables, you can run the dftables
-program by hand with the -L option. For example:
+When the pcre2_dftables program is run as a result of specifying
+--enable-rebuild-chartables, it uses the default C locale that is set on your
+system. It does not pay attention to the LC_xxx environment variables. In other
+words, it uses the system's default locale rather than whatever the compiling
+user happens to have set. If you really do want to build a source set of
+character tables in a locale that is specified by the LC_xxx variables, you can
+run the pcre2_dftables program by hand with the -L option. For example:

-  ./dftables -L pcre2_chartables.c.special
+  ./pcre2_dftables -L pcre2_chartables.c.special

-The first two 256-byte tables provide lower casing and case flipping functions,
-respectively. The next table consists of three 32-byte bit maps which identify
-digits, "word" characters, and white space, respectively. These are used when
-building 32-byte bit maps that represent character classes for code points less
-than 256. The final 256-byte table has bits indicating various character types,
-as follows:
+The second argument names the file where the source code for the tables is
+written. The first two 256-byte tables provide lower casing and case flipping
+functions, respectively. The next table consists of a number of 32-byte bit
+maps which identify certain character classes such as digits, "word"
+characters, white space, etc. These are used when building 32-byte bit maps
+that represent character classes for code points less than 256. The final
+256-byte table has bits indicating various character types, as follows:

    1   white space character
    2   letter
-    4   decimal digit
-    8   hexadecimal digit
+    4   lower case letter
+    8   decimal digit
   16   alphanumeric or '_'
-  128   regular expression metacharacter or binary zero

-You should not alter the set of characters that contain the 128 bit, as that
-will cause PCRE2 to malfunction.
+You can also specify -b (with or without -L) when running pcre2_dftables. This
+causes the tables to be written in binary instead of as source code. A set of
+binary tables can be loaded into memory by an application and passed to
+pcre2_compile() in the same way as tables created dynamically by calling
+pcre2_maketables(). The tables are just a string of bytes, independent of
+hardware characteristics such as endianness. This means they can be bundled
+with an application that runs in different environments, to ensure consistent
+behaviour.
+
+See also the pcre2build section "Creating character tables at build time".


 File manifest
@ -768,7 +786,7 @@ The distribution should contain the files listed below.
 (A) Source files for the PCRE2 library functions and their headers are found in
    the src directory:

-  src/dftables.c           auxiliary program for building pcre2_chartables.c
+  src/pcre2_dftables.c     auxiliary program for building pcre2_chartables.c
                           when --enable-rebuild-chartables is specified

  src/pcre2_chartables.c.dist  a default set of character tables that assume
@ -892,6 +910,6 @@ The distribution should contain the files listed below.
                          )   environments

 Philip Hazel
-Email local part: ph10
-Email domain: cam.ac.uk
-Last updated: 16 April 2019
+Email local part: Philip.Hazel
+Email domain: gmail.com
+Last updated: 15 April 2022
--- a/README-OS4.md
+++ b/README-OS4.md
@ -0,0 +1,39 @@
+PCRE2 (Perl-compatible regular expression library)
+---------------------------------------------------------------------------
+
+This is a port of PCRE2 10.40 by Philip Hazel for AmigaOS 4, as found at the
+GitHub repository https://github.com/PCRE2Project/pcre2
+
+More information about PCRE can be found at its official website
+at https://www.pcre.org and in the documentation that comes with this
+package.
+
+In the archive both newlib and clib2 libraries are included. It has been
+tested with various applications, but in case you find issues please 
+contact me.
+
+To install it into your AmigaOS 4 SDK installation, just extract all the 
+files into the SDK: path.
+
+Compile
+--------------------------
+The source and the changes I made can be found at my personal repository
+https://git.walkero.gr/walkero/pcre2
+
+You can compile it using the Makefile.os4 file, and produce the libraries
+yourself.
+
+* with newlib run:
+  ```bash
+  make -f Makefile.os4 all
+  ```
+* with clib2 run:
+  ```bash
+  make -f Makefile.os4 all USE_CLIB2=yes
+  ```
+
+Changelog
+--------------------------
+v10.40r1 - 2022-07-31
+* First release
+
--- a/README.md
+++ b/README.md
@ -0,0 +1,56 @@
+# PCRE2 - Perl-Compatible Regular Expressions
+
+The PCRE2 library is a set of C functions that implement regular expression
+pattern matching using the same syntax and semantics as Perl 5. PCRE2 has its
+own native API, as well as a set of wrapper functions that correspond to the
+POSIX regular expression API. The PCRE2 library is free, even for building 
+proprietary software. It comes in three forms, for processing 8-bit, 16-bit,
+or 32-bit code units, in either literal or UTF encoding.
+
+PCRE2 was first released in 2015 to replace the API in the original PCRE 
+library, which is now obsolete and no longer maintained. As well as a more
+flexible API, the code of PCRE2 has been much improved since the fork.
+ 
+## Download
+
+As well as downloading from the 
+[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2 
+or the older, unmaintained PCRE1 library from an 
+[*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge.
+
+You can check out the PCRE2 source code via Git or Subversion:
+
+    git clone https://github.com/PCRE2Project/pcre2.git
+    svn co    https://github.com/PCRE2Project/pcre2.git
+
+## Contributed Ports
+
+If you just need the command-line PCRE2 tools on Windows, precompiled binary
+versions are available at this 
+[Rexegg page](http://www.rexegg.com/pcregrep-pcretest.html).
+
+A PCRE2 port for z/OS, a mainframe operating system which uses EBCDIC as its
+default character encoding, can be found at 
+[http://www.cbttape.org](http://www.cbttape.org/) (File 939).
+
+## Documentation
+
+You can read the PCRE2 documentation 
+[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html).
+
+Comparisons to Perl's regular expression semantics can be found in the
+community authored Wikipedia entry for PCRE.
+
+There is a curated summary of changes for each PCRE release, copies of
+documentation from older releases, and other useful information from the third
+party authored 
+[RexEgg PCRE Documentation and Change Log page](http://www.rexegg.com/pcre-documentation.html).
+
+## Contact
+
+To report a problem with the PCRE2 library, or to make a feature request, please
+use the PCRE2 GitHub issues tracker. There is a mailing list for discussion of
+PCRE2 issues and development at pcre2-dev@googlegroups.com, which is where any
+announcements will be made. You can browse the 
+[list archives](https://groups.google.com/g/pcre2-dev).
+
--- a/87
+++ b/87
@ -68,6 +68,22 @@ diff -b  /dev/null /dev/null 2>/dev/null && cf="diff -b"
 diff -u  /dev/null /dev/null 2>/dev/null && cf="diff -u"
 diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"

+# Some tests involve NUL characters. It seems impossible to handle them easily
+# in many operating systems. An earlier version of this script used sed to
+# translate NUL into the string ZERO, but this didn't work on Solaris (aka
+# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
+# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
+# even when using GNU sed. A user suggested using tr instead, which
+# necessitates translating to a single character. However, on (some versions
+# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
+# /usr/xpg4/bin/tr is available, it can do so, so test for that.
+
+# Use the "tr" found on PATH unless the XPG4 one is installed and executable.
+tr=tr
+test -x /usr/xpg4/bin/tr && tr=/usr/xpg4/bin/tr
+
 # If this test is being run from "make check", $srcdir will be set. If not, set
 # it to the current or parent directory, whichever one contains the test data.
 # Subsequently, we run most of the pcre2grep tests in the source directory so
@ -558,7 +574,7 @@ echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
 echo "a" >testtemp1grep
 echo "aaaaa" >>testtemp1grep
-(cd $srcdir; $valgrind $vjs $pcre2grep  --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep  --line-offsets --allow-lookaround-bsk '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 108 ------------------------------" >>testtrygrep
@ -638,13 +654,13 @@ echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 125 -----------------------------" >>testtrygrep
 printf 'abcd\n' >testNinputgrep
-$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K.)' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep
-$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=.\K)' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep
-$valgrind $vjs $pcre2grep --colour=always '(?<=\K[ac])' testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K[ac])' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep
-$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 126 -----------------------------" >>testtrygrep
@ -661,6 +677,40 @@ echo "---------------------------- Test 128 -----------------------------" >>tes
 (cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

+echo "---------------------------- Test 129 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m 2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 130 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -o -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 131 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -oc -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; head -1 <&3; exec 3<&-) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; exec 3<&-) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 134 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
 # Now compare the results.

 $cf $srcdir/testdata/grepoutput testtrygrep
@ -681,7 +731,7 @@ if [ $utf8 -ne 0 ] ; then
  echo "RC=$?" >>testtrygrep

  echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
-  (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any --allow-lookaround-bsk '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
  echo "RC=$?" >>testtrygrep

  echo "---------------------------- Test U4 ------------------------------" >>testtrygrep
@ -694,6 +744,10 @@ if [ $utf8 -ne 0 ] ; then
  (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep
  echo "RC=$?" >>testtrygrep

+  echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
+  echo "RC=$?" >>testtrygrep
+
  $cf $srcdir/testdata/grepoutput8 testtrygrep
  if [ $? != 0 ] ; then exit 1; fi

@ -731,24 +785,10 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
 printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-# It seems impossible to handle NUL characters easily in many operating
-# systems, including Solaris (aka SunOS), where the version of sed explicitly
-# doesn't like them, and also MacOS (Darwin), OpenBSD, FreeBSD, and NetBSD. So
-# now we run this test only on OS that are known to work. For the rest, we
-# fudge the output so that the comparison works.
-
 printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
-uname=`uname`
-case $uname in
-  Linux)
-    printf 'abc\0def' >testNinputgrep
-    $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
-    echo "" >>testtrygrep
-    ;;
-  *)
-    echo '1:abcZERO2:def' >>testtrygrep
-    ;;
-esac
+printf 'abc\0def' >testNinputgrep
+$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
+echo "" >>testtrygrep

 $cf $srcdir/testdata/grepoutputN testtrygrep
 if [ $? != 0 ] ; then exit 1; fi
@ -764,6 +804,7 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
  $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
+  $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep

  if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
    $cf $srcdir/testdata/grepoutputCN testtrygrep
--- a/74
+++ b/74
@ -17,8 +17,16 @@
 # individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
 # end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
 # runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
-# except test 10. Whatever order the arguments are in, the tests are always run
-# in numerical order.
+# except test 10. Whatever order the arguments are in, these tests are always
+# run in numerical order.
+#
+# If no specific tests are selected (which is the case when this script is run
+# via 'make check') the default is to run all the numbered tests.
+#
+# There may also be named (as well as numbered) tests for special purposes. At
+# present there is just one, called "heap". This test's output contains the
+# sizes of heap frames and frame vectors, which depend on the environment. It
+# is therefore not run unless explicitly requested.
 #
 # Inappropriate tests are automatically skipped (with a comment to say so). For
 # example, if JIT support is not compiled, test 16 is skipped, whereas if JIT
@ -80,7 +88,9 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)"
 title23="Test 23: \C disabled test"
 title24="Test 24: Non-UTF pattern conversion tests"
 title25="Test 25: UTF pattern conversion tests"
-maxtest=25
+title26="Test 26: Auto-generated unicode property tests"
+maxtest=26
+titleheap="Test 'heap': Environment-specific heap tests"

 if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title0
@ -109,6 +119,12 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title23
  echo $title24
  echo $title25
+  echo $title26
+  echo ""
+  echo $titleheap
+  echo ""
+  echo "Numbered tests are automatically run if nothing selected."
+  echo "Named tests must be explicitly selected."
  exit 0
 fi

@ -238,6 +254,8 @@ do22=no
 do23=no
 do24=no
 do25=no
+do26=no
+doheap=no

 while [ $# -gt 0 ] ; do
  case $1 in
@ -267,6 +285,8 @@ while [ $# -gt 0 ] ; do
   23) do23=yes;;
   24) do24=yes;;
   25) do25=yes;;
+   26) do26=yes;;
+ heap) doheap=yes;;
   -8) arg8=yes;;
  -16) arg16=yes;;
  -32) arg32=yes;;
@ -320,7 +340,8 @@ fi
 # set up a large stack.

 $sim ./pcre2test -S 64 /dev/null /dev/null
-if [ $? -eq 0 -a "$bigstack" != "" ] ; then
+support_setstack=$?
+if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then
  setstack="-S 64"
 else
  setstack=""
@ -407,8 +428,8 @@ if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
  fi
 fi

-# If no specific tests were requested, select all. Those that are not
-# relevant will be automatically skipped.
+# If no specific tests were requested, select all the numbered tests. Those
+# that are not relevant will be automatically skipped.

 if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do4  = no -a $do5  = no -a $do6  = no -a $do7  = no -a \
@ -416,7 +437,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
     $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
     $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
-     $do24 = no -a $do25 = no \
+     $do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \
   ]; then
  do0=yes
  do1=yes
@ -444,6 +465,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
  do23=yes
  do24=yes
  do25=yes
+  do26=yes
 fi

 # Handle any explicit skips at this stage, so that an argument list may consist
@ -479,7 +501,9 @@ for bmode in "$test8" "$test16" "$test32"; do
    echo '' >testtry
    checkspecial '-C'
    checkspecial '--help'
-    checkspecial '-S 1 -t 10 testSinput'
+    if [ $support_setstack -eq 0 ] ; then
+      checkspecial '-S 1 -t 10 testSinput'
+    fi
    echo "  OK"
  fi

@ -493,15 +517,20 @@ for bmode in "$test8" "$test16" "$test32"; do
    done
  fi

-  # PCRE2 tests that are not Perl-compatible: API, errors, internals
+  # PCRE2 tests that are not Perl-compatible: API, errors, internals. We copy
+  # the testbtables file to the current directory for use by this test.

  if [ $do2 = yes ] ; then
    echo $title2 "(excluding UTF-$bits)"
+    cp $testdata/testbtables .
    for opt in "" $jitopt; do
      $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
-      if [ $? = 0 ] ; then
+      saverc=$?
+      if [ $saverc = 0 ] ; then
        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,200 >>testtry
        checkresult $? 2 "$opt"
+      else
+        checkresult $saverc 2 "$opt"
      fi
    done
  fi
@ -855,10 +884,33 @@ for bmode in "$test8" "$test16" "$test32"; do
    fi
  fi

+  # Auto-generated unicode property tests
+
+  if [ $do26 = yes ] ; then
+    echo $title26
+    if [ $utf -eq 0 ] ; then
+      echo "  Skipped because UTF-$bits support is not available"
+    else
+      for opt in "" $jitopt; do
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
+        checkresult $? 26 "$opt"
+      done
+    fi
+  fi
+
+  # Manually selected heap tests - output may vary in different environments,
+  # which is why they are not automatically run.
+
+  if [ $doheap = yes ] ; then
+    echo $titleheap
+    $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry
+    checkresult $? heap-$bits ""
+  fi
+
 # End of loop for 8/16/32-bit tests
 done

 # Clean up local working files
-rm -f testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
+rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry

 # End
--- a/RunTest.bat
+++ b/RunTest.bat
@ -26,6 +26,7 @@
@rem Updated for new test 14 (moving others up a number), August 2015.
@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
@rem PH added missing "set type" for test 22, April 2016.
+@rem PH added copy command for new testbtables file, November 2020


 setlocal enabledelayedexpansion
@ -134,9 +135,9 @@ if "%all%" == "yes" (
  set do7=yes
  set do8=yes
  set do9=yes
-  set do10=yes
+  set do10=no
  set do11=yes
-  set do12=yes
+  set do12=no
  set do13=yes
  set do14=yes
  set do15=yes
@ -305,6 +306,7 @@ if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit
 goto :eof

 :do2
+  copy /y %srcdir%\testdata\testbtables testbtables
 
  call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
  if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit
 goto :eof
--- a/WORKSPACE.bazel
+++ b/WORKSPACE.bazel
@ -0,0 +1 @@
+# See MODULE.bazel
--- a/cmake/FindEditline.cmake
+++ b/cmake/FindEditline.cmake
@ -1,17 +1,16 @@
 # Modified from FindReadline.cmake (PH Feb 2012)

-if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
  set(EDITLINE_FOUND TRUE)
-else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
-  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h
-    /usr/include/editline
-    /usr/include/edit/readline  
-    /usr/include/readline
+else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
+  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES
+    editline
+    edit/readline
  )
  
  FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
  include(FindPackageHandleStandardArgs)
-  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY )
+  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
-endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
--- a/cmake/pcre2-config-version.cmake.in
+++ b/cmake/pcre2-config-version.cmake.in
@ -0,0 +1,15 @@
+set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@)
+set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@)
+set(PACKAGE_VERSION_PATCH 0)
+set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)
+
+# Check whether the requested PACKAGE_FIND_VERSION is compatible
+if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR
+   PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
+  set(PACKAGE_VERSION_COMPATIBLE FALSE)
+else()
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
+    set(PACKAGE_VERSION_EXACT TRUE)
+  endif()
+endif()
--- a/cmake/pcre2-config.cmake.in
+++ b/cmake/pcre2-config.cmake.in
@ -0,0 +1,145 @@
+# pcre2-config.cmake
+# ----------------
+#
+# Finds the PCRE2 library. Specify the starting search path in PCRE2_ROOT.
+#
+# Static vs. shared
+# -----------------
+# To make use of the static library instead of the shared one, one needs
+# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package.
+# Example:
+#   set(PCRE2_USE_STATIC_LIBS ON)
+#   find_package(PCRE2 CONFIG COMPONENTS 8BIT)
+#
+# This will define the following variables:
+#
+#   PCRE2_FOUND   - True if the system has the PCRE2 library.
+#   PCRE2_VERSION - The version of the PCRE2 library which was found.
+#
+# and the following imported targets:
+#
+#   PCRE2::8BIT  - The 8 bit PCRE2 library.
+#   PCRE2::16BIT - The 16 bit PCRE2 library.
+#   PCRE2::32BIT - The 32 bit PCRE2 library.
+#   PCRE2::POSIX - The POSIX PCRE2 library.
+
+set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@)
+set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@)
+set(PCRE2_8BIT_NAME pcre2-8)
+set(PCRE2_16BIT_NAME pcre2-16)
+set(PCRE2_32BIT_NAME pcre2-32)
+set(PCRE2_POSIX_NAME pcre2-posix)
+find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")
+if (PCRE2_USE_STATIC_LIBS)
+  if (MSVC)
+    set(PCRE2_8BIT_NAME pcre2-8-static)
+    set(PCRE2_16BIT_NAME pcre2-16-static)
+    set(PCRE2_32BIT_NAME pcre2-32-static)
+    set(PCRE2_POSIX_NAME pcre2-posix-static)
+  endif ()
+
+  set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
+  set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
+else ()
+  set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
+  if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
+    set(PCRE2_PREFIX "")
+  endif ()
+
+  set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
+  if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
+    set(PCRE2_SUFFIX "-0.dll")
+  endif ()
+endif ()
+find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library")
+find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library")
+find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library")
+find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library")
+unset(PCRE2_NON_STANDARD_LIB_PREFIX)
+unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
+unset(PCRE2_8BIT_NAME)
+unset(PCRE2_16BIT_NAME)
+unset(PCRE2_32BIT_NAME)
+unset(PCRE2_POSIX_NAME)
+
+# Set version
+if (PCRE2_INCLUDE_DIR)
+  set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
+endif ()
+
+# Which components have been found.
+if (PCRE2_8BIT_LIBRARY)
+  set(PCRE2_8BIT_FOUND TRUE)
+endif ()
+if (PCRE2_16BIT_LIBRARY)
+  set(PCRE2_16BIT_FOUND TRUE)
+endif ()
+if (PCRE2_32BIT_LIBRARY)
+  set(PCRE2_32BIT_FOUND TRUE)
+endif ()
+if (PCRE2_POSIX_LIBRARY)
+  set(PCRE2_POSIX_FOUND TRUE)
+endif ()
+
+# Check if at least one component has been specified.
+list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
+if (PCRE2_NCOMPONENTS LESS 1)
+  message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
+endif ()
+unset(PCRE2_NCOMPONENTS)
+
+# When POSIX component has been specified make sure that also 8BIT component is specified.
+set(PCRE2_8BIT_COMPONENT FALSE)
+set(PCRE2_POSIX_COMPONENT FALSE)
+foreach(component ${PCRE2_FIND_COMPONENTS})
+  if (component STREQUAL "8BIT")
+    set(PCRE2_8BIT_COMPONENT TRUE)
+  elseif (component STREQUAL "POSIX")
+    set(PCRE2_POSIX_COMPONENT TRUE)
+  endif ()
+endforeach()
+
+if (PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
+  message(FATAL_ERROR "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component.")
+endif()
+unset(PCRE2_8BIT_COMPONENT)
+unset(PCRE2_POSIX_COMPONENT)
+
+include(FindPackageHandleStandardArgs)
+set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
+find_package_handle_standard_args(PCRE2
+  FOUND_VAR PCRE2_FOUND
+  REQUIRED_VARS PCRE2_INCLUDE_DIR
+  HANDLE_COMPONENTS
+  VERSION_VAR PCRE2_VERSION
+  CONFIG_MODE
+)
+
+set(PCRE2_LIBRARIES)
+if (PCRE2_FOUND)
+  foreach(component ${PCRE2_FIND_COMPONENTS})
+    if (PCRE2_USE_STATIC_LIBS)
+      add_library(PCRE2::${component} STATIC IMPORTED)
+      target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
+    else ()
+      add_library(PCRE2::${component} SHARED IMPORTED)
+    endif ()
+    set_target_properties(PCRE2::${component} PROPERTIES
+      IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
+    )
+    if (component STREQUAL "POSIX")
+      set_target_properties(PCRE2::${component} PROPERTIES
+        INTERFACE_LINK_LIBRARIES "PCRE2::8BIT"
+        LINK_LIBRARIES "PCRE2::8BIT"
+      )
+    endif ()
+
+    set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY})
+    mark_as_advanced(PCRE2_${component}_LIBRARY)
+  endforeach()
+endif ()
+
+mark_as_advanced(
+  PCRE2_INCLUDE_DIR
+)
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@ -1,8 +1,7 @@
 /* config.h for CMake builds */

+#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
 #cmakedefine HAVE_DIRENT_H 1
-#cmakedefine HAVE_INTTYPES_H 1    
-#cmakedefine HAVE_STDINT_H 1                                                   
 #cmakedefine HAVE_STRERROR 1
 #cmakedefine HAVE_SYS_STAT_H 1
 #cmakedefine HAVE_SYS_TYPES_H 1
@ -10,9 +9,10 @@
 #cmakedefine HAVE_WINDOWS_H 1

 #cmakedefine HAVE_BCOPY 1
+#cmakedefine HAVE_MEMFD_CREATE 1
 #cmakedefine HAVE_MEMMOVE 1
-
-#cmakedefine PCRE2_STATIC 1
+#cmakedefine HAVE_SECURE_GETENV 1
+#cmakedefine HAVE_STRERROR 1

 #cmakedefine SUPPORT_PCRE2_8 1
 #cmakedefine SUPPORT_PCRE2_16 1
--- a/configure.ac
+++ b/configure.ac
@ -9,21 +9,21 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
 dnl be defined as -RC2, for example. For real releases, it should be empty.

 m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [34])
+m4_define(pcre2_minor, [41])
 m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2019-11-21])
+m4_define(pcre2_date, [2022-xx-xx])
+
+# Libtool shared library interface versions (current:revision:age)
+m4_define(libpcre2_8_version,     [11:0:11])
+m4_define(libpcre2_16_version,    [11:0:11])
+m4_define(libpcre2_32_version,    [11:0:11])
+m4_define(libpcre2_posix_version, [3:2:0])

 # NOTE: The CMakeLists.txt file searches for the above variables in the first
 # 50 lines of this file. Please update that if the variables above are moved.

-# Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre2_8_version,     [9:0:9])
-m4_define(libpcre2_16_version,    [9:0:9])
-m4_define(libpcre2_32_version,    [9:0:9])
-m4_define(libpcre2_posix_version, [2:3:0])
-
-AC_PREREQ(2.57)
-AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
+AC_PREREQ([2.60])
+AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
 AC_CONFIG_SRCDIR([src/pcre2.h.in])
 AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
@ -64,14 +64,31 @@ m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
 AC_TYPE_INT64_T

 AC_PROG_INSTALL
-AC_LIBTOOL_WIN32_DLL
-LT_INIT
+LT_INIT([win32-dll])
 AC_PROG_LN_S

 # Check for GCC visibility feature

 PCRE2_VISIBILITY

+# Check for Clang __attribute__((uninitialized)) feature
+
+AC_MSG_CHECKING([for __attribute__((uninitialized))])
+AC_LANG_PUSH([C])
+tmp_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+                   [[char buf[128] __attribute__((uninitialized));(void)buf]])],
+                   [pcre2_cc_cv_attribute_uninitialized=yes],
+                   [pcre2_cc_cv_attribute_uninitialized=no])
+AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
+if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
+  AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
+             supports __attribute__((uninitialized))])
+fi
+CFLAGS=$tmp_CFLAGS
+AC_LANG_POP([C])
+
 # Versioning

 PCRE2_MAJOR="pcre2_major"
@ -158,11 +175,18 @@ if test "$enable_jit" = "auto"; then
  echo checking for JIT support on this hardware... $enable_jit
 fi

-# Handle --enable-jit-sealloc (disabled by default)
-AC_ARG_ENABLE(jit-sealloc,
-              AS_HELP_STRING([--enable-jit-sealloc],
-                             [enable SELinux compatible execmem allocator in JIT (experimental)]),
-              , enable_jit_sealloc=no)
+# Handle --enable-jit-sealloc (disabled by default and only experimental)
+case $host_os in
+  linux* | netbsd*)
+    AC_ARG_ENABLE(jit-sealloc,
+      AS_HELP_STRING([--enable-jit-sealloc],
+        [enable SELinux compatible execmem allocator in JIT (experimental)]),
+        ,enable_jit_sealloc=no)
+    ;;
+  *)
+    enable_jit_sealloc=unsupported
+    ;;
+esac

 # Handle --disable-pcre2grep-jit (enabled by default)
 AC_ARG_ENABLE(pcre2grep-jit,
@ -399,7 +423,7 @@ case "$enable_newline" in
  anycrlf) ac_pcre2_newline_value=5 ;;
  nul)     ac_pcre2_newline_value=6 ;;
  *)
-  AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
+  AC_MSG_ERROR([invalid argument "$enable_newline" to --enable-newline option])
  ;;
 esac

@ -428,7 +452,7 @@ fi
 case "$with_link_size" in
  2|3|4) ;;
  *)
-  AC_MSG_ERROR([invalid argument \"$with_link_size\" to --with-link-size option])
+  AC_MSG_ERROR([invalid argument "$with_link_size" to --with-link-size option])
  ;;
 esac

@ -461,7 +485,6 @@ HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
 sure both macros are undefined; an emulation function will then be used. */])

 # Checks for header files.
-AC_HEADER_STDC
 AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
 AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
 AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
@ -489,7 +512,20 @@ AC_TYPE_SIZE_T

 # Checks for library functions.

-AC_CHECK_FUNCS(bcopy memmove strerror mkostemp secure_getenv)
+AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
+AC_MSG_CHECKING([for realpath])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <stdlib.h>
+#include <limits.h>
+]],[[
+char buffer[PATH_MAX];
+realpath(".", buffer);
+]])],
+[AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_REALPATH], 1,
+  [Define to 1 if you have the `realpath' function.])
+],
+AC_MSG_RESULT([no]))

 # Check for the availability of libz (aka zlib)

@ -561,14 +597,14 @@ if test "$enable_pcre2test_libreadline" = "yes"; then
 fi
 fi

-
 # Check for the availability of libedit. Different distributions put its
 # headers in different places. Try to cover the most common ones.

 if test "$enable_pcre2test_libedit" = "yes"; then
-  AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
-    [AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
-      [AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
+  AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [
+    HAVE_LIBEDIT_HEADER=1
+    break
+  ])
  AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
 fi

@ -904,10 +940,9 @@ if test "$enable_pcre2test_libedit" = "yes"; then
    echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
    exit 1
  fi
-  if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
-          "$HAVE_READLINE_READLINE_H" != "1"; then
-    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
-    echo "** nor readline/readline.h was found."
+  if test -z "$HAVE_LIBEDIT_HEADER"; then
+    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h,"
+    echo "** edit/readline/readline.h nor a compatible header was found."
    exit 1
  fi
  if test -z "$LIBEDIT"; then
@ -981,7 +1016,27 @@ fi # enable_coverage

 AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])

+AC_MSG_CHECKING([whether Intel CET is enabled])
+AC_LANG_PUSH([C])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+                   [[#ifndef __CET__
+# error CET is not enabled
+#endif]])],
+                   [pcre2_cc_cv_intel_cet_enabled=yes],
+                   [pcre2_cc_cv_intel_cet_enabled=no])
+AC_MSG_RESULT([$pcre2_cc_cv_intel_cet_enabled])
+if test "$pcre2_cc_cv_intel_cet_enabled" = yes; then
+  CET_CFLAGS="-mshstk"
+  AC_SUBST([CET_CFLAGS])
+fi
+AC_LANG_POP([C])
+
+# LIB_POSTFIX is used by CMakeLists.txt for Windows debug builds.
+# Pass empty LIB_POSTFIX to *.pc files and pcre2-config here.
+AC_SUBST(LIB_POSTFIX)
+
 # Produce these files, in addition to config.h.
+
 AC_CONFIG_FILES(
 	Makefile
 	libpcre2-8.pc
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@ -40,7 +40,11 @@ GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY

 The following are generic instructions for building the PCRE2 C library "by
 hand". If you are going to use CMake, this section does not apply to you; you
-can skip ahead to the CMake section.
+can skip ahead to the CMake section. Note that the settings concerned with
+8-bit, 16-bit, and 32-bit code units relate to the type of data string that
+PCRE2 processes. They are NOT referring to the underlying operating system bit
+width. You do not have to do anything special to compile in a 64-bit
+environment, for example.

 (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
     macro settings that it contains to whatever is appropriate for your
@ -74,23 +78,23 @@ can skip ahead to the CMake section.
       src/pcre2_chartables.c.

     OR:
-       Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
-       if you have set up src/config.h), and then run it with the single
-       argument "src/pcre2_chartables.c". This generates a set of standard
-       character tables and writes them to that file. The tables are generated
-       using the default C locale for your system. If you want to use a locale
-       that is specified by LC_xxx environment variables, add the -L option to
-       the dftables command. You must use this method if you are building on a
-       system that uses EBCDIC code.
+       Compile src/pcre2_dftables.c as a stand-alone program (using
+       -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
+       the single argument "src/pcre2_chartables.c". This generates a set of
+       standard character tables and writes them to that file. The tables are
+       generated using the default C locale for your system. If you want to use
+       a locale that is specified by LC_xxx environment variables, add the -L
+       option to the pcre2_dftables command. You must use this method if you
+       are building on a system that uses EBCDIC code.

     The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
     specify alternative tables at run time.

- (4) For an 8-bit library, compile the following source files from the src
-     directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
-     set -DHAVE_CONFIG_H if you have set up src/config.h with your
-     configuration, or else use other -D settings to change the configuration
-     as required.
+ (4) For a library that supports 8-bit code units in the character strings that
+     it processes, compile the following source files from the src directory,
+     setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set
+     -DHAVE_CONFIG_H if you have set up src/config.h with your configuration,
+     or else use other -D settings to change the configuration as required.

       pcre2_auto_possess.c
       pcre2_chartables.c
@ -117,6 +121,7 @@ can skip ahead to the CMake section.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -142,9 +147,9 @@ can skip ahead to the CMake section.
     If your system has static and shared libraries, you may have to do this
     once for each type.

- (6) If you want to build a 16-bit library or 32-bit library (as well as, or
-     instead of the 8-bit library) just supply 16 or 32 as the value of
-     -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
+ (6) If you want to build a library that supports 16-bit or 32-bit code units
+     (as well as, or instead of, the 8-bit library), just supply 16 or 32 as
+     the value of -DPCRE2_CODE_UNIT_WIDTH when you are compiling.

 (7) If you want to build the POSIX wrapper functions (which apply only to the
     8-bit library), ensure that you have the src/pcre2posix.h file and then
@ -302,7 +307,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
    source dir. For example, C:\pcre2\pcre2-xx\build.

-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
    to start Cmake from the Windows Start menu, as this can lead to errors.

@ -339,10 +344,10 @@ cache can be deleted by selecting "File > Delete Cache".

 BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO

-The code currently cannot be compiled without a stdint.h header, which is
-available only in relatively recent versions of Visual Studio. However, this
-portable and permissively-licensed implementation of the header worked without
-issue:
+The code currently cannot be compiled without an inttypes.h header, which is
+available only with Visual Studio 2013 or newer. However, this portable and
+permissively-licensed implementation of the stdint.h header could be used as an
+alternative:

  http://www.azillionmonkeys.com/qed/pstdint.h

@ -369,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (wherein which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
@ -401,6 +406,6 @@ Everything in that location, source and executable, is in EBCDIC and native
 z/OS file formats. The port provides an API for LE languages such as COBOL and
 for the z/OS and z/VM versions of the Rexx languages.

-==============================
-Last Updated: 14 November 2018
-==============================
+===========================
+Last Updated: 28 April 2021
+===========================
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -4,18 +4,20 @@ README file for PCRE2 (Perl-compatible regular expression library)
 PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
 API. Since its initial release in 2015, there has been further development of
 the code and it now differs from PCRE1 in more than just the API. There are new
-features and the internals have been improved. The latest release of PCRE2 is
-available in three alternative formats from:
+features, and the internals have been improved. The original PCRE1 library is
+now obsolete and no longer maintained. The latest release of PCRE2 is available
+in .tar.gz, .tar.bz2, or .zip form from this GitHub repository:

-https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.gz
-https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.bz2
-https://ftp.pcre.org/pub/pcre/pcre2-10.xx.tar.zip
+https://github.com/PCRE2Project/pcre2/releases

-There is a mailing list for discussion about the development of PCRE (both the
-original and new APIs) at pcre-dev@exim.org. You can access the archives and
-subscribe or manage your subscription here:
+There is a mailing list for discussion about the development of PCRE2 at
+pcre2-dev@googlegroups.com. You can subscribe by sending an email to
+pcre2-dev+subscribe@googlegroups.com.

-   https://lists.exim.org/mailman/listinfo/pcre-dev
+You can access the archives and also subscribe or manage your subscription
+here:
+
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -112,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.

-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.

+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@ -186,10 +194,10 @@ library. They are also documented in the pcre2build man page.

  As well as supporting UTF strings, Unicode support includes support for the
  \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).

 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
  of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@ -269,9 +277,9 @@ library. They are also documented in the pcre2build man page.

  --enable-rebuild-chartables

-  a program called dftables is compiled and run in the default C locale when
-  you obey "make". It builds a source file called pcre2_chartables.c. If you do
-  not specify this option, pcre2_chartables.c is created as a copy of
+  a program called pcre2_dftables is compiled and run in the default C locale
+  when you obey "make". It builds a source file called pcre2_chartables.c. If
+  you do not specify this option, pcre2_chartables.c is created as a copy of
  pcre2_chartables.c.dist. See "Character tables" below for further
  information.

@ -297,8 +305,8 @@ library. They are also documented in the pcre2build man page.
  unaddressable. This allows it to detect invalid memory accesses, and is
  mostly useful for debugging PCRE2 itself.

-. In environments where the gcc compiler is used and lcov version 1.6 or above
-  is installed, if you specify
+. In environments where the gcc compiler is used and lcov is installed, if you
+  specify

  --enable-coverage

@ -367,19 +375,20 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.

 . The C99 standard defines formatting modifiers z and t for size_t and
  ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-  environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-  defined and has a value greater than or equal to 199901L (indicating C99).
-  However, there is at least one environment that claims to be C99 but does not
-  support these modifiers. If --disable-percent-zt is specified, no use is made
-  of the z or t modifiers. Instead or %td or %zu, %lu is used, with a cast for
-  size_t values.
+  environments other than Microsoft Visual Studio versions earlier than 2013
+  when __STDC_VERSION__ is defined and has a value greater than or equal to
+  199901L (indicating C99). However, there is at least one environment that
+  claims to be C99 but does not support these modifiers. If
+  --disable-percent-zt is specified, no use is made of the z or t modifiers.
+  Instead of %td or %zu, %lu is used, with a cast for size_t values.

 . There is a special option called --enable-fuzz-support for use by people who
  want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
@ -392,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -409,7 +418,7 @@ The "configure" script builds the following files for the basic C library:
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                         that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@ -548,11 +557,11 @@ Cross-compiling using autotools

 You can specify CC and CFLAGS in the normal way to the "configure" command, in
 order to cross-compile PCRE2 for some other host. However, you should NOT
-specify --enable-rebuild-chartables, because if you do, the dftables.c source
-file is compiled and run on the local host, in order to generate the inbuilt
-character tables (the pcre2_chartables.c file). This will probably not work,
-because dftables.c needs to be compiled with the local compiler, not the cross
-compiler.
+specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
+source file is compiled and run on the local host, in order to generate the
+inbuilt character tables (the pcre2_chartables.c file). This will probably not
+work, because pcre2_dftables.c needs to be compiled with the local compiler,
+not the cross compiler.

 When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
 created by making a copy of pcre2_chartables.c.dist, which is a default set of
@ -560,9 +569,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
 not be a problem.

 If you need to modify the character tables when cross-compiling, you should
-move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
-and run it on the local host to make a new version of pcre2_chartables.c.dist.
-Then when you cross-compile PCRE2 this new version of the tables will be used.
+move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
+hand and run it on the local host to make a new version of
+pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
+at build time" for more details.


 Making new tarballs
@ -599,13 +609,13 @@ is available. RunTest outputs a comment when it skips a test.

 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.

 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.

-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:

@ -686,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
@ -721,8 +731,8 @@ compile context.
 The source file called pcre2_chartables.c contains the default set of tables.
 By default, this is created as a copy of pcre2_chartables.c.dist, which
 contains tables for ASCII coding. However, if --enable-rebuild-chartables is
-specified for ./configure, a different version of pcre2_chartables.c is built
-by the program dftables (compiled from dftables.c), which uses the ANSI C
+specified for ./configure, a new version of pcre2_chartables.c is built by the
+program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
 character handling functions such as isalnum(), isalpha(), isupper(),
 islower(), etc. to build the table sources. This means that the default C
 locale that is set for your system will control the contents of these default
@ -732,32 +742,40 @@ file does not get automatically re-generated. The best way to do this is to
 move pcre2_chartables.c.dist out of the way and replace it with your customized
 tables.

-When the dftables program is run as a result of --enable-rebuild-chartables,
-it uses the default C locale that is set on your system. It does not pay
-attention to the LC_xxx environment variables. In other words, it uses the
-system's default locale rather than whatever the compiling user happens to have
-set. If you really do want to build a source set of character tables in a
-locale that is specified by the LC_xxx variables, you can run the dftables
-program by hand with the -L option. For example:
+When the pcre2_dftables program is run as a result of specifying
+--enable-rebuild-chartables, it uses the default C locale that is set on your
+system. It does not pay attention to the LC_xxx environment variables. In other
+words, it uses the system's default locale rather than whatever the compiling
+user happens to have set. If you really do want to build a source set of
+character tables in a locale that is specified by the LC_xxx variables, you can
+run the pcre2_dftables program by hand with the -L option. For example:

-  ./dftables -L pcre2_chartables.c.special
+  ./pcre2_dftables -L pcre2_chartables.c.special

-The first two 256-byte tables provide lower casing and case flipping functions,
-respectively. The next table consists of three 32-byte bit maps which identify
-digits, "word" characters, and white space, respectively. These are used when
-building 32-byte bit maps that represent character classes for code points less
-than 256. The final 256-byte table has bits indicating various character types,
-as follows:
+The second argument names the file where the source code for the tables is
+written. The first two 256-byte tables provide lower casing and case flipping
+functions, respectively. The next table consists of a number of 32-byte bit
+maps which identify certain character classes such as digits, "word"
+characters, white space, etc. These are used when building 32-byte bit maps
+that represent character classes for code points less than 256. The final
+256-byte table has bits indicating various character types, as follows:

    1   white space character
    2   letter
-    4   decimal digit
-    8   hexadecimal digit
+    4   lower case letter
+    8   decimal digit
   16   alphanumeric or '_'
-  128   regular expression metacharacter or binary zero

-You should not alter the set of characters that contain the 128 bit, as that
-will cause PCRE2 to malfunction.
+You can also specify -b (with or without -L) when running pcre2_dftables. This
+causes the tables to be written in binary instead of as source code. A set of
+binary tables can be loaded into memory by an application and passed to
+pcre2_compile() in the same way as tables created dynamically by calling
+pcre2_maketables(). The tables are just a string of bytes, independent of
+hardware characteristics such as endianness. This means they can be bundled
+with an application that runs in different environments, to ensure consistent
+behaviour.
+
+See also the pcre2build section "Creating character tables at build time".


 File manifest
@ -768,7 +786,7 @@ The distribution should contain the files listed below.
 (A) Source files for the PCRE2 library functions and their headers are found in
    the src directory:

-  src/dftables.c           auxiliary program for building pcre2_chartables.c
+  src/pcre2_dftables.c     auxiliary program for building pcre2_chartables.c
                           when --enable-rebuild-chartables is specified

  src/pcre2_chartables.c.dist  a default set of character tables that assume
@ -892,6 +910,6 @@ The distribution should contain the files listed below.
                          )   environments

 Philip Hazel
-Email local part: ph10
-Email domain: cam.ac.uk
-Last updated: 16 April 2019
+Email local part: Philip.Hazel
+Email domain: gmail.com
+Last updated: 15 April 2022
--- a/doc/html/pcre2.html
+++ b/doc/html/pcre2.html
@ -28,7 +28,8 @@ nearly two decades, the limitations of the original API were making development
 increasingly difficult. The new API is more extensible, and it was simplified
 by abolishing the separate "study" optimizing function; in PCRE2, patterns are
 automatically optimized where possible. Since forking from PCRE1, the code has
-been extensively refactored and new features introduced.
+been extensively refactored and new features introduced. The old library is now
+obsolete and is no longer maintained.
 </P>
 <P>
 As well as Perl-style regular expression patterns, some features that appeared
@ -38,8 +39,14 @@ Oniguruma syntax items, and there are options for requesting some minor changes
 that give better ECMAScript (aka JavaScript) compatibility.
 </P>
 <P>
-The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
-code units, which means that up to three separate libraries may be installed.
+The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit,
+or 32-bit code units, which means that up to three separate libraries may be
+installed, one for each code unit size. The size of code unit is not related to
+the bit size of the underlying hardware. In a 64-bit environment that also
+supports 32-bit applications, versions of PCRE2 that are compiled in both
+64-bit and 32-bit modes may be needed.
+</P>
+<P>
 The original work to extend PCRE to 16-bit and 32-bit code units was done by
 Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
 can be interpreted either as one character per code unit, or as UTF-encoded
@ -187,20 +194,20 @@ function, listing its arguments and results.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <P>
 Putting an actual email address here is a spam magnet. If you want to email me,
-use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+use my two names separated by a dot at gmail.com.
 </P>
 <br><a name="SEC5" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 17 September 2018
+Last updated: 27 August 2021
 <br>
-Copyright &copy; 1997-2018 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2_compile.html
+++ b/doc/html/pcre2_compile.html
@ -92,8 +92,18 @@ Additional options may be set in the compile context via the
 function.
 </P>
 <P>
-The yield of this function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+If either of <i>errorcode</i> or <i>erroroffset</i> is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the <i>errorcode</i> argument to the
+<b>pcre2_get_error_message()</b> function. The offset (in code units) where the
+error was encountered is returned via the <i>erroroffset</i> argument.
+</P>
+<P>
+If there is no error, the value returned via <i>errorcode</i> yields the
+message "no error" if passed to <b>pcre2_get_error_message()</b>, and the value
+returned via <i>erroroffset</i> is zero.
 </P>
 <P>
 There is a complete description of the PCRE2 native API, with more detail on
--- a/doc/html/pcre2_dfa_match.html
+++ b/doc/html/pcre2_dfa_match.html
@ -45,10 +45,16 @@ just once (except when processing lookaround assertions). This function is
  <i>workspace</i>    Points to a vector of ints used as working space
  <i>wscount</i>      Number of elements in the vector
 </pre>
-For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
-up a callout function or specify the heap limit or the match or the recursion
-depth limits. The <i>length</i> and <i>startoffset</i> values are code units, not
-characters. The options are:
+The size of output vector needed to contain all the results depends on the
+number of simultaneous matches, not on the number of parentheses in the
+pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the match
+data block is therefore not advisable when using this function.
+</P>
+<P>
+A match context is needed only if you want to set up a callout function or
+specify the heap limit or the match or the recursion depth limits. The
+<i>length</i> and <i>startoffset</i> values are code units, not characters. The
+options are:
 <pre>
  PCRE2_ANCHORED          Match only at the first position
  PCRE2_COPY_MATCHED_SUBJECT
--- a/doc/html/pcre2_jit_free_unused_memory.html
+++ b/doc/html/pcre2_jit_free_unused_memory.html
@ -29,7 +29,7 @@ This function frees unused JIT executable memory. The argument is a general
 context, for custom memory management, or NULL for standard memory management.
 JIT memory allocation retains some memory in order to improve future JIT
 compilation speed. In low memory conditions,
-\fBpcre2_jit_free_unused_memory()\fB can be used to cause this memory to be
+<b>pcre2_jit_free_unused_memory()</b> can be used to cause this memory to be
 freed.
 </P>
 <P>
--- a/doc/html/pcre2_jit_match.html
+++ b/doc/html/pcre2_jit_match.html
@ -33,7 +33,9 @@ processed by the JIT compiler against a given subject string, using a matching
 algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
 it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
 Its arguments are exactly the same as for
-<a href="pcre2_match.html"><b>pcre2_match()</b>.</a>
+<a href="pcre2_match.html"><b>pcre2_match()</b>,</a>
+except that the subject string must be specified with a length;
+PCRE2_ZERO_TERMINATED is not supported.
 </P>
 <P>
 The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
--- a/doc/html/pcre2_jit_stack_create.html
+++ b/doc/html/pcre2_jit_stack_create.html
@ -34,7 +34,8 @@ allocation. The result can be passed to the JIT run-time code by calling
 <b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
 which can then be processed by <b>pcre2_match()</b> or <b>pcre2_jit_match()</b>.
 A maximum stack size of 512KiB to 1MiB should be more than enough for any
-pattern. For more details, see the
+pattern. If the stack couldn't be allocated or the values passed were not
+reasonable, NULL will be returned. For more details, see the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 page.
 </P>
--- a/doc/html/pcre2_match_data_create.html
+++ b/doc/html/pcre2_match_data_create.html
@ -30,8 +30,9 @@ This function creates a new match data block, which is used for holding the
 result of a match. The first argument specifies the number of pairs of offsets
 that are required. These form the "output vector" (ovector) within the match
 data block, and are used to identify the matched string and any captured
-substrings. There is always one pair of offsets; if <b>ovecsize</b> is zero, it
-is treated as one.
+substrings when matching with <b>pcre2_match()</b>, or a number of different
+matches at the same point when used with <b>pcre2_dfa_match()</b>. There is
+always one pair of offsets; if <b>ovecsize</b> is zero, it is treated as one.
 </P>
 <P>
 The second argument points to a general context, for custom memory management,
--- a/doc/html/pcre2_match_data_create_from_pattern.html
+++ b/doc/html/pcre2_match_data_create_from_pattern.html
@ -26,12 +26,15 @@ SYNOPSIS
 DESCRIPTION
 </b><br>
 <P>
-This function creates a new match data block, which is used for holding the
-result of a match. The first argument points to a compiled pattern. The number
-of capturing parentheses within the pattern is used to compute the number of
-pairs of offsets that are required in the match data block. These form the
-"output vector" (ovector) within the match data block, and are used to identify
-the matched string and any captured substrings.
+This function creates a new match data block for holding the result of a match.
+The first argument points to a compiled pattern. The number of capturing
+parentheses within the pattern is used to compute the number of pairs of
+offsets that are required in the match data block. These form the "output
+vector" (ovector) within the match data block, and are used to identify the
+matched string and any captured substrings when matching with
+<b>pcre2_match()</b>. If you are using <b>pcre2_dfa_match()</b>, which uses the
+output vector in a different way, you should use <b>pcre2_match_data_create()</b>
+instead of this function.
 </P>
 <P>
 The second argument points to a general context, for custom memory management,
--- a/doc/html/pcre2_serialize_decode.html
+++ b/doc/html/pcre2_serialize_decode.html
@ -48,7 +48,7 @@ the following negative error codes:
  PCRE2_ERROR_BADDATA   <i>number_of_codes</i> is zero or less
  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in <i>bytes</i>
  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
-  PCRE2_ERROR_MEMORY    memory allocation failed
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
  PCRE2_ERROR_NULL      <i>codes</i> or <i>bytes</i> is NULL
 </pre>
 PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
--- a/doc/html/pcre2_set_character_tables.html
+++ b/doc/html/pcre2_set_character_tables.html
@ -27,9 +27,12 @@ DESCRIPTION
 </b><br>
 <P>
 This function sets a pointer to custom character tables within a compile
-context. The second argument must be the result of a call to
-<b>pcre2_maketables()</b> or NULL to request the default tables. The result is
-always zero.
+context. The second argument must point to a set of PCRE2 character tables or
+be NULL to request the default tables. The result is always zero. Character
+tables can be created by calling <b>pcre2_maketables()</b> or by running the
+<b>pcre2_dftables</b> maintenance command in binary mode (see the
+<a href="pcre2build.html"><b>pcre2build</b></a>
+documentation).
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_set_compile_extra_options.html
+++ b/doc/html/pcre2_set_compile_extra_options.html
@ -30,7 +30,8 @@ This function sets additional option bits for <b>pcre2_compile()</b> that are
 housed in a compile context. It completely replaces all the bits. The extra
 options are:
 <pre>
-  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{df800} to \x{dfff} in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \u, \U, and \x handling
  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as a literal following character
  PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \r as \n
--- a/doc/html/pcre2_substitute.html
+++ b/doc/html/pcre2_substitute.html
@ -48,8 +48,8 @@ Its arguments are:
  <i>outlengthptr</i>  Points to the length of the output buffer
 </pre>
 A match data block is needed only if you want to inspect the data from the
-match that is returned in that block. A match context is needed only if you
-want to:
+final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is
+set. A match context is needed only if you want to:
 <pre>
  Set up a callout function
  Set a matching offset limit
@ -57,29 +57,46 @@ want to:
  Change the backtracking depth limit
  Set custom memory management in the match context
 </pre>
-The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code
-units, not characters, as is the contents of the variable pointed at by
-<i>outlengthptr</i>, which is updated to the actual length of the new string.
+The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code units,
+not characters, as is the contents of the variable pointed at by
+<i>outlengthptr</i>. This variable must contain the length of the output buffer
+when the function is called. If the function is successful, the value is
+changed to the length of the new string, excluding the trailing zero that is
+automatically added.
+</P>
+<P>
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 <pre>
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
+  PCRE2_NOTBOL                       Subject is not the beginning of a line
+  PCRE2_NOTEOL                       Subject is not the end of a line
+  PCRE2_NOTEMPTY                     An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in the subject or replacement
+                                      (only relevant if PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
+  PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 </pre>
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
+PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
+</P>
+<P>
+If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-NULL; its
+contents must be the result of a call to <b>pcre2_match()</b> using the same
+pattern and subject.
+</P>
+<P>
 The function returns the number of substitutions, which may be zero if there
-were no matches. The result can be greater than one only when
+are no matches. The result may be greater than one only when
 PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
 is returned.
 </P>
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -252,7 +252,7 @@ document for an overview of all the PCRE2 documentation.
 <b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
 <b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
 <b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
-<b>  pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR \fIreplacementzfP,</b>
+<b>  pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
 <b>  PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
 <b>  PCRE2_SIZE *<i>outlengthptr</i>);</b>
 </P>
@ -626,14 +626,15 @@ documentation for more details.
 <P>
 In a more complicated situation, where patterns are compiled only when they are
 first needed, but are still shared between threads, pointers to compiled
-patterns must be protected from simultaneous writing by multiple threads, at
-least until a pattern has been compiled. The logic can be something like this:
+patterns must be protected from simultaneous writing by multiple threads. This
+is somewhat tricky to do correctly. If you know that writing to a pointer is
+atomic in your environment, you can use logic like this:
 <pre>
  Get a read-only (shared) lock (mutex) for pointer
  if (pointer == NULL)
    {
    Get a write (unique) lock for pointer
-    pointer = pcre2_compile(...
+    if (pointer == NULL) pointer = pcre2_compile(...
    }
  Release the lock
  Use pointer in pcre2_match()
@ -641,10 +642,39 @@ least until a pattern has been compiled. The logic can be something like this:
 Of course, testing for compilation errors should also be included in the code.
 </P>
 <P>
-If JIT is being used, but the JIT compilation is not being done immediately,
-(perhaps waiting to see if the pattern is used often enough) similar logic is
-required. JIT compilation updates a pointer within the compiled code block, so
-a thread must gain unique write access to the pointer before calling
+The reason for checking the pointer a second time is as follows: Several
+threads may have acquired the shared lock and tested the pointer for being
+NULL, but only one of them will be given the write lock, with the rest kept
+waiting. The winning thread will compile the pattern and store the result.
+After this thread releases the write lock, another thread will get it, and if
+it does not retest pointer for being NULL, will recompile the pattern and
+overwrite the pointer, creating a memory leak and possibly causing other
+issues.
+</P>
+<P>
+In an environment where writing to a pointer may not be atomic, the above logic
+is not sufficient. The thread that is doing the compiling may be descheduled
+after writing only part of the pointer, which could cause other threads to use
+an invalid value. Instead of checking the pointer itself, a separate "pointer
+is valid" flag (that can be updated atomically) must be used:
+<pre>
+  Get a read-only (shared) lock (mutex) for pointer
+  if (!pointer_is_valid)
+    {
+    Get a write (unique) lock for pointer
+    if (!pointer_is_valid)
+      {
+      pointer = pcre2_compile(...
+      pointer_is_valid = TRUE
+      }
+    }
+  Release the lock
+  Use pointer in pcre2_match()
+</pre>
+If JIT is being used, but the JIT compilation is not being done immediately
+(perhaps waiting to see if the pattern is used often enough), similar logic is
+required. JIT compilation updates a value within the compiled code block, so a
+thread must gain unique write access to the pointer before calling
 <b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
 <b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
 compiled code before calling the JIT compiler.
@ -987,7 +1017,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 </P>
 <P>
 A value for the heap limit may also be supplied by an item at the start of a
@ -1000,19 +1030,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The <b>pcre2_match()</b> function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+<b>pcre2_match()</b> uses the heap are given in the
+<a href="pcre2perform.html"><b>pcre2perform</b></a>
+documentation.
 </P>
 <P>
-Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 <br>
 <br>
 <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@ -1059,10 +1087,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
 <br>
 <br>
 This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@ -1105,10 +1133,11 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
 <b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
 </P>
 <P>
-The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to
-discover which optional features have been compiled into the PCRE2 library. The
+The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to find
+the value of certain configuration parameters and to discover which optional
+features have been compiled into the PCRE2 library. The
 <a href="pcre2build.html"><b>pcre2build</b></a>
-documentation has more details about these optional features.
+documentation has more details about these features.
 </P>
 <P>
 The first argument for <b>pcre2_config()</b> specifies which information is
@ -1224,6 +1253,13 @@ over compilation stack usage, see <b>pcre2_set_compile_recursion_guard()</b>.
 </pre>
 This parameter is obsolete and should not be used in new code. The output is a
 uint32_t integer that is always set to zero.
+<pre>
+  PCRE2_CONFIG_TABLES_LENGTH
+</pre>
+The output is a uint32_t integer that gives the length of PCRE2's character
+processing tables in bytes. For details of these tables see the
+<a href="#localesupport">section on locale support</a>
+below.
 <pre>
  PCRE2_CONFIG_UNICODE_VERSION
 </pre>
@ -1345,8 +1381,7 @@ If <i>errorcode</i> or <i>erroroffset</i> is NULL, <b>pcre2_compile()</b> return
 NULL immediately. Otherwise, the variables to which these point are set to an
 error code and an offset (number of code units) within the pattern,
 respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
-error has occurred. The values are not defined when compilation is successful
-and <b>pcre2_compile()</b> returns a non-NULL value.
+error has occurred. 
 </P>
 <P>
 There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
@ -1361,15 +1396,18 @@ because the textual error messages that are obtained by calling the
 message"
 <a href="#geterrormessage">below)</a>
 should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
-for both positive and negative error codes in <b>pcre2.h</b>.
+for both positive and negative error codes in <b>pcre2.h</b>. When compilation
+is successful <i>errorcode</i> is set to a value that returns the message "no
+error" if passed to <b>pcre2_get_error_message()</b>.
 </P>
 <P>
 The value returned in <i>erroroffset</i> is an indication of where in the
-pattern the error occurred. It is not necessarily the furthest point in the
-pattern that was read. For example, after the error "lookbehind assertion is
-not fixed length", the error offset points to the start of the failing
-assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
-first code unit of the failing character.
+pattern an error occurred. When there is no error, zero is returned. A non-zero
+value is not necessarily the furthest point in the pattern that was read. For
+example, after the error "lookbehind assertion is not fixed length", the error
+offset points to the start of the failing assertion. For an invalid UTF-8 or
+UTF-16 string, the offset is that of the first code unit of the failing
+character.
 </P>
 <P>
 Some errors are not detected until the whole pattern has been scanned; in these
@ -1481,13 +1519,16 @@ documentation.
 </pre>
 If this bit is set, letters in the pattern match both upper and lower case
 letters in the subject. It is equivalent to Perl's /i option, and it can be
-changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode
-properties are used for all characters with more than one other case, and for
-all characters whose code points are greater than U+007F. For lower valued
-characters with only one other case, a lookup table is used for speed. When
-PCRE2_UTF is not set, a lookup table is used for all code points less than 256,
-and higher code points (available only in 16-bit or 32-bit mode) are treated as
-not having another case.
+changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
+PCRE2_UCP is set, Unicode properties are used for all characters with more than
+one other case, and for all characters whose code points are greater than
+U+007F. Note that there are two ASCII characters, K and S, that, in addition to
+their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin
+sign) and U+017F (long S) respectively. For lower valued characters with only
+one other case, a lookup table is used for speed. When neither PCRE2_UTF nor
+PCRE2_UCP is set, a lookup table is used for all code points less than 256, and
+higher code points (available only in 16-bit or 32-bit mode) are treated as not
+having another case.
 <pre>
  PCRE2_DOLLAR_ENDONLY
 </pre>
@ -1804,7 +1845,7 @@ undefined. It may cause your program to crash or loop.
 </P>
 <P>
 Note that this option can also be passed to <b>pcre2_match()</b> and
-<b>pcre_dfa_match()</b>, to suppress UTF validity checking of the subject
+<b>pcre2_dfa_match()</b>, to suppress UTF validity checking of the subject
 string.
 </P>
 <P>
@ -1820,16 +1861,23 @@ are not representable in UTF-16.
 <pre>
  PCRE2_UCP
 </pre>
-This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
-\w, and some of the POSIX character classes. By default, only ASCII characters
-are recognized, but if PCRE2_UCP is set, Unicode properties are used instead to
-classify characters. More details are given in the section on
+This option has two effects. Firstly, it changes the way PCRE2 processes \B,
+\b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes. By
+default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode
+properties are used instead to classify characters. More details are given in
+the section on
 <a href="pcre2pattern.html#genericchartypes">generic character types</a>
 in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 page. If you set PCRE2_UCP, matching one of the items it affects takes much
-longer. The option is available only if PCRE2 has been compiled with Unicode
-support (which is the default).
+longer.
+</P>
+<P>
+The second effect of PCRE2_UCP is to force the use of Unicode properties for
+upper/lower casing operations on characters with code points greater than 127,
+even when PCRE2_UTF is not set. This makes it possible, for example, to process
+strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has
+been compiled with Unicode support (which is the default).
 <pre>
  PCRE2_UNGREEDY
 </pre>
@ -1866,6 +1914,13 @@ Extra compile options
 <P>
 The option bits that can be set in a compile context by calling the
 <b>pcre2_set_compile_extra_options()</b> function are as follows:
+<pre>
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+</pre>
+Since release 10.38 PCRE2 has forbidden the use of \K within lookaround
+assertions, following Perl's lead. This option is provided to re-enable the
+previous behaviour (act in positive lookarounds, ignore in negative ones) in
+case anybody is relying on it.
 <pre>
  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
 </pre>
@ -1997,14 +2052,20 @@ PCRE2 handles caseless matching, and determines whether characters are letters,
 digits, or whatever, by reference to a set of tables, indexed by character code
 point. However, this applies only to characters whose code points are less than
 256. By default, higher-valued code points never match escapes such as \w or
-\d. When PCRE2 is built with Unicode support (the default), all characters can
-be tested with \p and \P, or, alternatively, the PCRE2_UCP option can be set
-when a pattern is compiled; this causes \w and friends to use Unicode property
-support instead of the built-in tables.
+\d.
+</P>
+<P>
+When PCRE2 is built with Unicode support (the default), certain Unicode
+character properties can be tested with \p and \P, or, alternatively, the
+PCRE2_UCP option can be set when a pattern is compiled; this causes \w and
+friends to use Unicode property support instead of the built-in tables.
+PCRE2_UCP also causes upper/lower casing operations on characters with code
+points greater than 127 to use Unicode properties. These effects apply even
+when PCRE2_UTF is not set.
 </P>
 <P>
 The use of locales with Unicode is discouraged. If you are handling characters
-with code points greater than 128, you should either use Unicode support, or
+with code points greater than 127, you should either use Unicode support, or
 use locales, but not try to mix the two.
 </P>
 <P>
@ -2030,7 +2091,7 @@ calling <b>pcre2_set_character_tables()</b> to set the tables pointer therein.
 </P>
 <P>
 For example, to build and use tables that are appropriate for the French locale
-(where accented characters with values greater than 128 are treated as
+(where accented characters with values greater than 127 are treated as
 letters), the following code could be used:
 <pre>
  setlocale(LC_CTYPE, "fr_FR");
@ -2044,10 +2105,10 @@ are using Windows, the name for the French locale is "french".
 </P>
 <P>
 The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
-is saved with the compiled pattern, and the same tables are used by
-<b>pcre2_match()</b> and <b>pcre_dfa_match()</b>. Thus, for any single pattern,
-compilation and matching both happen in the same locale, but different patterns
-can be processed in different locales.
+is saved with the compiled pattern, and the same tables are used by the
+matching functions. Thus, for any single pattern, compilation and matching both
+happen in the same locale, but different patterns can be processed in different
+locales.
 </P>
 <P>
 It is the caller's responsibility to ensure that the memory containing the
@ -2055,6 +2116,23 @@ tables remains available while they are still in use. When they are no longer
 needed, you can discard them using <b>pcre2_maketables_free()</b>, which should
 pass as its first parameter the same global context that was used to create the
 tables.
+</P>
+<br><b>
+Saving locale tables
+</b><br>
+<P>
+The tables described above are just a sequence of binary bytes, which makes
+them independent of hardware characteristics such as endianness or whether the
+processor is 32-bit or 64-bit. A copy of the result of <b>pcre2_maketables()</b>
+can therefore be saved in a file or elsewhere and re-used later, even in a
+different program or on another computer. The size of the tables (number of
+bytes) must be obtained by calling <b>pcre2_config()</b> with the
+PCRE2_CONFIG_TABLES_LENGTH option because <b>pcre2_maketables()</b> does not
+return this value. Note that the <b>pcre2_dftables</b> program, which is part of
+the PCRE2 build system, can be used stand-alone to create a file that contains
+a set of binary tables. See the
+<a href="pcre2build.html#createtables"><b>pcre2build</b></a>
+documentation for details.
 <a name="infoaboutpattern"></a></P>
 <br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
 <P>
@ -2063,7 +2141,7 @@ tables.
 <P>
 The <b>pcre2_pattern_info()</b> function returns general information about a
 compiled pattern. For information about callouts, see the
-<a href="pcre2pattern.html#infoaboutcallouts">next section.</a>
+<a href="#infoaboutcallouts">next section.</a>
 The first argument for <b>pcre2_pattern_info()</b> is a pointer to the compiled
 pattern. The second argument specifies which piece of information is required,
 and the third argument is a pointer to a variable to receive the data. If the
@ -2238,7 +2316,7 @@ return zero. The third argument should point to a <b>size_t</b> variable.
  PCRE2_INFO_LASTCODETYPE
 </pre>
 Returns 1 if there is a rightmost literal code unit that must exist in any
-matched string, other than at its start. The third argument should  point to a
+matched string, other than at its start. The third argument should point to a
 <b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
 returned, the code unit value itself can be retrieved using
 PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is
@ -2441,20 +2519,31 @@ to an abstract format like Java or .NET serialization.
 Information about a successful or unsuccessful match is placed in a match
 data block, which is an opaque structure that is accessed by function calls. In
 particular, the match data block contains a vector of offsets into the subject
-string that define the matched part of the subject and any substrings that were
-captured. This is known as the <i>ovector</i>.
+string that define the matched parts of the subject. This is known as the
+<i>ovector</i>.
 </P>
 <P>
 Before calling <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or
 <b>pcre2_jit_match()</b> you must create a match data block by calling one of
 the creation functions above. For <b>pcre2_match_data_create()</b>, the first
-argument is the number of pairs of offsets in the <i>ovector</i>. One pair of
-offsets is required to identify the string that matched the whole pattern, with
-an additional pair for each captured substring. For example, a value of 4
-creates enough space to record the matched portion of the subject plus three
-captured substrings. A minimum of at least 1 pair is imposed by
-<b>pcre2_match_data_create()</b>, so it is always possible to return the overall
-matched string.
+argument is the number of pairs of offsets in the <i>ovector</i>.
+</P>
+<P>
+When using <b>pcre2_match()</b>, one pair of offsets is required to identify the
+string that matched the whole pattern, with an additional pair for each
+captured substring. For example, a value of 4 creates enough space to record
+the matched portion of the subject plus three captured substrings.
+</P>
+<P>
+When using <b>pcre2_dfa_match()</b> there may be multiple matched substrings of
+different lengths at the same point in the subject. The ovector should be made
+large enough to hold as many as are expected.
+</P>
+<P>
+A minimum of 1 pair is imposed by <b>pcre2_match_data_create()</b>, so
+it is always possible to return the overall matched string in the case of
+<b>pcre2_match()</b> or the longest match in the case of
+<b>pcre2_dfa_match()</b>.
 </P>
 <P>
 The second argument of <b>pcre2_match_data_create()</b> is a pointer to a
@ -2465,10 +2554,11 @@ pass NULL, which causes <b>malloc()</b> to be used.
 <P>
 For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
 pointer to a compiled pattern. The ovector is created to be exactly the right
-size to hold all the substrings a pattern might capture. The second argument is
-again a pointer to a general context, but in this case if NULL is passed, the
-memory is obtained using the same allocator that was used for the compiled
-pattern (custom or default).
+size to hold all the substrings a pattern might capture when matched using
+<b>pcre2_match()</b>. You should not use this call when matching with
+<b>pcre2_dfa_match()</b>. The second argument is again a pointer to a general
+context, but in this case if NULL is passed, the memory is obtained using the
+same allocator that was used for the compiled pattern (custom or default).
 </P>
 <P>
 A match data block can be used many times, with the same or different compiled
@ -2550,7 +2640,9 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
 <i>startoffset</i>. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if <i>subject</i> is NULL and
+<i>length</i> is zero, the subject is assumed to be an empty string. If
+<i>length</i> is non-zero, an error occurs if <i>subject</i> is NULL.
 </P>
 <P>
 If <i>startoffset</i> is greater than the length of the subject,
@ -2572,10 +2664,10 @@ lookbehind. For example, consider the pattern
 </pre>
 which finds occurrences of "iss" in the middle of words. (\B matches only if
 the current position in the subject is not a word boundary.) When applied to
-the string "Mississipi" the first call to <b>pcre2_match()</b> finds the first
+the string "Mississippi" the first call to <b>pcre2_match()</b> finds the first
 occurrence. If <b>pcre2_match()</b> is called again with just the remainder of
-the subject, namely "issipi", it does not match, because \B is always false at
-the start of the subject, which is deemed to be a word boundary. However, if
+the subject, namely "issippi", it does not match, because \B is always false
+at the start of the subject, which is deemed to be a word boundary. However, if
 <b>pcre2_match()</b> is passed the entire string again, but with
 <i>startoffset</i> set to 4, it finds the second occurrence of "iss" because it
 is able to look behind the starting point to discover that it is preceded by a
@ -3054,11 +3146,11 @@ The backtracking match limit was reached.
 <pre>
  PCRE2_ERROR_NOMEMORY
 </pre>
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backtracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 <pre>
  PCRE2_ERROR_NULL
 </pre>
@ -3302,12 +3394,25 @@ same number causes an error at compile time.
 <b>  PCRE2_SIZE *<i>outlengthptr</i>);</b>
 </P>
 <P>
-This function calls <b>pcre2_match()</b> and then makes a copy of the subject
-string in <i>outputbuffer</i>, replacing one or more parts that were matched
-with the <i>replacement</i> string, whose length is supplied in <b>rlength</b>.
-This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
-The default is to perform just one replacement, but there is an option that
-requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
+This function optionally calls <b>pcre2_match()</b> and then makes a copy of the
+subject string in <i>outputbuffer</i>, replacing parts that were matched with
+the <i>replacement</i> string, whose length is supplied in <i>rlength</i>, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if <i>replacement</i> is NULL and <i>rlength</i> is zero, the
+replacement is assumed to be an empty string. If <i>rlength</i> is non-zero, an
+error occurs if <i>replacement</i> is NULL.
+</P>
+<P>
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
+</P>
+<P>
+If successful, <b>pcre2_substitute()</b> returns the number of substitutions
+that were carried out. This may be zero if no match was found, and is never
+greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is
+returned if an error is detected.
 </P>
 <P>
 Matches in which a \K item in a lookahead in the pattern causes the match to
@ -3325,35 +3430,86 @@ functions from the match context, if provided, or else those that were used to
 allocate memory for the compiled code.
 </P>
 <P>
-If an external <i>match_data</i> block is provided, its contents afterwards
-are those set by the final call to <b>pcre2_match()</b>. For global changes,
-this will have ended in a matching error. The contents of the ovector within
-the match data block may or may not have been changed.
+If <i>match_data</i> is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the
+provided block is used for all calls to <b>pcre2_match()</b>, and its contents
+afterwards are the result of the final call. For global changes, this will
+always be a no-match error. The contents of the ovector within the match data
+block may or may not have been changed.
 </P>
 <P>
-The <i>outlengthptr</i> argument must point to a variable that contains the
-length, in code units, of the output buffer. If the function is successful, the
-value is updated to contain the length of the new string, excluding the
-trailing zero that is automatically added.
+As well as the usual options for <b>pcre2_match()</b>, a number of additional
+options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
+One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
+<i>match_data</i> block must be provided, and it must have already been used for
+an external call to <b>pcre2_match()</b> with the same pattern and subject
+arguments. The data in the <i>match_data</i> block (return code, offset vector)
+is then used for the first substitution instead of calling <b>pcre2_match()</b>
+from within <b>pcre2_substitute()</b>. This allows an application to check for a
+match before choosing to substitute, without having to repeat the match.
+</P>
+<P>
+The contents of the externally supplied match data block are not changed when
+PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
+<b>pcre2_match()</b> is called after the first substitution to check for further
+matches, but this is done using an internally obtained match data block, thus
+always leaving the external block unchanged.
+</P>
+<P>
+The <i>code</i> argument is not used for matching before the first substitution
+when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when
+PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the
+UTF setting and the number of capturing parentheses in the pattern.
+</P>
+<P>
+The default action of <b>pcre2_substitute()</b> is to return a copy of the
+subject string with matched substrings replaced. However, if
+PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are
+returned. In the global case, multiple replacements are concatenated in the
+output buffer. Substitution callouts (see
+<a href="#subcallouts">below)</a>
+can be used to separate them if necessary.
+</P>
+<P>
+The <i>outlengthptr</i> argument of <b>pcre2_substitute()</b> must point to a
+variable that contains the length, in code units, of the output buffer. If the
+function is successful, the value is updated to contain the length in code
+units of the new string, excluding the trailing zero that is automatically
+added.
 </P>
 <P>
 If the function is not successful, the value set via <i>outlengthptr</i> depends
 on the type of error. For syntax errors in the replacement string, the value is
 the offset in the replacement string where the error was detected. For other
 errors, the value is PCRE2_UNSET by default. This includes the case of the
-output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set
-(see below), in which case the value is the minimum length needed, including
-space for the trailing zero. Note that in order to compute the required length,
-<b>pcre2_substitute()</b> has to simulate all the matching and copying, instead
-of giving an error return as soon as the buffer overflows. Note also that the
-length is in code units, not bytes.
+output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set.
 </P>
 <P>
-In the replacement string, which is interpreted as a UTF string in UTF mode,
-and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
-dollar character is an escape character that can specify the insertion of
-characters from capture groups or names from (*MARK) or other control verbs
-in the pattern. The following forms are always recognized:
+PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
+too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
+this option is set, however, <b>pcre2_substitute()</b> continues to go through
+the motions of matching and substituting (without, of course, writing anything)
+in order to compute the size of buffer that is needed. This value is passed
+back via the <i>outlengthptr</i> variable, with the result of the function still
+being PCRE2_ERROR_NOMEMORY.
+</P>
+<P>
+Passing a buffer size of zero is a permitted way of finding out how much memory
+is needed for a given substitution. However, this does mean that the entire
+operation is carried out twice. Depending on the application, it may be more
+efficient to allocate a large buffer and free the excess afterwards, instead of
+using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
+</P>
+<P>
+The replacement string, which is interpreted as a UTF string in UTF mode, is
+checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF
+replacement string causes an immediate return with the relevant UTF error code.
+</P>
+<P>
+If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
+in any way. By default, however, a dollar character is an escape character that
+can specify the insertion of characters from capture groups and names from
+(*MARK) or other control verbs in the pattern. The following forms are always
+recognized:
 <pre>
  $$                  insert a dollar character
  $&#60;n&#62; or ${&#60;n&#62;}      insert the contents of group &#60;n&#62;
@ -3377,10 +3533,6 @@ facility can be used to perform simple simultaneous substitutions, as this
      apple lemon
   2: pear orange
 </pre>
-As well as the usual options for <b>pcre2_match()</b>, a number of additional
-options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
-</P>
-<P>
 PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string,
 replacing every matching substring. If this option is not set, only the first
 matching substring is replaced. The search for matches takes place in the
@ -3392,7 +3544,7 @@ set in the match context, searching stops when that limit is reached.
 <P>
 You can restrict the effect of a global substitution to a portion of the
 subject string by setting either or both of <i>startoffset</i> and an offset
-limit. Here is a \fPpcre2test\fP example:
+limit. Here is a <b>pcre2test</b> example:
 <pre>
  /B/g,replace=!,use_offset_limit
  ABC ABC ABC ABC\=offset=3,offset_limit=12
@ -3405,22 +3557,6 @@ CRLF is a valid newline sequence and the next two characters are CR, LF. In
 this case, the offset is advanced by two characters.
 </P>
 <P>
-PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
-too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
-this option is set, however, <b>pcre2_substitute()</b> continues to go through
-the motions of matching and substituting (without, of course, writing anything)
-in order to compute the size of buffer that is needed. This value is passed
-back via the <i>outlengthptr</i> variable, with the result of the function still
-being PCRE2_ERROR_NOMEMORY.
-</P>
-<P>
-Passing a buffer size of zero is a permitted way of finding out how much memory
-is needed for given substitution. However, this does mean that the entire
-operation is carried out twice. Depending on the application, it may be more
-efficient to allocate a large buffer and free the excess afterwards, instead of
-using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
-</P>
-<P>
 PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do
 not appear in the pattern to be treated as unset groups. This option should be
 used with care, because it means that a typo in a group name or number no
@ -3454,8 +3590,11 @@ and force lower case. The escape sequences change the current state: \U and
 terminating a \Q quoted sequence) reverts to no case forcing. The sequences
 \u and \l force the next character (if it is a letter) to upper or lower
 case, respectively, and then the state automatically reverts to no case
-forcing. Case forcing applies to all inserted  characters, including those from
-capture groups and letters within \Q...\E quoted sequences.
+forcing. Case forcing applies to all inserted characters, including those from
+capture groups and letters within \Q...\E quoted sequences. If either
+PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
+properties are used for case forcing characters whose code points are greater
+than 127.
 </P>
 <P>
 Note that case forcing sequences such as \U...\E do not nest. For example,
@ -3494,14 +3633,17 @@ substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
 groups in the extended syntax forms to be treated as unset.
 </P>
 <P>
-If successful, <b>pcre2_substitute()</b> returns the number of successful
-matches. This may be zero if no matches were found, and is never greater than 1
-unless PCRE2_SUBSTITUTE_GLOBAL is set.
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
+PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and
+are ignored.
 </P>
+<br><b>
+Substitution errors
+</b><br>
 <P>
-In the event of an error, a negative error code is returned. Except for
-PCRE2_ERROR_NOMATCH (which is never returned), errors from <b>pcre2_match()</b>
-are passed straight back.
+In the event of an error, <b>pcre2_substitute()</b> returns a negative error
+code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from
+<b>pcre2_match()</b> are passed straight back.
 </P>
 <P>
 PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion,
@ -3519,6 +3661,12 @@ needed is returned via <i>outlengthptr</i>. Note that this does not happen by
 default.
 </P>
 <P>
+PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
+<i>match_data</i> argument is NULL or if the <i>subject</i> or <i>replacement</i>
+arguments are NULL. For backward compatibility reasons an exception is made for
+the <i>replacement</i> argument if the <i>rlength</i> argument is also 0.
+</P>
+<P>
 PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
 replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
 (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
@ -3532,7 +3680,7 @@ As for all PCRE2 errors, a text message that describes the error can be
 obtained by calling the <b>pcre2_get_error_message()</b> function (see
 "Obtaining a textual error message"
 <a href="#geterrormessage">above).</a>
-</P>
+<a name="subcallouts"></a></P>
 <br><b>
 Substitution callouts
 </b><br>
@ -3671,12 +3819,13 @@ other alternatives. Ultimately, when it runs out of matches,
 <P>
 The function <b>pcre2_dfa_match()</b> is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-<b>pcre2_dfa_match()</b> does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that <b>pcre2_dfa_match()</b> does
+not support, see the
 <a href="pcre2matching.html"><b>pcre2matching</b></a>
 documentation.
 </P>
@ -3711,7 +3860,7 @@ Here is an example of a simple call to <b>pcre2_dfa_match()</b>:
 </PRE>
 </P>
 <br><b>
-Option bits for <b>pcre_dfa_match()</b>
+Option bits for <b>pcre2_dfa_match()</b>
 </b><br>
 <P>
 The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
@ -3862,16 +4011,16 @@ fail, this error is given.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC42" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 02 September 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@ -128,7 +128,7 @@ To build it without Unicode support, add
  --disable-unicode
 </pre>
 to the <b>configure</b> command. This setting applies to all three libraries. It
-is not possible to build one library with Unicode support, and another without,
+is not possible to build one library with Unicode support and another without
 in the same configuration.
 </P>
 <P>
@ -142,8 +142,9 @@ locked this out by setting PCRE2_NEVER_UTF.
 UTF support allows the libraries to process character code points up to
 0x10ffff in the strings that they handle. Unicode support also gives access to
 the Unicode properties of characters, using pattern escapes such as \P, \p,
-and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are
-supported. Details are given in the
+and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i>,
+script names, and some bi-directional properties are supported. Details are
+given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation.
 </P>
@ -188,11 +189,11 @@ which enables the use of an execmem allocator in JIT that is compatible with
 SELinux. This has no effect if JIT is not enabled. See the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 documentation for a discussion of JIT usage. When JIT support is enabled,
-pcre2grep automatically makes use of it, unless you add
+<b>pcre2grep</b> automatically makes use of it, unless you add
 <pre>
  --disable-pcre2grep-jit
 </pre>
-to the "configure" command.
+to the <b>configure</b> command.
 </P>
 <br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
 <P>
@ -283,12 +284,11 @@ to the <b>configure</b> command. This setting also applies to the
 counting is done differently).
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The <b>pcre2_match()</b> function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation. The default limit (in effect unlimited) is 20 million. You can
 change this by a setting such as
@ -307,7 +307,7 @@ You can also explicitly limit the depth of nested backtracking in the
 for --with-match-limit. You can set a lower default limit by adding, for
 example,
 <pre>
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 </pre>
 to the <b>configure</b> command. This value can be overridden at run time. This
 depth limit indirectly limits the amount of heap memory that is used, but
@ -321,7 +321,7 @@ As well as applying to <b>pcre2_match()</b>, the depth limit also controls
 the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
 used for lookaround assertions, atomic groups, and recursion within patterns.
 The limit does not apply to JIT matching.
-</P>
+<a name="createtables"></a></P>
 <br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
 <P>
 PCRE2 uses fixed tables for processing characters whose code points are less
@ -332,12 +332,34 @@ only. If you add
  --enable-rebuild-chartables
 </pre>
 to the <b>configure</b> command, the distributed tables are no longer used.
-Instead, a program called <b>dftables</b> is compiled and run. This outputs the
-source for new set of tables, created in the default locale of your C run-time
-system. This method of replacing the tables does not work if you are cross
-compiling, because <b>dftables</b> is run on the local host. If you need to
-create alternative tables when cross compiling, you will have to do so "by
-hand".
+Instead, a program called <b>pcre2_dftables</b> is compiled and run. This
+outputs the source for a new set of tables, created in the default locale of your
+C run-time system. This method of replacing the tables does not work if you are
+cross compiling, because <b>pcre2_dftables</b> needs to be run on the local
+host and therefore must not be compiled with the cross compiler.
+</P>
+<P>
+If you need to create alternative tables when cross compiling, you will have to
+do so "by hand". There may also be other reasons for creating tables manually.
+To cause <b>pcre2_dftables</b> to be built on the local host, run a normal
+compiling command, and then run the program with the output file as its
+argument, for example:
+<pre>
+  cc src/pcre2_dftables.c -o pcre2_dftables
+  ./pcre2_dftables src/pcre2_chartables.c
+</pre>
+This builds the tables in the default locale of the local host. If you want to
+specify a locale, you must use the -L option:
+<pre>
+  LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+</pre>
+You can also specify -b (with or without -L). This causes the tables to be
+written in binary instead of as source code. A set of binary tables can be
+loaded into memory by an application and passed to <b>pcre2_compile()</b> in the
+same way as tables created by calling <b>pcre2_maketables()</b>. The tables are
+just a string of bytes, independent of hardware characteristics such as
+endianness. This means they can be bundled with an application that runs in
+different environments, to ensure consistent behaviour.
 </P>
 <br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
 <P>
@ -414,7 +436,7 @@ default parameter values by adding, for example,
  --with-pcre2grep-bufsize=51200
  --with-pcre2grep-max-bufsize=2097152
 </pre>
-to the <b>configure</b> command. The caller of \fPpcre2grep\fP can override
+to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
 these values by using --buffer-size and --max-buffer-size on the command line.
 </P>
 <br><a name="SEC17" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
@ -531,15 +553,16 @@ documentation.
 <P>
 The C99 standard defines formatting modifiers z and t for size_t and
 ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-defined and has a value greater than or equal to 199901L (indicating C99).
+environments other than old versions of Microsoft Visual Studio when
+__STDC_VERSION__ is defined and has a value greater than or equal to 199901L
+(indicating support for C99).
 However, there is at least one environment that claims to be C99 but does not
 support these modifiers. If
 <pre>
  --disable-percent-zt
 </pre>
-is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
-%lu is used, with a cast for size_t values.
+is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
+a suitable format is used depending on the size of long for the platform.
 </P>
 <br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
 <P>
@ -585,16 +608,16 @@ give a warning.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC26" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 March 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2compat.html
+++ b/doc/html/pcre2compat.html
@ -16,32 +16,43 @@ please consult the man page, in case the conversion went wrong.
 DIFFERENCES BETWEEN PCRE2 AND PERL
 </b><br>
 <P>
-This document describes the differences in the ways that PCRE2 and Perl handle
-regular expressions. The differences described here are with respect to Perl
-versions 5.26, but as both Perl and PCRE2 are continually changing, the
-information may sometimes be out of date.
+This document describes some of the differences in the ways that PCRE2 and Perl
+handle regular expressions. The differences described here are with respect to
+Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the
+information may at times be out of date.
 </P>
 <P>
-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the
+behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the
+next character unless it is the start of a newline sequence. This means that,
+if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF
+(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using
+EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline
+indicator.
+</P>
+<P>
+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
 have are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 page.
 </P>
 <P>
-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
 they do not mean what you might think. For example, (?!a){3} does not assert
 that the next three characters are not "a". It just asserts that the next
 character is not "a" three times (in principle; PCRE2 optimizes this to run the
 assertion just once). Perl allows some repeat quantifiers on other assertions,
-for example, \b* (but not \b{3}), but these do not seem to have any use.
+for example, \b*, but these do not seem to have any use. PCRE2 does not allow
+any kind of quantifier on non-lookaround assertions.
 </P>
 <P>
-3. Capture groups that occur inside negative lookaround assertions are counted,
+4. Capture groups that occur inside negative lookaround assertions are counted,
 but their entries in the offsets vector are set only when a negative assertion
 is a condition that has a matching branch (that is, the condition is false).
+Perl may set such capture groups in other circumstances.
 </P>
 <P>
-4. The following Perl escape sequences are not supported: \F, \l, \L, \u,
+5. The following Perl escape sequences are not supported: \F, \l, \L, \u,
 \U, and \N when followed by a character name. \N on its own, matching a
 non-newline character, and \N{U+dd..}, matching a Unicode code point, are
 supported. The escapes that modify the case of following letters are
@ -52,24 +63,26 @@ PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript
 interprets them.
 </P>
 <P>
-5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
+6. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
 built with Unicode support (the default). The properties that can be tested
 with \p and \P are limited to the general category properties such as Lu and
-Nd, script names such as Greek or Han, and the derived properties Any and L&.
-PCRE2 does support the Cs (surrogate) property, which Perl does not; the Perl
-documentation says "Because Perl hides the need for the user to understand the
-internal representation of Unicode characters, there is no need to implement
-the somewhat messy concept of surrogates."
+Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the
+derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs
+(surrogate) property, but in PCRE2 its use is limited. See the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation for details. The long synonyms for property names that Perl
+supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted
+to prefix any of these properties with "Is".
 </P>
 <P>
-6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
+7. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
 in between are treated as literals. However, this is slightly different from
 Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
-they cause variable interpolation (but of course PCRE2 does not have
-variables). Also, Perl does "double-quotish backslash interpolation" on any
-backslashes between \Q and \E which, its documentation says, "may lead to
-confusing results". PCRE2 treats a backslash between \Q and \E just like any
-other character. Note the following examples:
+they cause variable interpolation (PCRE2 does not have variables). Also, Perl
+does "double-quotish backslash interpolation" on any backslashes between \Q
+and \E which, its documentation says, "may lead to confusing results". PCRE2
+treats a backslash between \Q and \E just like any other character. Note the
+following examples:
 <pre>
    Pattern            PCRE2 matches     Perl matches

@ -79,41 +92,38 @@ other character. Note the following examples:
    \QA\B\E            A\B               A\B
    \Q\\E              \                 \\E
 </pre>
-The \Q...\E sequence is recognized both inside and outside character classes.
+The \Q...\E sequence is recognized both inside and outside character classes
+by both PCRE2 and Perl.
 </P>
 <P>
-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
 constructions. However, PCRE2 does have a "callout" feature, which allows an
 external function to be called during pattern matching. See the
 <a href="pcre2callout.html"><b>pcre2callout</b></a>
 documentation for details.
 </P>
 <P>
-8. Subroutine calls (whether recursive or not) were treated as atomic groups up
+9. Subroutine calls (whether recursive or not) were treated as atomic groups up
 to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
 into subroutine calls is now supported, as in Perl.
 </P>
 <P>
-9. If any of the backtracking control verbs are used in a group that is called
-as a subroutine (whether or not recursively), their effect is confined to that
-group; it does not extend to the surrounding pattern. This is not always the
-case in Perl. In particular, if (*THEN) is present in a group that is called as
-a subroutine, its action is limited to that group, even if the group does not
-contain any | characters. Note that such groups are processed as anchored
-at the point where they are tested.
+10. In PCRE2, if any of the backtracking control verbs are used in a group that
+is called as a subroutine (whether or not recursively), their effect is
+confined to that group; it does not extend to the surrounding pattern. This is
+not always the case in Perl. In particular, if (*THEN) is present in a group
+that is called as a subroutine, its action is limited to that group, even if
+the group does not contain any | characters. Note that such groups are
+processed as anchored at the point where they are tested.
 </P>
 <P>
-10. If a pattern contains more than one backtracking control verb, the first
+11. If a pattern contains more than one backtracking control verb, the first
 one that is backtracked onto acts. For example, in the pattern
 A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 same as PCRE2, but there are cases where it differs.
 </P>
 <P>
-11. Most backtracking verbs in assertions have their normal actions. They are
-not confined to the assertion.
-</P>
-<P>
 12. There are some differences that are concerned with the settings of captured
 strings when part of a pattern is repeated. For example, matching "aba" against
 the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
@ -123,7 +133,7 @@ the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
 13. PCRE2's handling of duplicate capture group numbers and names is not as
 general as Perl's. This is a consequence of the fact the PCRE2 works internally
 just with numbers, using an external table to translate between numbers and
-names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B), where the two
+names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B)), where the two
 capture groups have the same number but different names, is not supported, and
 causes an error at compile time. If it were allowed, it would not be possible
 to distinguish which group matched, because both names map to capture group
@ -146,19 +156,27 @@ certainly user mistakes.
 16. In PCRE2, the upper/lower case character properties Lu and Ll are not
 affected when case-independent matching is specified. For example, \p{Lu}
 always matches an upper case letter. I think Perl has changed in this respect;
-in the release at the time of writing (5.24), \p{Lu} and \p{Ll} match all
+in the release at the time of writing (5.34), \p{Lu} and \p{Ll} match all
 letters, regardless of case, when case independence is specified.
 </P>
 <P>
-17. PCRE2 provides some extensions to the Perl regular expression facilities.
-Perl 5.10 includes new features that are not in earlier versions of Perl, some
+17. From release 5.32.0, Perl locks out the use of \K in lookaround
+assertions. From release 10.38 PCRE2 does the same by default. However, there
+is an option for re-enabling the previous behaviour. When this option is set,
+\K is acted on when it occurs in positive assertions, but is ignored in
+negative assertions.
+</P>
+<P>
+18. PCRE2 provides some extensions to the Perl regular expression facilities.
+Perl 5.10 included new features that were not in earlier versions of Perl, some
 of which (such as named parentheses) were in PCRE2 for some time before. This
-list is with respect to Perl 5.26:
+list is with respect to Perl 5.34:
 <br>
 <br>
 (a) Although lookbehind assertions in PCRE2 must match fixed length strings,
-each alternative branch of a lookbehind assertion can match a different length
-of string. Perl requires them all to have the same length.
+each alternative top-level branch of a lookbehind assertion can match a
+different length of string. Perl used to require them all to have the same
+length, but the latest version has some variable length support.
 <br>
 <br>
 (b) From PCRE2 10.23, backreferences to groups of fixed length are supported
@ -203,7 +221,7 @@ different way and is not Perl-compatible.
 <br>
 <br>
 (l) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at
-the start of a pattern that set overall options that cannot be changed within
+the start of a pattern. These set overall options that cannot be changed within
 the pattern.
 <br>
 <br>
@ -212,12 +230,12 @@ extension to the lookaround facilities. The default, Perl-compatible
 lookarounds are atomic.
 </P>
 <P>
-18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
+19. The Perl /a modifier restricts \d numbers to pure ascii, and the /aa
 modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode
 rules. This separation cannot be represented with PCRE2_UCP.
 </P>
 <P>
-19. Perl has different limits than PCRE2. See the
+20. Perl has different limits than PCRE2. See the
 <a href="pcre2limit.html"><b>pcre2limit</b></a>
 documentation for details. Perl went with 5.10 from recursion to iteration
 keeping the intermediate matches on the heap, which is ~10% slower but does not
@ -230,7 +248,7 @@ AUTHOR
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -239,9 +257,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 13 July 2019
+Last updated: 08 December 2021
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2convert.html
+++ b/doc/html/pcre2convert.html
@ -141,8 +141,8 @@ permitted to match separator characters, but the double-star (**) feature
 </P>
 <P>
 PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
-match separator characters. PCRE2_GLOB_NO_STARSTAR matches globs with the
-double-star feature disabled. These options may be given together.
+match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with
+the double-star feature disabled. These options may be given together.
 </P>
 <br><a name="SEC5" href="#TOC1">CONVERTING POSIX PATTERNS</a><br>
 <P>
--- a/doc/html/pcre2demo.html
+++ b/doc/html/pcre2demo.html
@ -215,8 +215,8 @@ if (rc &lt; 0)
  return 1;
  }

-/* Match succeded. Get a pointer to the output vector, where string offsets are
-stored. */
+/* Match succeeded. Get a pointer to the output vector, where string offsets
+are stored. */

 ovector = pcre2_get_ovector_pointer(match_data);
 printf("Match succeeded at offset %d\n", (int)ovector[0]);
@ -234,9 +234,12 @@ pcre2_match_data_create_from_pattern() above. */
 if (rc == 0)
  printf("ovector was not big enough for all the captured substrings\n");

-/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
-to set the start of a match later than its end. In this demonstration program,
-we just detect this case and give up. */
+/* Since release 10.38 PCRE2 has locked out the use of \K in lookaround
+assertions. However, there is an option to re-enable the old behaviour. If that
+is set, it is possible to run patterns such as /(?=.\K)/ that use \K in an
+assertion to set the start of a match later than its end. In this demonstration
+program, we show how to detect this case, but it shouldn't arise because the
+option is never set. */

 if (ovector[0] &gt; ovector[1])
  {
@ -453,7 +456,7 @@ for (;;)
    return 1;
    }

-  /* Match succeded */
+  /* Match succeeded */

  printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);

--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -71,13 +71,15 @@ For example:
 <pre>
  pcre2grep some-pattern file1 - file3
 </pre>
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how <b>pcre2grep</b> behaves. In
-particular, the <b>-M</b> option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-<b>-N</b> (<b>--newline</b>) option.
+However, there are options that can change how <b>pcre2grep</b> behaves. For
+example, the <b>-M</b> option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the <b>-N</b>
+(<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
+not file names are shown, and the <b>-Z</b> option changes the file name
+terminator to a zero byte.
 </P>
 <P>
 The amount of memory used for buffering files that are being scanned is
@ -111,8 +113,8 @@ matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
 (either shown literally, or as an offset), scanning resumes immediately
 following the match, so that further matches on the same line can be found. If
 there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier part
-of the line.
+but patterns that follow the one that matched are not tried on the earlier
+matched part of the line.
 </P>
 <P>
 This behaviour means that the order in which multiple patterns are specified
@ -146,11 +148,10 @@ ignored.
 <br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
 <P>
 By default, a file that contains a binary zero byte within the first 1024 bytes
-is identified as a binary file, and is processed specially. (GNU grep
-identifies binary files in this manner.) However, if the newline type is
-specified as "nul", that is, the line terminator is a binary zero, the test for
-a binary file is not applied. See the <b>--binary-files</b> option for a means
-of changing the way binary files are handled.
+is identified as a binary file, and is processed specially. However, if the
+newline type is specified as NUL, that is, the line terminator is a binary
+zero, the test for a binary file is not applied. See the <b>--binary-files</b>
+option for a means of changing the way binary files are handled.
 </P>
 <br><a name="SEC5" href="#TOC1">BINARY ZEROS IN PATTERNS</a><br>
 <P>
@ -179,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of <i>number</i>
-is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
+context lines (the <b>-Z</b> option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
+<b>-A</b> is ignored.
 </P>
 <P>
 <b>-a</b>, <b>--text</b>
@ -189,14 +192,21 @@ Treat binary files as text. This is equivalent to
 <b>--binary-files</b>=<i>text</i>.
 </P>
 <P>
+<b>--allow-lookaround-bsk</b>
+PCRE2 now forbids the use of \K in lookarounds by default, in line with Perl.
+This option causes <b>pcre2grep</b> to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+option, which enables this somewhat dangerous usage.
+</P>
+<P>
 <b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
 Output up to <i>number</i> lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 <i>number</i> lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of <i>number</i> is expected to be relatively small. When
+instead of a colon for the context lines (the <b>-Z</b> option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of <i>number</i> is expected to be relatively small. When
 <b>-c</b> is used, <b>-B</b> is ignored.
 </P>
 <P>
@ -406,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
 <P>
 <b>-H</b>, <b>--with-filename</b>
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the <b>-M</b> option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the <b>-M</b> option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>-h</b>, <b>--no-filename</b>
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
+Suppress the output of file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The <b>-Z</b> option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>--heap-limit</b>=<i>number</i>
@ -443,8 +455,8 @@ Ignore upper/lower case distinctions during comparisons.
 <P>
 <b>--include</b>=<i>pattern</i>
 If any <b>--include</b> patterns are specified, the only files that are
-processed are those that match one of the patterns (and do not match an
-<b>--exclude</b> pattern). This option does not affect directories, but it
+processed are those whose names match one of the patterns and do not match an
+<b>--exclude</b> pattern. This option does not affect directories, but it
 applies to all files, whether listed on the command line, obtained from
 <b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
 expression, and is matched against the final component of the file name, not
@ -463,8 +475,8 @@ may be given any number of times; all the files are read.
 <P>
 <b>--include-dir</b>=<i>pattern</i>
 If any <b>--include-dir</b> patterns are specified, the only directories that
-are processed are those that match one of the patterns (and do not match an
-<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
+are processed are those whose names match one of the patterns and do not match
+an <b>--exclude-dir</b> pattern. This applies to all directories, whether listed
 on the command line, obtained from <b>--file-list</b>, or by scanning a parent
 directory. The pattern is a PCRE2 regular expression, and is matched against
 the final component of the directory name, not the entire path. The <b>-F</b>,
@ -476,19 +488,22 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
 <b>-L</b>, <b>--files-without-match</b>
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous <b>-H</b>,
-<b>-h</b>, or <b>-l</b> options.
+output once, on a separate line by default, but if the <b>-Z</b> option is set,
+they are separated by zero bytes instead of newlines. This option overrides any
+previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>-l</b>, <b>--files-with-matches</b>
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the <b>-c</b> (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches. This
-opeion overrides any previous <b>-H</b>, <b>-h</b>, or <b>-L</b> options.
+a separate line, but if the <b>-Z</b> option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches that
+occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
+<b>-h</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>--label</b>=<i>name</i>
@ -501,8 +516,8 @@ short form for this option.
 When this option is given, non-compressed input is read and processed line by
 line, and the output is flushed after each write. By default, input is read in
 large chunks, unless <b>pcre2grep</b> can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments or
-Windows). Output to terminal is normally automatically flushed by the operating
+terminal, which is currently possible only in Unix-like environments or
+Windows. Output to terminal is normally automatically flushed by the operating
 system. This option can be useful when the input or output is attached to a
 pipe and you do not want <b>pcre2grep</b> to buffer up large amounts of data.
 However, its use will affect performance, and the <b>-M</b> (multiline) option
@ -528,46 +543,6 @@ locale is specified, the PCRE2 library's default (usually the "C" locale) is
 used. There is no short form for this option.
 </P>
 <P>
-<b>--match-limit</b>=<i>number</i>
-Processing some regular expression patterns may take a very long time to search
-for all possible matching strings. Others may require a very large amount of
-memory. There are three options that set resource limits for matching.
-<br>
-<br>
-The <b>--match-limit</b> option provides a means of limiting computing resource
-usage when processing patterns that are not going to match, but which have a
-very large number of possibilities in their search trees. The classic example
-is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
-counter that is incremented each time around its main processing loop. If the
-value set by <b>--match-limit</b> is reached, an error occurs.
-<br>
-<br>
-The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
-<br>
-<br>
-The <b>--depth-limit</b> option limits the depth of nested backtracking points,
-which indirectly limits the amount of memory that is used. The amount of memory
-needed for each backtracking point depends on the number of capturing
-parentheses in the pattern, so the amount of memory that is used before this
-limit acts varies from pattern to pattern. This limit is of use only if it is
-set smaller than <b>--match-limit</b>.
-<br>
-<br>
-There are no short forms for these options. The default limits can be set
-when the PCRE2 library is compiled; if they are not specified, the defaults
-are very large and so effectively unlimited.
-</P>
-<P>
-\fB--max-buffer-size=<i>number</i>
-This limits the expansion of the processing buffer, whose initial size can be
-set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
-smaller than the starting buffer size.
-</P>
-<P>
 <b>-M</b>, <b>--multiline</b>
 Allow patterns to match more than one line. When this option is set, the PCRE2
 library is called in "multiline" mode. This allows a matched string to extend
@ -597,29 +572,84 @@ well as possibly handling a two-character newline sequence.
 There is a limit to the number of lines that can be matched, imposed by the way
 that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
 large processing buffer, this should not be a problem, but the <b>-M</b> option
-does not work when input is read line by line (see \fP--line-buffered\fP.)
+does not work when input is read line by line (see <b>--line-buffered</b>).
+</P>
+<P>
+<b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
+Stop processing after finding <i>number</i> matching lines, or non-matching
+lines if <b>-v</b> is also set. Any trailing context lines are output after the
+final match. In multiline mode, each multiline match counts as just one line
+for this purpose. If this limit is reached when reading the standard input from
+a regular file, the file is left positioned just after the last matching line.
+If <b>-c</b> is also set, the count that is output is never greater than
+<i>number</i>. This option has no effect if used with <b>-L</b>, <b>-l</b>, or
+<b>-q</b>, or when just checking for a match in a binary file.
+</P>
+<P>
+<b>--match-limit</b>=<i>number</i>
+Processing some regular expression patterns may take a very long time to search
+for all possible matching strings. Others may require a very large amount of
+memory. There are three options that set resource limits for matching.
+<br>
+<br>
+The <b>--match-limit</b> option provides a means of limiting computing resource
+usage when processing patterns that are not going to match, but which have a
+very large number of possibilities in their search trees. The classic example
+is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
+counter that is incremented each time around its main processing loop. If the
+value set by <b>--match-limit</b> is reached, an error occurs.
+<br>
+<br>
+The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
+1024 bytes), the maximum amount of heap memory that may be used for matching.
+<br>
+<br>
+The <b>--depth-limit</b> option limits the depth of nested backtracking points,
+which indirectly limits the amount of memory that is used. The amount of memory
+needed for each backtracking point depends on the number of capturing
+parentheses in the pattern, so the amount of memory that is used before this
+limit acts varies from pattern to pattern. This limit is of use only if it is
+set smaller than <b>--match-limit</b>.
+<br>
+<br>
+There are no short forms for these options. The default limits can be set
+when the PCRE2 library is compiled; if they are not specified, the defaults
+are very large and so effectively unlimited.
+</P>
+<P>
+<b>--max-buffer-size</b>=<i>number</i>
+This limits the expansion of the processing buffer, whose initial size can be
+set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
+smaller than the starting buffer size.
 </P>
 <P>
 <b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
-The PCRE2 library supports five different conventions for indicating
-the ends of lines. They are the single-character sequences CR (carriage return)
-and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
-which recognizes any of the preceding three types, and an "any" convention, in
-which any Unicode line ending sequence is assumed to end a line. The Unicode
-sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
-(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
-PS (paragraph separator, U+2029).
+Six different conventions for indicating the ends of lines in scanned files are
+supported. For example:
+<pre>
+  pcre2grep -N CRLF 'some pattern' &#60;file&#62;
+</pre>
+The newline type may be specified in upper, lower, or mixed case. If the
+newline type is NUL, lines are separated by binary zero characters. The other
+types are the single-character sequences CR (carriage return) and LF
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
+recognizes any of the preceding three types, and an "any" type, for which any
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
+(paragraph separator, U+2029).
 <br>
 <br>
 When the PCRE2 library is built, a default line-ending sequence is specified.
 This is normally the standard sequence for the operating system. Unless
 otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
-The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
-makes it possible to use <b>pcre2grep</b> to scan files that have come from
-other environments without having to modify their line endings. If the data
-that is being scanned does not agree with the convention set by this option,
-<b>pcre2grep</b> may behave in strange ways. Note that this option does not
-apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
+<br>
+<br>
+This option makes it possible to use <b>pcre2grep</b> to scan files that have
+come from other environments without having to modify their line endings. If
+the data that is being scanned does not agree with the convention set by this
+option, <b>pcre2grep</b> may behave in strange ways. Note that this option does
+not apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
 <b>--include-from</b> options, which are expected to use the operating system's
 standard newline sequence.
 </P>
@ -641,29 +671,41 @@ It should never be needed in normal use.
 </P>
 <P>
 <b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
-When there is a match, instead of outputting the whole line that matched,
-output just the given text. This option is mutually exclusive with
-<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
-sequences starting with a dollar character may be used to insert the contents
-of the matched part of the line and/or captured substrings into the text.
+When there is a match, instead of outputting the line that matched, output just
+the text specified in this option, followed by an operating-system standard
+newline. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>,
+and <b>-C</b> options are ignored. The <b>--newline</b> option has no effect on
+this option, which is mutually exclusive with <b>--only-matching</b>,
+<b>--file-offsets</b>, and <b>--line-offsets</b>. However, like
+<b>--only-matching</b>, if there is more than one match in a line, each of them
+causes a line of output.
 <br>
 <br>
-$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
-substring of the given decimal number; zero substitutes the whole match. If
-the number is greater than the number of capturing substrings, or if the
-capture is unset, the replacement is empty.
+Escape sequences starting with a dollar character may be used to insert the
+contents of the matched part of the line and/or captured substrings into the
+text.
+<br>
+<br>
+$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured substring of the given
+decimal number; zero substitutes the whole match. If the number is greater than
+the number of capturing substrings, or if the capture is unset, the replacement
+is empty.
 <br>
 <br>
 $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
 newline; $r by carriage return; $t by tab; $v by vertical tab.
 <br>
 <br>
-$o&#60;digits&#62; is replaced by the character represented by the given octal
-number; up to three digits are processed.
+$o&#60;digits&#62; or $o{&#60;digits&#62;} is replaced by the character whose code point is the
+given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the
+second form must be used.
 <br>
 <br>
-$x&#60;digits&#62; is replaced by the character represented by the given hexadecimal
-number; up to two digits are processed.
+$x&#60;digits&#62; or $x{&#60;digits&#62;} is replaced by the character represented by the
+given hexadecimal number. In the first form, up to two hexadecimal digits are
+processed. When more digits are needed in Unicode mode to specify a wide
+character, the second form must be used.
 <br>
 <br>
 Any other character is substituted by itself. In particular, $$ is replaced by
@ -732,7 +774,8 @@ option to "recurse".
 </P>
 <P>
 <b>--recursion-limit</b>=<i>number</i>
-See <b>--match-limit</b> above.
+This is an obsolete synonym for <b>--depth-limit</b>. See <b>--match-limit</b>
+above for details.
 </P>
 <P>
 <b>-s</b>, <b>--no-messages</b>
@ -756,15 +799,18 @@ total would always be zero.
 <b>-u</b>, <b>--utf</b>
 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
 with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
-<b>--include</b> options) and all subject lines that are scanned must be valid
-strings of UTF-8 characters.
+<b>--include</b> options) and all lines that are scanned must be valid strings
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
+occurs.
 </P>
 <P>
 <b>-U</b>, <b>--utf-allow-invalid</b>
 As <b>--utf</b>, but in addition subject lines may contain invalid UTF-8 code
-unit sequences. These can never form part of any pattern match. This facility
-allows valid UTF-8 strings to be sought in executable or other binary files.
-For more details about matching in non-valid UTF-8 strings, see the
+unit sequences. These can never form part of any pattern match. Patterns
+themselves, however, must still be valid UTF-8 strings. This facility allows
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
+or other binary files. For more details about matching in non-valid UTF-8
+strings, see the
 <a href="pcre2unicode.html"><b>pcre2unicode</b>(3)</a>
 documentation.
 </P>
@ -777,7 +823,9 @@ ignored.
 <P>
 <b>-v</b>, <b>--invert-match</b>
 Invert the sense of the match, so that lines which do <i>not</i> match any of
-the patterns are the ones that are found.
+the patterns are the ones that are found. When this option is set, options such
+as <b>--only-matching</b> and <b>--output</b>, which specify parts of a match
+that are to be output, are ignored.
 </P>
 <P>
 <b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
@ -797,6 +845,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the <b>--include</b> or <b>--exclude</b> options.
 </P>
+<P>
+<b>-Z</b>, <b>--null</b>
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
+</P>
 <br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 <P>
 The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
@ -807,16 +862,27 @@ by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
 <br><a name="SEC8" href="#TOC1">NEWLINES</a><br>
 <P>
 The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
-different newline conventions from the default. Any parts of the input files
-that are written to the standard output are copied identically, with whatever
-newline sequences they have in the input. However, the setting of this option
-affects only the way scanned files are processed. It does not affect the
-interpretation of files specified by the <b>-f</b>, <b>--file-list</b>,
-<b>--exclude-from</b>, or <b>--include-from</b> options, nor does it affect the
-way in which <b>pcre2grep</b> writes informational messages to the standard
-error and output streams. For these it uses the string "\n" to indicate
-newlines, relying on the C I/O library to convert this to an appropriate
-sequence.
+newline conventions that differ from the default. This option affects only the
+way scanned files are processed. It does not affect the interpretation of files
+specified by the <b>-f</b>, <b>--file-list</b>, <b>--exclude-from</b>, or
+<b>--include-from</b> options.
+</P>
+<P>
+Any parts of the scanned input files that are written to the standard output
+are copied with whatever newline sequences they have in the input. However, if
+the final line of a file is output, and it does not end with a newline
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
+single NL is used.
+</P>
+<P>
+The newline setting does not affect the way in which <b>pcre2grep</b> writes
+newlines in informational messages to the standard output and error streams.
+Under Windows, the standard output is set to be binary, so that "\r\n" at the
+ends of output lines that are copied from the input is not converted to
+"\r\r\n" by the C I/O library. This means that any messages written to the
+standard output must end with "\r\n". For all other operating systems, and
+for all messages to the standard error stream, "\n" is used.
 </P>
 <br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
 <P>
@ -889,12 +955,36 @@ documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
 only callouts with string arguments are useful.
 </P>
 <br><b>
+Echoing a specific string
+</b><br>
+<P>
+Starting the callout string with a pipe character invokes an echoing facility
+that avoids calling an external program or script. This facility is always
+available, provided that callouts were not completely disabled when
+<b>pcre2grep</b> was built. The rest of the callout string is processed as a
+zero-terminated string, which means it should not contain any internal binary
+zeros. It is written to the output, having first been passed through the same
+escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
+above). However, $0 cannot be used to insert a matched substring because the
+match is still in progress. Instead, the single character '0' is inserted. Any
+syntax errors in the string (for example, a dollar not followed by another
+character) cause the callout to be ignored. No terminator is added to the
+output string, so if you want a newline, you must include it explicitly using
+the escape $n. For example:
+<pre>
+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' &#60;some file&#62;
+</pre>
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+pattern with (*FAIL).
+</P>
+<br><b>
 Calling external programs or scripts
 </b><br>
 <P>
 This facility can be independently disabled when <b>pcre2grep</b> is built. It
 is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
-where <b>lib$spawn()</b> is used, and for any other Unix-like environment where
+where <b>lib$spawn()</b> is used, and for any Unix-like environment where
 <b>fork()</b> and <b>execv()</b> are available.
 </P>
 <P>
@ -906,14 +996,11 @@ arguments:
  executable_name|arg1|arg2|...
 </pre>
 Any substring (including the executable name) may contain escape sequences
-started by a dollar character: $&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the
-captured substring of the given decimal number, which must be greater than
-zero. If the number is greater than the number of capturing substrings, or if
-the capture is unset, the replacement is empty.
-</P>
-<P>
-Any other character is substituted by itself. In particular, $$ is replaced by
-a single dollar and $| is replaced by a pipe character. Here is an example:
+started by a dollar character. These are the same as for the <b>--output</b>
+(<b>-O</b>) option documented above, except that $0 cannot insert the matched
+string because the match is still in progress. Instead, the character '0'
+is inserted. If you need a literal dollar or pipe character in any
+substring, use $$ or $| respectively. Here is an example:
 <pre>
  echo -e "abcde\n12345" | pcre2grep \
    '(?x)(.)(..(.))
@ -926,28 +1013,14 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345
 </pre>
-The parameters for the system call that is used to run the
-program or script are zero-terminated strings. This means that binary zero
-characters in the callout argument will cause premature termination of their
-substrings, and therefore should not be present. Any syntax errors in the
-string (for example, a dollar not followed by another character) cause the
-callout to be ignored. If running the program fails for any reason (including
-the non-existence of the executable), a local matching failure occurs and the
-matcher backtracks in the normal way.
-</P>
-<br><b>
-Echoing a specific string
-</b><br>
-<P>
-This facility is always available, provided that callouts were not completely
-disabled when <b>pcre2grep</b> was built. If the callout string starts with a
-pipe (vertical bar) character, the rest of the string is written to the output,
-having been passed through the same escape processing as text from the --output
-option. This provides a simple echoing facility that avoids calling an external
-program or script. No terminator is added to the string, so if you want a
-newline, you must include it explicitly. Matching continues normally after the
-string is output. If you want to see only the callout output but not any output
-from an actual match, you should end the relevant pattern with (*FAIL).
+The parameters for the system call that is used to run the program or script
+are zero-terminated strings. This means that binary zero characters in the
+callout argument will cause premature termination of their substrings, and
+therefore should not be present. Any syntax errors in the string (for example,
+a dollar not followed by another character) cause the callout to be ignored.
+If running the program fails for any reason (including the non-existence of the
+executable), a local matching failure occurs and the matcher backtracks in the
+normal way.
 </P>
 <br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br>
 <P>
@ -979,22 +1052,23 @@ because VMS does not distinguish between exit(0) and exit(1).
 </P>
 <br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
 <P>
-<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3).
+<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3),
+<b>pcre2unicode</b>(3).
 </P>
 <br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 15 June 2019
+Last updated: 30 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@ -54,6 +54,7 @@ platforms:
 <pre>
  ARM 32-bit (v5, v7, and Thumb2)
  ARM 64-bit
+  IBM s390x 64-bit
  Intel x86 32-bit and 64-bit
  MIPS 32-bit and 64-bit
  Power PC 32-bit and 64-bit
@ -90,7 +91,7 @@ or a negative error code.
 There is a limit to the size of pattern that JIT supports, imposed by the size
 of machine stack that it uses. The exact rules are not documented because they
 may change at any time, in particular, when new optimizations are introduced.
-If a pattern is too big, a call to \fBpcre2_jit_compile()\fB returns
+If a pattern is too big, a call to <b>pcre2_jit_compile()</b> returns
 PCRE2_ERROR_NOMEMORY.
 </P>
 <P>
@ -268,11 +269,11 @@ starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 </P>
 <P>
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithreaded application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 </P>
 <P>
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
@ -286,7 +287,7 @@ inefficient solution, and not recommended.
 This is a suggestion for how a multithreaded program that needs to set up
 non-default JIT stacks might operate:
 <pre>
-  During thread initalization
+  During thread initialization
    thread_local_var = pcre2_jit_stack_create(...)

  During thread exit
@ -339,12 +340,12 @@ stack through the JIT callback function.
 You can free a JIT stack at any time, as long as it will not be used by
 <b>pcre2_match()</b> again. When you assign the stack to a match context, only a
 pointer is set. There is no reference counting or any other magic. You can free
-compiled patterns, contexts, and stacks in any order, anytime. Just \fIdo
-not\fP call <b>pcre2_match()</b> with a match context pointing to an already
-freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
-used by <b>pcre2_match()</b> in another thread). You can also replace the stack
-in a context at any time when it is not in use. You should free the previous
-stack before assigning a replacement.
+compiled patterns, contexts, and stacks in any order, anytime.
+Just <i>do not</i> call <b>pcre2_match()</b> with a match context pointing to an
+already freed stack, as that will cause SEGFAULT. (Also, do not free a stack
+currently used by <b>pcre2_match()</b> in another thread). You can also replace
+the stack in a context at any time when it is not in use. You should free the
+previous stack before assigning a replacement.
 </P>
 <P>
 (5) Should I allocate/free a stack every time before/after calling
@ -381,8 +382,8 @@ out this complicated API.
 <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <P>
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@ -441,10 +442,10 @@ that was not compiled.
 <P>
 When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path, and if invalid data is passed, the result is undefined.
 </P>
 <P>
 Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
@ -465,9 +466,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC14" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 23 May 2019
+Last updated: 30 November 2021
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
 </P>
+<P>
+The maximum amount of heap memory used for matching is controlled by the heap 
+limit, which can be set in a pattern or in a match context. The default is a 
+very large number, effectively unlimited.
+</P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -86,9 +91,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 02 February 2019
+Last updated: 26 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2matching.html
+++ b/doc/html/pcre2matching.html
@ -78,8 +78,9 @@ tried is controlled by the greedy or ungreedy nature of the quantifier.
 If a leaf node is reached, a matching string has been found, and at that point
 the algorithm stops. Thus, if there is more than one possible match, this
 algorithm returns the first one that it finds. Whether this is the shortest,
-the longest, or some intermediate length depends on the way the greedy and
-ungreedy repetition quantifiers are specified in the pattern.
+the longest, or some intermediate length depends on the way the alternations
+and the greedy or ungreedy repetition quantifiers are specified in the
+pattern.
 </P>
 <P>
 Because it ends up with a single path through the tree, it is relatively
@ -109,11 +110,17 @@ no more unterminated paths. At this point, terminated paths represent the
 different matching possibilities (if there are none, the match has failed).
 Thus, if there is more than one possible match, this algorithm finds all of
 them, and in particular, it finds the longest. The matches are returned in
-decreasing order of length. There is an option to stop the algorithm after the
-first match (which is necessarily the shortest) is found.
+the output vector in decreasing order of length. There is an option to stop the
+algorithm after the first match (which is necessarily the shortest) is found.
 </P>
 <P>
-Note that all the matches that are found start at the same point in the
+Note that the size of vector needed to contain all the results depends on the
+number of simultaneous matches, not on the number of parentheses in the
+pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the match
+data block is therefore not advisable when doing DFA matching.
+</P>
+<P>
+Note also that all the matches that are found start at the same point in the
 subject. If the pattern
 <pre>
  cat(er(pillar)?)?
@ -194,21 +201,14 @@ supported by <b>pcre2_dfa_match()</b>.
 </P>
 <br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
 <P>
-Using the alternative matching algorithm provides the following advantages:
+The main advantage of the alternative algorithm is that all possible matches
+(at a single point in the subject) are automatically found, and in particular,
+the longest match is found. To find more than one match at the same point using
+the standard algorithm, you have to do kludgy things with callouts.
 </P>
 <P>
-1. All possible matches (at a single point in the subject) are automatically
-found, and in particular, the longest match is found. To find more than one
-match using the standard algorithm, you have to do kludgy things with
-callouts.
-</P>
-<P>
-2. Because the alternative algorithm scans the subject string just once, and
-never needs to backtrack (except for lookbehinds), it is possible to pass very
-long subject strings to the matching function in several pieces, checking for
-partial matching each time. Although it is also possible to do multi-segment
-matching using the standard algorithm, by retaining partially matched
-substrings, it is more complicated. The
+Partial matching is possible with this algorithm, though it has some
+limitations. The
 <a href="pcre2partial.html"><b>pcre2partial</b></a>
 documentation gives details of partial matching and discusses multi-segment
 matching.
@ -230,20 +230,23 @@ invalid UTF string are not supported.
 3. Although atomic groups are supported, their use does not provide the
 performance advantage that it does for the standard algorithm.
 </P>
+<P>
+4. JIT optimization is not supported.
+</P>
 <br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC8" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 23 May 2019
+Last updated: 28 August 2021
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2partial.html
+++ b/doc/html/pcre2partial.html
@ -295,7 +295,7 @@ these characters with '&#60;' if the <b>allusedtext</b> modifier is set:
  Partial match: 123ab
                 &#60;&#60;&#60;
 </pre>
-However, the \fPallusedtext\fP modifier is not available for JIT matching,
+However, the <b>allusedtext</b> modifier is not available for JIT matching,
 because JIT matching does not record the first (or last) consulted characters.
 For this reason, this information is not available via the API. It is therefore
 not possible in general to obtain the exact number of characters that must be
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@ -114,7 +114,8 @@ Another special sequence that may appear at the start of a pattern is (*UCP).
 This has the same effect as setting the PCRE2_UCP option: it causes sequences
 such as \d and \w to use Unicode properties to determine character types,
 instead of recognizing only characters with codes less than 256 via a lookup
-table.
+table. It also causes upper/lower casing operations to use Unicode properties
+for characters with code points greater than 127, even when UTF is not set.
 </P>
 <P>
 Some applications that allow their users to supply patterns may wish to
@ -288,8 +289,11 @@ corresponding characters in the subject. As a trivial example, the pattern
  The quick brown fox
 </pre>
 matches a portion of a subject string that is identical to itself. When
-caseless matching is specified (the PCRE2_CASELESS option), letters are matched
-independently of case.
+caseless matching is specified (the PCRE2_CASELESS option or (?i) within the
+pattern), letters are matched independently of case. Note that there are two
+ASCII characters, K and S, that, in addition to their lower case ASCII
+equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F
+(long S) respectively when either PCRE2_UTF or PCRE2_UCP is set.
 </P>
 <P>
 The power of regular expressions comes from the ability to include wild cards,
@ -325,6 +329,20 @@ a character class the only metacharacters are:
  [      POSIX character class (if followed by POSIX syntax)
  ]      terminates the character class
 </pre>
+If a pattern is compiled with the PCRE2_EXTENDED option, most white space in
+the pattern, other than in a character class, and characters between a #
+outside a character class and the next newline, inclusive, are ignored. An
+escaping backslash can be used to include a white space or a # character as
+part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same
+applies, but in addition unescaped space and horizontal tab characters are
+ignored inside a character class. Note: only these two characters are ignored,
+not the full set of pattern white space characters that are ignored outside a
+character class. Option settings can be changed within a pattern; see the
+section entitled
+<a href="#internaloptions">"Internal Option Setting"</a>
+below.
+</P>
+<P>
 The following sections describe the use of each of the metacharacters.
 </P>
 <br><a name="SEC5" href="#TOC1">BACKSLASH</a><br>
@ -342,16 +360,9 @@ precede a non-alphanumeric with backslash to specify that it stands for itself.
 In particular, if you want to match a backslash, you write \\.
 </P>
 <P>
-In a UTF mode, only ASCII digits and letters have any special meaning after a
-backslash. All other characters (in particular, those whose code points are
-greater than 127) are treated as literals.
-</P>
-<P>
-If a pattern is compiled with the PCRE2_EXTENDED option, most white space in
-the pattern (other than in a character class), and characters between a #
-outside a character class and the next newline, inclusive, are ignored. An
-escaping backslash can be used to include a white space or # character as part
-of the pattern.
+Only ASCII digits and letters have any special meaning after a backslash. All
+other characters (in particular, those whose code points are greater than 127)
+are treated as literals.
 </P>
 <P>
 If you want to treat all characters in a sequence as literals, you can do so by
@ -523,7 +534,7 @@ for themselves. For example, outside a character class:
  \0113  is a tab followed by the character "3"
  \113   might be a backreference, otherwise the character with octal code 113
  \377   might be a backreference, otherwise the value 255 (decimal)
-  \81    is always a backreference .sp
+  \81    is always a backreference
 </pre>
 Note that octal values of 100 or greater that are specified using this syntax
 must not be introduced by a leading zero, because no more than three octal
@ -734,7 +745,7 @@ Unicode support is not needed for these characters to be recognized.
 <P>
 It is possible to restrict \R to match only CR, LF, or CRLF (instead of the
 complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF
-at compile time. (BSR is an abbrevation for "backslash R".) This can be made
+at compile time. (BSR is an abbreviation for "backslash R".) This can be made
 the default when PCRE2 is built; if this is the case, the other behaviour can
 be requested via the PCRE2_BSR_UNICODE option. It is also possible to specify
 these settings by starting a pattern string with one of the following
@ -765,190 +776,62 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
 sequences are of course limited to testing characters whose code points are
 less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
 greater than 0x10ffff (the Unicode limit) may be encountered. These are all
-treated as being in the Unknown script and with an unassigned type. The extra
-escape sequences are:
+treated as being in the Unknown script and with an unassigned type.
+</P>
+<P>
+Matching characters by Unicode property is not fast, because PCRE2 has to do a
+multistage table lookup in order to find a character's property. That is why
+the traditional escape sequences such as \d and \w do not use Unicode
+properties in PCRE2 by default, though you can make them do so by setting the
+PCRE2_UCP option or by starting the pattern with (*UCP).
+</P>
+<P>
+The extra escape sequences that provide property support are:
 <pre>
  \p{<i>xx</i>}   a character with the <i>xx</i> property
  \P{<i>xx</i>}   a character without the <i>xx</i> property
  \X       a Unicode extended grapheme cluster
 </pre>
-The property names represented by <i>xx</i> above are case-sensitive. There is
-support for Unicode script names, Unicode general category properties, "Any",
-which matches any character (including newline), and some special PCRE2
-properties (described in the
-<a href="#extraprops">next section).</a>
-Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2.
-Note that \P{Any} does not match any characters, so always causes a match
-failure.
+The property names represented by <i>xx</i> above are not case-sensitive, and in
+accordance with Unicode's "loose matching" rules, spaces, hyphens, and
+underscores are ignored. There is support for Unicode script names, Unicode
+general category properties, "Any", which matches any character (including
+newline), Bidi_Class, a number of binary (yes/no) properties, and some special
+PCRE2 properties (described
+<a href="#extraprops">below).</a>
+Certain other Perl properties such as "InMusicalSymbols" are not supported by
+PCRE2. Note that \P{Any} does not match any characters, so always causes a
+match failure.
+</P>
+<br><b>
+Script properties for \p and \P
+</b><br>
+<P>
+There are three different syntax forms for matching a script. Each Unicode
+character has a basic script and, optionally, a list of other scripts ("Script
+Extensions") with which it is commonly used. Using the Adlam script as an
+example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas
+\p{scx:Adlam} matches, in addition, characters that have Adlam in their
+extensions list. The full names "script" and "script extensions" for the
+property types are recognized, and an equals sign is an alternative to the
+colon. If a script name is given without a property type, for example,
+\p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this
+interpretation at release 5.26 and PCRE2 changed at release 10.40.
 </P>
 <P>
-Sets of Unicode characters are defined as belonging to certain scripts. A
-character from one of these sets can be matched using a script name. For
-example:
-<pre>
-  \p{Greek}
-  \P{Han}
-</pre>
 Unassigned characters (and in non-UTF 32-bit mode, characters with code points
 greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
 part of an identified script are lumped together as "Common". The current list
-of scripts is:
-</P>
-<P>
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Unknown,
-Vai,
-Wancho,
-Warang_Citi,
-Yi,
-Zanabazar_Square.
+of recognized script names and their 4-character abbreviations can be obtained
+by running this command:
+<pre>
+  pcre2test -LS
+
+</PRE>
 </P>
+<br><b>
+The general category property for \p and \P
+</b><br>
 <P>
 Each character has exactly one Unicode general category property, specified by
 a two-letter abbreviation. For compatibility with Perl, negation can be
@ -1010,9 +893,9 @@ The following general category property codes are supported:
  Zp    Paragraph separator
  Zs    Space separator
 </pre>
-The special property L& is also supported: it matches a character that has
-the Lu, Ll, or Lt property, in other words, a letter that is not classified as
-a modifier or "other".
+The special property LC, which has the synonym L&, is also supported: it
+matches a character that has the Lu, Ll, or Lt property, in other words, a
+letter that is not classified as a modifier or "other".
 </P>
 <P>
 The Cs (Surrogate) property applies only to characters whose code points are in
@ -1039,12 +922,54 @@ Specifying caseless matching does not affect these escape sequences. For
 example, \p{Lu} always matches only upper case letters. This is different from
 the behaviour of current versions of Perl.
 </P>
+<br><b>
+Binary (yes/no) properties for \p and \P
+</b><br>
 <P>
-Matching characters by Unicode property is not fast, because PCRE2 has to do a
-multistage table lookup in order to find a character's property. That is why
-the traditional escape sequences such as \d and \w do not use Unicode
-properties in PCRE2 by default, though you can make them do so by setting the
-PCRE2_UCP option or by starting the pattern with (*UCP).
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\p and \P, along with their abbreviations, by running this command:
+<pre>
+  pcre2test -LP
+
+</PRE>
+</P>
+<br><b>
+The Bidi_Class property for \p and \P
+</b><br>
+<P>
+<pre>
+  \p{Bidi_Class:&#60;class&#62;}   matches a character with the given class
+  \p{BC:&#60;class&#62;}           matches a character with the given class
+</pre>
+The recognized classes are:
+<pre>
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+</pre>
+An equals sign may be used instead of a colon. The class names are
+case-insensitive; only the short names listed above are recognized.
 </P>
 <br><b>
 Extended grapheme clusters
@ -1075,7 +1000,7 @@ additional characters according to the following rules for ending a cluster:
 3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
 are of five types: L, V, T, LV, and LVT. An L character may be followed by an
 L, V, LV, or LVT character; an LV or V character may be followed by a V or T
-character; an LVT or T character may be follwed only by a T character.
+character; an LVT or T character may be followed only by a T character.
 </P>
 <P>
 4. Do not end before extending characters or spacing marks or the "zero-width
@ -1160,8 +1085,11 @@ For example, when the pattern
 matches "foobar", the first substring is still set to "foo".
 </P>
 <P>
-Perl documents that the use of \K within assertions is "not well defined". In
-PCRE2, \K is acted upon when it occurs inside positive assertions, but is
+From version 5.32.0 Perl forbids the use of \K in lookaround assertions. From
+release 10.38 PCRE2 also forbids this by default. However, the
+PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling
+<b>pcre2_compile()</b> to re-enable the previous behaviour. When this option is
+set, \K is acted upon when it occurs inside positive assertions, but is
 ignored in negative assertions. Note that when a pattern such as (?=ab\K)
 matches, the reported start of the match can be greater than the end of the
 match. Using \K in a lookbehind assertion at the start of a pattern can also
@ -1318,15 +1246,17 @@ end of the subject in both modes, and if all branches of a pattern start with
 <P>
 Outside a character class, a dot in the pattern matches any one character in
 the subject string except (by default) a character that signifies the end of a
-line.
+line. One or more characters may be specified as line terminators (see
+<a href="#newlines">"Newline conventions"</a>
+above).
 </P>
 <P>
-When a line ending is defined as a single character, dot never matches that
-character; when the two-character sequence CRLF is used, dot does not match CR
-if it is immediately followed by LF, but otherwise it matches all characters
-(including isolated CRs and LFs). When any Unicode line endings are being
-recognized, dot does not match CR or LF or any of the other line ending
-characters.
+Dot never matches a single line-ending character. When the two-character
+sequence CRLF is the only line ending, dot does not match CR if it is
+immediately followed by LF, but otherwise it matches all characters (including
+isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurrences
+of CR or LF match dot. When all Unicode line endings are being recognized, dot
+does not match CR or LF or any of the other line ending characters.
 </P>
 <P>
 The behaviour of dot with regard to newlines can be changed. If the
@ -1438,7 +1368,10 @@ Characters in a class may be specified by their code points using \o, \x, or
 \N{U+hh..} in the usual way. When caseless matching is set, any letters in a
 class represent both their upper case and lower case versions, so for example,
 a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not
-match "A", whereas a caseful version would.
+match "A", whereas a caseful version would. Note that there are two ASCII
+characters, K and S, that, in addition to their lower case ASCII equivalents,
+are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S)
+respectively when either PCRE2_UTF or PCRE2_UCP is set.
 </P>
 <P>
 Characters that might indicate line breaks are never treated in any special way
@ -1650,7 +1583,7 @@ that succeeds is used. If the alternatives are within a group
 <a href="#group">(defined below),</a>
 "succeeds" means matching the rest of the main pattern as well as the
 alternative in the group.
-</P>
+<a name="internaloptions"></a></P>
 <br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
 <P>
 The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
@ -1901,12 +1834,19 @@ are permitted for groups with the same number, for example:
  (?|(?&#60;AA&#62;aa)|(?&#60;AA&#62;bb))
 </pre>
 The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
-option at compile time, or by the use of (?J) within the pattern. Duplicate
-names can be useful for patterns where only one instance of the named capture
-group can match. Suppose you want to match the name of a weekday, either as a
-3-letter abbreviation or as the full name, and in both cases you want to
-extract the abbreviation. This pattern (ignoring the line breaks) does the job:
+option at compile time, or by the use of (?J) within the pattern, as described
+in the section entitled
+<a href="#internaloptions">"Internal Option Setting"</a>
+above.
+</P>
+<P>
+Duplicate names can be useful for patterns where only one instance of the named
+capture group can match. Suppose you want to match the name of a weekday,
+either as a 3-letter abbreviation or as the full name, and in both cases you
+want to extract the abbreviation. This pattern (ignoring the line breaks) does
+the job:
 <pre>
+  (?J)
  (?&#60;DN&#62;Mon|Fri|Sun)(?:day)?|
  (?&#60;DN&#62;Tue)(?:sday)?|
  (?&#60;DN&#62;Wed)(?:nesday)?|
@ -1927,7 +1867,7 @@ they appear in the overall pattern. The first one that is set is used for the
 reference. For example, this pattern matches both "foofoo" and "barbar" but not
 "foobar" or "barfoo":
 <pre>
-  (?:(?&#60;n&#62;foo)|(?&#60;n&#62;bar))\k&#60;n&#62;
+  (?J)(?:(?&#60;n&#62;foo)|(?&#60;n&#62;bar))\k&#60;n&#62;

 </PRE>
 </P>
@ -1961,7 +1901,7 @@ items:
  an escape such as \d or \pL that matches a single character
  a character class
  a backreference
-  a parenthesized group (including most assertions)
+  a parenthesized group (including lookaround assertions)
  a subroutine call (recursive or otherwise)
 </pre>
 The general repetition quantifier specifies a minimum and maximum number of
@ -2147,10 +2087,10 @@ be easier to remember:
 <pre>
  (*atomic:\d+)foo
 </pre>
-This kind of parenthesized group "locks up" the  part of the pattern it
-contains once it has matched, and a failure further into the pattern is
-prevented from backtracking into it. Backtracking past it to previous items,
-however, works as normal.
+This kind of parenthesized group "locks up" the part of the pattern it contains
+once it has matched, and a failure further into the pattern is prevented from
+backtracking into it. Backtracking past it to previous items, however, works as
+normal.
 </P>
 <P>
 An alternative description is that a group of this type matches exactly the
@ -2349,11 +2289,11 @@ using alternation, as in the example above, or by a quantifier with a minimum
 of zero.
 </P>
 <P>
-Backreferences of this type cause the group that they reference to be treated
-as an
+For versions of PCRE2 less than 10.25, backreferences of this type used to
+cause the group that they reference to be treated as an
 <a href="#atomicgroup">atomic group.</a>
-Once the whole group has been matched, a subsequent matching failure cannot
-cause backtracking into the middle of the group.
+This restriction no longer applies, and backtracking into such groups can occur
+as normal.
 <a name="bigassertions"></a></P>
 <br><a name="SEC20" href="#TOC1">ASSERTIONS</a><br>
 <P>
@ -2413,26 +2353,13 @@ control passes to the previous backtracking point, thus discarding any captured
 strings within the assertion.
 </P>
 <P>
-For compatibility with Perl, most assertion groups may be repeated; though it
-makes no sense to assert the same thing several times, the side effect of
-capturing may occasionally be useful. However, an assertion that forms the
-condition for a conditional group may not be quantified. In practice, for
-other assertions, there only three cases:
-<br>
-<br>
-(1) If the quantifier is {0}, the assertion is never obeyed during matching.
-However, it may contain internal capture groups that are called from elsewhere
-via the
-<a href="#groupsassubroutines">subroutine mechanism.</a>
-<br>
-<br>
-(2) If quantifier is {0,n} where n is greater than zero, it is treated as if it
-were {0,1}. At run time, the rest of the pattern match is tried with and
-without the assertion, the order depending on the greediness of the quantifier.
-<br>
-<br>
-(3) If the minimum repetition is greater than zero, the quantifier is ignored.
-The assertion is obeyed just once when encountered during matching.
+Most assertion groups may be repeated; though it makes no sense to assert the
+same thing several times, the side effect of capturing in positive assertions
+may occasionally be useful. However, an assertion that forms the condition for
+a conditional group may not be quantified. PCRE2 used to restrict the
+repetition of assertions, but from release 10.35 the only restriction is that
+an unlimited maximum repetition is changed to be one more than the minimum. For
+example, {3,} is treated as {3,4}.
 </P>
 <br><b>
 Alphabetic assertion names
@ -2624,8 +2551,8 @@ backtracking into the assertion. However, there are some cases where non-atomic
 positive assertions can be useful. PCRE2 provides these using the following
 syntax:
 <pre>
-  (*non_atomic_positive_lookahead:  or (*napla:
-  (*non_atomic_positive_lookbehind: or (*naplb:
+  (*non_atomic_positive_lookahead:  or (*napla: or (?*
+  (*non_atomic_positive_lookbehind: or (*naplb: or (?&#60;*
 </pre>
 Consider the problem of finding the right-most word in a string that also
 appears earlier in the string, that is, it must appear at least twice in total.
@ -2665,9 +2592,15 @@ as before because nothing has changed, so using a non-atomic assertion just
 wastes resources.
 </P>
 <P>
+There is one exception to backtracking into a non-atomic assertion. If an
+(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
+is, a subsequent match failure cannot backtrack into the assertion.
+</P>
+<P>
 Non-atomic assertions are not supported by the alternative matching function
-<b>pcre2_dfa_match()</b>. They are also not supported by JIT (but may be in
-future). Note that assertions that appear as conditions for
+<b>pcre2_dfa_match()</b>. They are supported by JIT, but only if they do not
+contain any control verbs such as (*ACCEPT). (This may change in future.) Note
+that assertions that appear as conditions for
 <a href="#conditions">conditional groups</a>
 (see below) must be atomic.
 </P>
@ -2878,7 +2811,7 @@ breaks):
  (?(DEFINE) (?&#60;byte&#62; 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
  \b (?&byte) (\.(?&byte)){3} \b
 </pre>
-The first part of the pattern is a DEFINE group inside which a another group
+The first part of the pattern is a DEFINE group inside which another group
 named "byte" is defined. This matches an individual component of an IPv4
 address (a number less than 256). When matching takes place, this part of the
 pattern is skipped because DEFINE acts like a false condition. The rest of the
@ -3588,7 +3521,7 @@ successful match if there is a later mismatch. Consider:
 </pre>
 If the subject is "aaaac...", after the first match attempt fails (starting at
 the first character in the string), the starting point skips on to start the
-next attempt at "c". Note that a possessive quantifer does not have the same
+next attempt at "c". Note that a possessive quantifier does not have the same
 effect as this example; although it would suppress backtracking during the
 first match attempt, the second attempt would start at the second character
 instead of skipping on to "c".
@ -3826,16 +3759,16 @@ there is a backtrack at the outer level.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC32" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 29 July 2019
+Last updated: 12 January 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code. 
+</P>
+<P>
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+</P>
+<P>
+Until release 10.41, an initial 20KiB frames vector was allocated on the system 
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to <b>pcre2_match()</b>. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+</P>
+<P>
+The size of the initial block is the larger of 20KiB or ten times the pattern's 
+frame size, unless the heap limit is less than this, in which case the heap 
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is 
+checked only when a new block is to be allocated. Reducing the heap limit 
+between calls to <b>pcre2_match()</b> with the same match data block does not 
+affect the saved block.
 </P>
 <P>
 In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 February 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2posix.html
+++ b/doc/html/pcre2posix.html
@ -68,11 +68,14 @@ application. Because the POSIX functions call the native ones, it is also
 necessary to add <b>-lpcre2-8</b>.
 </P>
 <P>
-Although they are not defined as protypes in <b>pcre2posix.h</b>, the library
-does contain functions with the POSIX names <b>regcomp()</b> etc. These simply
-pass their arguments to the PCRE2 functions. These functions are provided for
-backwards compatibility with earlier versions of PCRE2, so that existing
-programs do not have to be recompiled.
+Although they were not defined as prototypes in <b>pcre2posix.h</b>, releases
+10.33 to 10.36 of the library contained functions with the POSIX names
+<b>regcomp()</b> etc. These simply passed their arguments to the PCRE2
+functions. These functions were provided for backwards compatibility with
+earlier versions of PCRE2, which had only POSIX names. However, this has proved
+troublesome in situations where a program links with several libraries, some of
+which use PCRE2's POSIX interface while others use the real POSIX functions.
+For this reason, the POSIX names were removed in release 10.37.
 </P>
 <P>
 Calling the header file <b>pcre2posix.h</b> avoids any conflict with other POSIX
@ -344,9 +347,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC10" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 January 2019
+Last updated: 26 April 2021
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2serialize.html
+++ b/doc/html/pcre2serialize.html
@ -23,12 +23,12 @@ please consult the man page, in case the conversion went wrong.
 <br><a name="SEC1" href="#TOC1">SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS</a><br>
 <P>
 <b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, const uint32_t *<i>bytes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, const uint8_t *<i>bytes</i>,</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>int32_t pcre2_serialize_encode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, uint32_t **<i>serialized_bytes</i>,</b>
+<b>int32_t pcre2_serialize_encode(const pcre2_code **<i>codes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, uint8_t **<i>serialized_bytes</i>,</b>
 <b>  PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
@ -94,7 +94,7 @@ of serialized patterns, or one of the following negative error codes:
 <pre>
  PCRE2_ERROR_BADDATA      the number of patterns is zero or less
  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
-  PCRE2_ERROR_MEMORY       memory allocation failed
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
  PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
 </pre>
@ -154,7 +154,6 @@ mangagement functions for the decoded patterns. If this argument is NULL,
 <b>malloc()</b> and <b>free()</b> are used. After deserialization, the byte
 stream is no longer needed and can be discarded.
 <pre>
-  int32_t number_of_codes;
  pcre2_code *list_of_codes[2];
  uint8_t *bytes = &#60;serialized data&#62;;
  int32_t number_of_codes =
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@ -19,29 +19,31 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
 <li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
 <li><a name="TOC6" href="#SEC6">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
-<li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a>
-<li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
-<li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
-<li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
-<li><a name="TOC11" href="#SEC11">REPORTED MATCH POINT SETTING</a>
-<li><a name="TOC12" href="#SEC12">ALTERNATION</a>
-<li><a name="TOC13" href="#SEC13">CAPTURING</a>
-<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
-<li><a name="TOC15" href="#SEC15">COMMENT</a>
-<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
-<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
-<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
-<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
-<li><a name="TOC20" href="#SEC20">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
-<li><a name="TOC21" href="#SEC21">SCRIPT RUNS</a>
-<li><a name="TOC22" href="#SEC22">BACKREFERENCES</a>
-<li><a name="TOC23" href="#SEC23">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
-<li><a name="TOC24" href="#SEC24">CONDITIONAL PATTERNS</a>
-<li><a name="TOC25" href="#SEC25">BACKTRACKING CONTROL</a>
-<li><a name="TOC26" href="#SEC26">CALLOUTS</a>
-<li><a name="TOC27" href="#SEC27">SEE ALSO</a>
-<li><a name="TOC28" href="#SEC28">AUTHOR</a>
-<li><a name="TOC29" href="#SEC29">REVISION</a>
+<li><a name="TOC7" href="#SEC7">BINARY PROPERTIES FOR \p AND \P</a>
+<li><a name="TOC8" href="#SEC8">SCRIPT MATCHING WITH \p AND \P</a>
+<li><a name="TOC9" href="#SEC9">THE BIDI_CLASS PROPERTY FOR \p AND \P</a>
+<li><a name="TOC10" href="#SEC10">CHARACTER CLASSES</a>
+<li><a name="TOC11" href="#SEC11">QUANTIFIERS</a>
+<li><a name="TOC12" href="#SEC12">ANCHORS AND SIMPLE ASSERTIONS</a>
+<li><a name="TOC13" href="#SEC13">REPORTED MATCH POINT SETTING</a>
+<li><a name="TOC14" href="#SEC14">ALTERNATION</a>
+<li><a name="TOC15" href="#SEC15">CAPTURING</a>
+<li><a name="TOC16" href="#SEC16">ATOMIC GROUPS</a>
+<li><a name="TOC17" href="#SEC17">COMMENT</a>
+<li><a name="TOC18" href="#SEC18">OPTION SETTING</a>
+<li><a name="TOC19" href="#SEC19">NEWLINE CONVENTION</a>
+<li><a name="TOC20" href="#SEC20">WHAT \R MATCHES</a>
+<li><a name="TOC21" href="#SEC21">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
+<li><a name="TOC22" href="#SEC22">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
+<li><a name="TOC23" href="#SEC23">SCRIPT RUNS</a>
+<li><a name="TOC24" href="#SEC24">BACKREFERENCES</a>
+<li><a name="TOC25" href="#SEC25">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
+<li><a name="TOC26" href="#SEC26">CONDITIONAL PATTERNS</a>
+<li><a name="TOC27" href="#SEC27">BACKTRACKING CONTROL</a>
+<li><a name="TOC28" href="#SEC28">CALLOUTS</a>
+<li><a name="TOC29" href="#SEC29">SEE ALSO</a>
+<li><a name="TOC30" href="#SEC30">AUTHOR</a>
+<li><a name="TOC31" href="#SEC31">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
 <P>
@ -136,6 +138,11 @@ happening, \s and \w may also match characters with code points in the range
 sequences is changed to use Unicode properties and they match many more
 characters.
 </P>
+<P>
+Property descriptions in \p and \P are matched caselessly; hyphens,
+underscores, and white space are ignored, in accordance with Unicode's "loose
+matching" rules.
+</P>
 <br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
 <P>
 <pre>
@ -152,6 +159,7 @@ characters.
  Lo         Other letter
  Lt         Title case letter
  Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
  L&         Ll, Lu, or Lt

  M          Mark
@ -198,162 +206,58 @@ characters.
 Perl and POSIX space are now the same. Perl added VT to its space character set
 at release 5.18.
 </P>
-<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
+<br><a name="SEC7" href="#TOC1">BINARY PROPERTIES FOR \p AND \P</a><br>
 <P>
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Vai,
-Wancho,
-Warang_Citi,
-Yi,
-Zanabazar_Square.
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\p and \P, along with their abbreviations, by running this command:
+<pre>
+  pcre2test -LP
+</PRE>
 </P>
-<br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
+<br><a name="SEC8" href="#TOC1">SCRIPT MATCHING WITH \p AND \P</a><br>
+<P>
+Many script names and their 4-letter abbreviations are recognized in
+\p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of
+course). You can obtain a list of these scripts by running this command:
+<pre>
+  pcre2test -LS
+</PRE>
+</P>
+<br><a name="SEC9" href="#TOC1">THE BIDI_CLASS PROPERTY FOR \p AND \P</a><br>
+<P>
+<pre>
+  \p{Bidi_Class:&#60;class&#62;}   matches a character with the given class
+  \p{BC:&#60;class&#62;}           matches a character with the given class
+</pre>
+The recognized classes are:
+<pre>
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+</PRE>
+</P>
+<br><a name="SEC10" href="#TOC1">CHARACTER CLASSES</a><br>
 <P>
 <pre>
  [...]       positive character class
@ -381,7 +285,7 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default,
 but some of them use Unicode properties if PCRE2_UCP is set. You can use
 \Q...\E inside a character class.
 </P>
-<br><a name="SEC9" href="#TOC1">QUANTIFIERS</a><br>
+<br><a name="SEC11" href="#TOC1">QUANTIFIERS</a><br>
 <P>
 <pre>
  ?           0 or 1, greedy
@ -402,7 +306,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  {n,}?       n or more, lazy
 </PRE>
 </P>
-<br><a name="SEC10" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
+<br><a name="SEC12" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
 <P>
 <pre>
  \b          word boundary
@ -420,20 +324,23 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  \G          first matching position in subject
 </PRE>
 </P>
-<br><a name="SEC11" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
+<br><a name="SEC13" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
 <P>
 <pre>
  \K          set reported start of match
 </pre>
+From release 10.38 \K is not permitted by default in lookaround assertions,
+for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+option is set, the previous behaviour is re-enabled. When this option is set,
 \K is honoured in positive assertions, but ignored in negative ones.
 </P>
-<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
+<br><a name="SEC14" href="#TOC1">ALTERNATION</a><br>
 <P>
 <pre>
  expr|expr|expr...
 </PRE>
 </P>
-<br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
+<br><a name="SEC15" href="#TOC1">CAPTURING</a><br>
 <P>
 <pre>
  (...)           capture group
@ -448,26 +355,26 @@ In non-UTF modes, names may contain underscores and ASCII letters and digits;
 in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
 both cases, a name must not start with a digit.
 </P>
-<br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
+<br><a name="SEC16" href="#TOC1">ATOMIC GROUPS</a><br>
 <P>
 <pre>
  (?&#62;...)         atomic non-capture group
  (*atomic:...)   atomic non-capture group
 </PRE>
 </P>
-<br><a name="SEC15" href="#TOC1">COMMENT</a><br>
+<br><a name="SEC17" href="#TOC1">COMMENT</a><br>
 <P>
 <pre>
  (?#....)        comment (not nestable)
 </PRE>
 </P>
-<br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
+<br><a name="SEC18" href="#TOC1">OPTION SETTING</a><br>
 <P>
 Changes of these options within a group are automatically cancelled at the end
 of the group.
 <pre>
  (?i)            caseless
-  (?J)            allow duplicate names
+  (?J)            allow duplicate named groups
  (?m)            multiline
  (?n)            no auto capture
  (?s)            single line (dotall)
@ -506,7 +413,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The
 application can lock out the use of (*UTF) and (*UCP) by setting the
 PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.
 </P>
-<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
+<br><a name="SEC19" href="#TOC1">NEWLINE CONVENTION</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 settings with a similar syntax.
@ -519,7 +426,7 @@ settings with a similar syntax.
  (*NUL)          the NUL character (binary zero)
 </PRE>
 </P>
-<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
+<br><a name="SEC20" href="#TOC1">WHAT \R MATCHES</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 setting with a similar syntax.
@ -528,7 +435,7 @@ setting with a similar syntax.
  (*BSR_UNICODE)  any Unicode newline sequence
 </PRE>
 </P>
-<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
+<br><a name="SEC21" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
 <P>
 <pre>
  (?=...)                     )
@ -549,18 +456,20 @@ setting with a similar syntax.
 </pre>
 Each top-level branch of a lookbehind must be of a fixed length.
 </P>
-<br><a name="SEC20" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
+<br><a name="SEC22" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
 <P>
 These assertions are specific to PCRE2 and are not Perl-compatible.
 <pre>
-  (*napla:...)
-  (*non_atomic_positive_lookahead:...)
+  (?*...)                                )
+  (*napla:...)                           ) synonyms
+  (*non_atomic_positive_lookahead:...)   )

-  (*naplb:...)
-  (*non_atomic_positive_lookbehind:...)
+  (?&#60;*...)                               )
+  (*naplb:...)                           ) synonyms
+  (*non_atomic_positive_lookbehind:...)  )
 </PRE>
 </P>
-<br><a name="SEC21" href="#TOC1">SCRIPT RUNS</a><br>
+<br><a name="SEC23" href="#TOC1">SCRIPT RUNS</a><br>
 <P>
 <pre>
  (*script_run:...)           ) script run, can be backtracked into
@ -570,7 +479,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
  (*asr:...)                  )
 </PRE>
 </P>
-<br><a name="SEC22" href="#TOC1">BACKREFERENCES</a><br>
+<br><a name="SEC24" href="#TOC1">BACKREFERENCES</a><br>
 <P>
 <pre>
  \n              reference by number (can be ambiguous)
@ -587,7 +496,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
  (?P=name)       reference by name (Python)
 </PRE>
 </P>
-<br><a name="SEC23" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
+<br><a name="SEC25" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
 <P>
 <pre>
  (?R)            recurse whole pattern
@ -606,7 +515,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
  \g'-n'          call subroutine by relative number (PCRE2 extension)
 </PRE>
 </P>
-<br><a name="SEC24" href="#TOC1">CONDITIONAL PATTERNS</a><br>
+<br><a name="SEC26" href="#TOC1">CONDITIONAL PATTERNS</a><br>
 <P>
 <pre>
  (?(condition)yes-pattern)
@ -629,7 +538,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
 conditions or recursion tests. Such a condition is interpreted as a reference
 condition if the relevant named group exists.
 </P>
-<br><a name="SEC25" href="#TOC1">BACKTRACKING CONTROL</a><br>
+<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
 <P>
 All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
 name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
@ -656,7 +565,7 @@ pattern is not anchored.
 The effect of one of these verbs in a group called as a subroutine is confined
 to the subroutine call.
 </P>
-<br><a name="SEC26" href="#TOC1">CALLOUTS</a><br>
+<br><a name="SEC28" href="#TOC1">CALLOUTS</a><br>
 <P>
 <pre>
  (?C)            callout (assumed number 0)
@ -667,25 +576,25 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
 start and the end), and the starting delimiter { matched with the ending
 delimiter }. To encode the ending delimiter within the string, double it.
 </P>
-<br><a name="SEC27" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC29" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2pattern</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
 <b>pcre2matching</b>(3), <b>pcre2</b>(3).
 </P>
-<br><a name="SEC28" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC30" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC29" href="#TOC1">REVISION</a><br>
+<br><a name="SEC31" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 29 July 2019
+Last updated: 12 January 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -59,12 +59,7 @@ patterns, and the subject lines specify PCRE2 function options, control how the
 subject is processed, and what output is produced.
 </P>
 <P>
-As the original fairly simple PCRE library evolved, it acquired many different
-features, and as a result, the original <b>pcretest</b> program ended up with a
-lot of options in a messy, arcane syntax for testing all the features. The
-move to the new PCRE2 API provided an opportunity to re-implement the test
-program as <b>pcre2test</b>, with a cleaner modifier syntax. Nevertheless, there
-are still many obscure modifiers, some of which are specifically designed for
+There are many obscure modifiers, some of which are specifically designed for
 use in conjunction with the test script and data files that are distributed as
 part of PCRE2. All the modifiers are documented here, some without much
 justification, but many of them are unlikely to be of use except when testing
@ -83,16 +78,16 @@ to 8-bit code units for output.
 </P>
 <P>
 In the rest of this document, the names of library functions and structures
-are given in generic form, for example, <b>pcre_compile()</b>. The actual
+are given in generic form, for example, <b>pcre2_compile()</b>. The actual
 names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 <a name="inputencoding"></a></P>
 <br><a name="SEC3" href="#TOC1">INPUT ENCODING</a><br>
 <P>
 Input to <b>pcre2test</b> is processed line by line, either by calling the C
-library's <b>fgets()</b> function, or via the <b>libreadline</b> library. In some
-Windows environments character 26 (hex 1A) causes an immediate end of file, and
-no further data is read, so this character should be avoided unless you really
-want that action.
+library's <b>fgets()</b> function, or via the <b>libreadline</b> or <b>libedit</b>
+library. In some Windows environments character 26 (hex 1A) causes an immediate
+end of file, and no further data is read, so this character should be avoided
+unless you really want that action.
 </P>
 <P>
 The input is processed using C's string functions, so must not
@ -258,10 +253,22 @@ available, and the use of JIT for matching is verified.
 <b>-LM</b>
 List modifiers: write a list of available pattern and subject modifiers to the
 standard output, then exit with zero exit code. All other options are ignored.
-If both -C and -LM are present, whichever is first is recognized.
+If both -C and any -Lx options are present, whichever is first is recognized.
 </P>
 <P>
-\fB-pattern\fB <i>modifier-list</i>
+<b>-LP</b>
+List properties: write a list of recognized Unicode properties to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-LS</b>
+List scripts: write a list of recognized Unicode script names to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-pattern</b> <i>modifier-list</i>
 Behave as if each pattern line contains the given modifiers.
 </P>
 <P>
@ -323,7 +330,7 @@ test data, command lines that begin with # may appear. This file format, with
 some restrictions, can also be processed by the <b>perltest.sh</b> script that
 is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
 and Perl is the same. For a specification of <b>perltest.sh</b>, see the
-comments near its beginning.
+comments near its beginning. See also the #perltest command below.
 </P>
 <P>
 When the input is a terminal, <b>pcre2test</b> prompts for each line of input,
@ -375,6 +382,12 @@ output.
 This command is used to load a set of precompiled patterns from a file, as
 described in the section entitled "Saving and restoring compiled patterns"
 <a href="#saverestore">below.</a>
+<pre>
+  #loadtables &#60;filename&#62;
+</pre>
+This command is used to load a set of binary character tables that can be
+accessed by the tables=3 qualifier. Such tables can be created by the
+<b>pcre2_dftables</b> program with the -b option.
 <pre>
  #newline_default [&#60;newline-list&#62;]
 </pre>
@ -414,14 +427,20 @@ patterns. Modifiers on a pattern can change these settings.
 <pre>
  #perltest
 </pre>
-The appearance of this line causes all subsequent modifier settings to be
-checked for compatibility with the <b>perltest.sh</b> script, which is used to
-confirm that Perl gives the same results as PCRE2. Also, apart from comment
-lines, #pattern commands, and #subject commands that set or unset "mark", no
-command lines are permitted, because they and many of the modifiers are
-specific to <b>pcre2test</b>, and should not be used in test files that are also
-processed by <b>perltest.sh</b>. The <b>#perltest</b> command helps detect tests
-that are accidentally put in the wrong file.
+This line is used in test files that can also be processed by <b>perltest.sh</b>
+to confirm that Perl gives the same results as PCRE2. Subsequent tests are
+checked for the use of <b>pcre2test</b> features that are incompatible with the
+<b>perltest.sh</b> script.
+</P>
+<P>
+Patterns must use '/' as their delimiter, and only certain modifiers are
+supported. Comment lines, #pattern commands, and #subject commands that set or
+unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and
+#newline_default commands, which are needed in the relevant pcre2test files,
+are silently ignored. All other command lines are ignored, but give a warning
+message. The <b>#perltest</b> command helps detect tests that are accidentally
+put in the wrong file or use the wrong delimiter. For more details of the
+<b>perltest.sh</b> script see the comments it contains.
 <pre>
  #pop [&#60;modifiers&#62;]
  #popcopy [&#60;modifiers&#62;]
@ -474,15 +493,17 @@ excluding pattern meta-characters):
 </pre>
 This is interpreted as the pattern's delimiter. A regular expression may be
 continued over several input lines, in which case the newline characters are
-included within it. It is possible to include the delimiter within the pattern
-by escaping it with a backslash, for example
+included within it. It is possible to include the delimiter as a literal within
+the pattern by escaping it with a backslash, for example
 <pre>
  /abc\/def/
 </pre>
 If you do this, the escape and the delimiter form part of the pattern, but
-since the delimiters are all non-alphanumeric, this does not affect its
-interpretation. If the terminating delimiter is immediately followed by a
-backslash, for example,
+since the delimiters are all non-alphanumeric, the inclusion of the backslash
+does not affect the pattern's interpretation. Note, however, that this trick
+does not work within \Q...\E literal bracketing because the backslash will
+itself be interpreted as a literal. If the terminating delimiter is immediately
+followed by a backslash, for example,
 <pre>
  /abc/\
 </pre>
@ -500,11 +521,11 @@ A pattern can be followed by a modifier list (details below).
 </P>
 <br><a name="SEC9" href="#TOC1">SUBJECT LINE SYNTAX</a><br>
 <P>
-Before each subject line is passed to <b>pcre2_match()</b> or
-<b>pcre2_dfa_match()</b>, leading and trailing white space is removed, and the
-line is scanned for backslash escapes, unless the <b>subject_literal</b>
-modifier was set for the pattern. The following provide a means of encoding
-non-printing characters in a visible way:
+Before each subject line is passed to <b>pcre2_match()</b>,
+<b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>, leading and trailing white
+space is removed, and the line is scanned for backslash escapes, unless the
+<b>subject_literal</b> modifier was set for the pattern. The following provide a
+means of encoding non-printing characters in a visible way:
 <pre>
  \a         alarm (BEL, \x07)
  \b         backspace (\x08)
@ -601,6 +622,7 @@ way <b>pcre2_compile()</b> behaves. See
 for a description of the effects of these options.
 <pre>
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      allow_lookaround_bsk      set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
      allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
      alt_bsux                  set PCRE2_ALT_BSUX
      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
@ -679,7 +701,7 @@ heavily used in the test files.
      pushcopy                  push a copy onto the stack
      stackguard=&#60;number&#62;       test the stackguard feature
      subject_literal           treat all subject lines as literal
-      tables=[0|1|2]            select internal tables
+      tables=[0|1|2|3]          select internal tables
      use_length                do not zero-terminate the pattern
      utf8_input                treat input as UTF-8
 </pre>
@ -1027,18 +1049,20 @@ Using alternative character tables
 </b><br>
 <P>
 The value specified for the <b>tables</b> modifier must be one of the digits 0,
-1, or 2. It causes a specific set of built-in character tables to be passed to
-<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
-different character tables. The digit specifies the tables as follows:
+1, 2, or 3. It causes a specific set of built-in character tables to be passed
+to <b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour
+with different character tables. The digit specifies the tables as follows:
 <pre>
  0   do not pass any special character tables
  1   the default ASCII tables, as distributed in
        pcre2_chartables.c.dist
  2   a set of tables defining ISO 8859 characters
+  3   a set of tables loaded by the #loadtables command
 </pre>
-In table 2, some characters whose codes are greater than 128 are identified as
-letters, digits, spaces, etc. Setting alternate character tables and a locale
-are mutually exclusive.
+In tables 2, some characters whose codes are greater than 128 are identified as
+letters, digits, spaces, etc. Tables 3 can be used only after a
+<b>#loadtables</b> command has loaded them from a binary file. Setting alternate
+character tables and a locale are mutually exclusive.
 </P>
 <br><b>
 Setting certain match controls
@ -1050,24 +1074,27 @@ modifier list, in which case they are applied to every subject line that is
 processed with that pattern. These modifiers do not affect the compilation
 process.
 <pre>
-      aftertext                  show text after match
-      allaftertext               show text after captures
-      allcaptures                show all captures
-      allvector                  show the entire ovector
-      allusedtext                show all consulted text
-      altglobal                  alternative global matching
-  /g  global                     global matching
-      jitstack=&#60;n&#62;               set size of JIT stack
-      mark                       show mark values
-      replace=&#60;string&#62;           specify a replacement string
-      startchar                  show starting character when relevant
-      substitute_callout         use substitution callouts
-      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
-      substitute_skip=&#60;n&#62;        skip substitution number n
-      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
-      substitute_stop=&#60;n&#62;        skip substitution number n and greater
-      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
-      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
+      aftertext                   show text after match
+      allaftertext                show text after captures
+      allcaptures                 show all captures
+      allvector                   show the entire ovector
+      allusedtext                 show all consulted text
+      altglobal                   alternative global matching
+  /g  global                      global matching
+      jitstack=&#60;n&#62;                set size of JIT stack
+      mark                        show mark values
+      replace=&#60;string&#62;            specify a replacement string
+      startchar                   show starting character when relevant
+      substitute_callout          use substitution callouts
+      substitute_extended         use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal          use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched          use PCRE2_SUBSTITUTE_MATCHED
+      substitute_overflow_length  use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=&#60;n&#62;         skip substitution &#60;n&#62;
+      substitute_stop=&#60;n&#62;         skip substitution &#60;n&#62; and following
+      substitute_unknown_unset    use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+      substitute_unset_empty      use PCRE2_SUBSTITUTE_UNSET_EMPTY
 </pre>
 These modifiers may not appear in a <b>#pattern</b> command. If you want them as
 defaults, set them in a <b>#subject</b> command.
@ -1196,7 +1223,7 @@ Setting match controls
 The following modifiers affect the matching process or request additional
 information. Some of them may also be specified on a pattern line (see above),
 in which case they apply to every subject line that is matched against that
-pattern.
+pattern, but can be overridden by modifiers on the subject.
 <pre>
      aftertext                  show text after match
      allaftertext               show text after captures
@ -1214,7 +1241,8 @@ pattern.
      copy=&#60;number or name&#62;      copy captured substring
      depth_limit=&#60;n&#62;            set a depth limit
      dfa                        use <b>pcre2_dfa_match()</b>
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
      get=&#60;number or name&#62;       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
@ -1224,6 +1252,8 @@ pattern.
      match_limit=&#60;n&#62;            set a match limit
      memory                     show heap memory usage
      null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
      offset=&#60;n&#62;                 set starting offset
      offset_limit=&#60;n&#62;           set offset limit
      ovector=&#60;n&#62;                set size of output vector
@ -1233,8 +1263,11 @@ pattern.
      startoffset=&#60;n&#62;            same as offset=&#60;n&#62;
      substitute_callout         use substitution callouts
       substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
-      substitute_skip=&#60;n&#62;        skip substitution number n
+      substitute_literal         use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched         use PCRE2_SUBSTITUTE_MATCHED
      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=&#60;n&#62;        skip substitution number n
      substitute_stop=&#60;n&#62;        skip substitution number n and greater
      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
@ -1395,9 +1428,15 @@ Testing the substitution function
 </b><br>
 <P>
 If the <b>replace</b> modifier is set, the <b>pcre2_substitute()</b> function is
-called instead of one of the matching functions. Note that replacement strings
-cannot contain commas, because a comma signifies the end of a modifier. This is
-not thought to be an issue in a test program.
+called instead of one of the matching functions (or after one call of
+<b>pcre2_match()</b> in the case of PCRE2_SUBSTITUTE_MATCHED). Note that
+replacement strings cannot contain commas, because a comma signifies the end of
+a modifier. This is not thought to be an issue in a test program.
+</P>
+<P>
+Specifying a completely empty replacement string disables this modifier.
+However, it is possible to specify an empty replacement by providing a buffer
+length, as described below, for an otherwise empty replacement.
 </P>
 <P>
 Unlike subject strings, <b>pcre2test</b> does not process replacement strings
@ -1413,11 +1452,16 @@ for <b>pcre2_substitute()</b>:
 <pre>
  global                      PCRE2_SUBSTITUTE_GLOBAL
  substitute_extended         PCRE2_SUBSTITUTE_EXTENDED
+  substitute_literal          PCRE2_SUBSTITUTE_LITERAL
+  substitute_matched          PCRE2_SUBSTITUTE_MATCHED
  substitute_overflow_length  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+  substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
  substitute_unknown_unset    PCRE2_SUBSTITUTE_UNKNOWN_UNSET
  substitute_unset_empty      PCRE2_SUBSTITUTE_UNSET_EMPTY
-
-</PRE>
+</pre>
+See the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation for details of these options.
 </P>
 <P>
 After a successful substitution, the modified string is output, preceded by the
@ -1521,7 +1565,7 @@ Setting heap, match, and depth limits
 <P>
 The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
 the appropriate limits in the match context. These values are ignored when the
-<b>find_limits</b> modifier is specified.
+<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
 </P>
 <br><b>
 Finding minimum limits
@ -1531,8 +1575,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
 calls the relevant matching function several times, setting different values in
 the match context via <b>pcre2_set_heap_limit()</b>,
 <b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 </P>
 <P>
 When using this modifier, the pattern should not contain any limit settings
@ -1560,9 +1608,7 @@ overall amount of computing resource that is used.
 </P>
 <P>
 For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 </P>
 <br><b>
 Showing MARK names
@ -1580,12 +1626,10 @@ Showing memory usage
 <P>
 The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the <b>memory</b> modifier never has any effect. For this modifier to work, the
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
+is used only when a match requires more internal workspace than the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 <b>null_context</b> modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 </P>
@ -1639,7 +1683,7 @@ When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 </P>
 <br><b>
-Passing a NULL context
+Passing a NULL context, subject, or replacement
 </b><br>
 <P>
 Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
@ -1647,7 +1691,13 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
 If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-<b>find_limits</b> or <b>substitute_callout</b> modifiers.
+<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
+modifiers.
+</P>
+<P>
+Similarly, for testing purposes, if the <b>null_subject</b> or
+<b>null_replacement</b> modifier is set, the subject or replacement string
+pointers are passed as NULL, respectively, to the relevant functions.
 </P>
 <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
 <P>
@ -2086,16 +2136,16 @@ on the stack.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 July 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -19,7 +19,7 @@ UNICODE AND UTF SUPPORT
 PCRE2 is normally built with Unicode support, though if you do not need it, you
 can build it without, in which case the library will be smaller. With Unicode
 support, PCRE2 has knowledge of Unicode character properties and can process
-text strings in UTF-8, UTF-16, or UTF-32 format (depending on the code unit
+strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit
 width), but this is not the default. Unless specifically requested, PCRE2
 treats each code unit in a string as one character.
 </P>
@ -50,17 +50,18 @@ UNICODE PROPERTY SUPPORT
 <P>
 When PCRE2 is built with Unicode support, the escape sequences \p{..},
 \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+The Unicode properties that can be tested are a subset of those that Perl
+supports. Currently they are limited to the general category properties such as
+Lu for an upper case letter or Nd for a decimal number, the Unicode script
+names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived
+properties Any and LC (synonym L&). Full lists are given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 and
 <a href="pcre2syntax.html"><b>pcre2syntax</b></a>
-documentation. Only the short names for properties are supported. For example,
-\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE2 does not support this.
+documentation. In general, only the short names for properties are supported.
+For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not
+supported. Furthermore, in Perl, many properties may optionally be prefixed by
+"Is", for compatibility with Perl 5.6. PCRE2 does not support this.
 </P>
 <br><b>
 WIDE CHARACTERS AND UTF MODES
@ -134,14 +135,16 @@ However, the special horizontal and vertical white space matching escapes (\h,
 not PCRE2_UCP is set.
 </P>
 <br><b>
-CASE-EQUIVALENCE IN UTF MODE
+UNICODE CASE-EQUIVALENCE
 </b><br>
 <P>
-Case-insensitive matching in UTF mode makes use of Unicode properties except
-for characters whose code points are less than 128 and that have at most two
-case-equivalent values. For these, a direct table lookup is used for speed. A
-few Unicode characters such as Greek sigma have more than two code points that
-are case-equivalent, and these are treated specially.
+If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use
+of Unicode properties except for characters whose code points are less than 128
+and that have at most two case-equivalent values. For these, a direct table
+lookup is used for speed. A few Unicode characters such as Greek sigma have
+more than two code points that are case-equivalent, and these are treated
+specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
+processing for non-UTF character encodings such as UCS-2.
 <a name="scriptruns"></a></P>
 <br><b>
 SCRIPT RUNS
@ -475,7 +478,7 @@ AUTHOR
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -484,9 +487,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 24 May 2019
+Last updated: 22 December 2021
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/pcre2.3
+++ b/doc/pcre2.3
@ -1,4 +1,4 @@
-.TH PCRE2 3 "17 September 2018" "PCRE2 10.33"
+.TH PCRE2 3 "27 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH INTRODUCTION
@ -11,7 +11,8 @@ nearly two decades, the limitations of the original API were making development
 increasingly difficult. The new API is more extensible, and it was simplified
 by abolishing the separate "study" optimizing function; in PCRE2, patterns are
 automatically optimized where possible. Since forking from PCRE1, the code has
-been extensively refactored and new features introduced.
+been extensively refactored and new features introduced. The old library is now
+obsolete and is no longer maintained.
 .P
 As well as Perl-style regular expression patterns, some features that appeared
 in Python and the original PCRE before they appeared in Perl are available
@ -19,8 +20,13 @@ using the Python syntax. There is also some support for one or two .NET and
 Oniguruma syntax items, and there are options for requesting some minor changes
 that give better ECMAScript (aka JavaScript) compatibility.
 .P
-The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
-code units, which means that up to three separate libraries may be installed.
+The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit,
+or 32-bit code units, which means that up to three separate libraries may be
+installed, one for each code unit size. The size of code unit is not related to
+the bit size of the underlying hardware. In a 64-bit environment that also
+supports 32-bit applications, versions of PCRE2 that are compiled in both
+64-bit and 32-bit modes may be needed.
+.P
 The original work to extend PCRE to 16-bit and 32-bit code units was done by
 Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
 can be interpreted either as one character per code unit, or as UTF-encoded
@ -185,18 +191,18 @@ function, listing its arguments and results.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .P
 Putting an actual email address here is a spam magnet. If you want to email me,
-use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+use my two names separated by a dot at gmail.com.
 .
 .
 .SH REVISION
 .rs
 .sp
 .nf
-Last updated: 17 September 2018
-Copyright (c) 1997-2018 University of Cambridge.
+Last updated: 27 August 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
--- a/doc/pcre2_compile.3
+++ b/doc/pcre2_compile.3
@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2_COMPILE 3 "22 April 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -80,8 +80,17 @@ Additional options may be set in the compile context via the
 .\"
 function.
 .P
-The yield of this function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+If either of \fIerrorcode\fP or \fIerroroffset\fP is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the \fIerrorcode\fP argument to the
+\fBpcre2_get_error_message()\fP function. The offset (in code units) where the
+error was encountered is returned via the \fIerroroffset\fP argument.
+.P
+If there is no error, the value passed via \fIerrorcode\fP returns the message
+"no error" if passed to \fBpcre2_get_error_message()\fP, and the value passed
+via \fIerroroffset\fP is zero.
 .P
 There is a complete description of the PCRE2 native API, with more detail on
 each option, in the
--- a/doc/pcre2_dfa_match.3
+++ b/doc/pcre2_dfa_match.3
@ -1,4 +1,4 @@
-.TH PCRE2_DFA_MATCH 3 "16 October 2018" "PCRE2 10.33"
+.TH PCRE2_DFA_MATCH 3 "28 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -33,10 +33,15 @@ just once (except when processing lookaround assertions). This function is
  \fIworkspace\fP    Points to a vector of ints used as working space
  \fIwscount\fP      Number of elements in the vector
 .sp
-For \fBpcre2_dfa_match()\fP, a match context is needed only if you want to set
-up a callout function or specify the heap limit or the match or the recursion
-depth limits. The \fIlength\fP and \fIstartoffset\fP values are code units, not
-characters. The options are:
+The size of output vector needed to contain all the results depends on the
+number of simultaneous matches, not on the number of parentheses in the
+pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the match
+data block is therefore not advisable when using this function.
+.P
+A match context is needed only if you want to set up a callout function or
+specify the heap limit or the match or the recursion depth limits. The
+\fIlength\fP and \fIstartoffset\fP values are code units, not characters. The
+options are:
 .sp
  PCRE2_ANCHORED          Match only at the first position
  PCRE2_COPY_MATCHED_SUBJECT
--- a/doc/pcre2_jit_free_unused_memory.3
+++ b/doc/pcre2_jit_free_unused_memory.3
@ -17,7 +17,7 @@ This function frees unused JIT executable memory. The argument is a general
 context, for custom memory management, or NULL for standard memory management.
 JIT memory allocation retains some memory in order to improve future JIT
 compilation speed. In low memory conditions,
-\fBpcre2_jit_free_unused_memory()\fB can be used to cause this memory to be
+\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be
 freed.
 .P
 There is a complete description of the PCRE2 native API in the
--- a/doc/pcre2_jit_match.3
+++ b/doc/pcre2_jit_match.3
@ -1,4 +1,4 @@
-.TH PCRE2_JIT_MATCH 3 "03 November 2014" "PCRE2 10.0"
+.TH PCRE2_JIT_MATCH 3 "11 February 2020" "PCRE2 10.35"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -22,8 +22,10 @@ algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
 it bypasses some of the sanity checks that \fBpcre2_match()\fP applies.
 Its arguments are exactly the same as for
 .\" HREF
-\fBpcre2_match()\fP.
+\fBpcre2_match()\fP,
 .\"
+except that the subject string must be specified with a length;
+PCRE2_ZERO_TERMINATED is not supported.
 .P
 The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
 PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported
--- a/doc/pcre2_jit_stack_create.3
+++ b/doc/pcre2_jit_stack_create.3
@ -22,7 +22,8 @@ allocation. The result can be passed to the JIT run-time code by calling
 \fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern,
 which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP.
 A maximum stack size of 512KiB to 1MiB should be more than enough for any
-pattern. For more details, see the
+pattern. If the stack couldn't be allocated or the values passed were not
+reasonable, NULL will be returned. For more details, see the
 .\" HREF
 \fBpcre2jit\fP
 .\"
--- a/doc/pcre2_match_data_create.3
+++ b/doc/pcre2_match_data_create.3
@ -1,4 +1,4 @@
-.TH PCRE2_MATCH_DATA_CREATE 3 "29 July 2015" "PCRE2 10.21"
+.TH PCRE2_MATCH_DATA_CREATE 3 "28 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -18,8 +18,9 @@ This function creates a new match data block, which is used for holding the
 result of a match. The first argument specifies the number of pairs of offsets
 that are required. These form the "output vector" (ovector) within the match
 data block, and are used to identify the matched string and any captured
-substrings. There is always one pair of offsets; if \fBovecsize\fP is zero, it
-is treated as one.
+substrings when matching with \fBpcre2_match()\fP, or a number of different
+matches at the same point when used with \fBpcre2_dfa_match()\fP. There is
+always one pair of offsets; if \fBovecsize\fP is zero, it is treated as one.
 .P
 The second argument points to a general context, for custom memory management,
 or is NULL for system memory management. The result of the function is NULL if
--- a/doc/pcre2_match_data_create_from_pattern.3
+++ b/doc/pcre2_match_data_create_from_pattern.3
@ -1,4 +1,4 @@
-.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "29 July 2015" "PCRE2 10.21"
+.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "28 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -14,12 +14,15 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .SH DESCRIPTION
 .rs
 .sp
-This function creates a new match data block, which is used for holding the
-result of a match. The first argument points to a compiled pattern. The number
-of capturing parentheses within the pattern is used to compute the number of
-pairs of offsets that are required in the match data block. These form the
-"output vector" (ovector) within the match data block, and are used to identify
-the matched string and any captured substrings.
+This function creates a new match data block for holding the result of a match.
+The first argument points to a compiled pattern. The number of capturing
+parentheses within the pattern is used to compute the number of pairs of
+offsets that are required in the match data block. These form the "output
+vector" (ovector) within the match data block, and are used to identify the
+matched string and any captured substrings when matching with
+\fBpcre2_match()\fP. If you are using \fBpcre2_dfa_match()\fP, which uses the
+output vector in a different way, you should use \fBpcre2_match_data_create()\fP
+instead of this function.
 .P
 The second argument points to a general context, for custom memory management,
 or is NULL to use the same memory allocator as was used for the compiled
--- a/doc/pcre2_serialize_decode.3
+++ b/doc/pcre2_serialize_decode.3
@ -36,7 +36,7 @@ the following negative error codes:
  PCRE2_ERROR_BADDATA   \fInumber_of_codes\fP is zero or less
  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in \fIbytes\fP
  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
-  PCRE2_ERROR_MEMORY    memory allocation failed
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
  PCRE2_ERROR_NULL      \fIcodes\fP or \fIbytes\fP is NULL
 .sp
 PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
--- a/doc/pcre2_set_character_tables.3
+++ b/doc/pcre2_set_character_tables.3
@ -1,4 +1,4 @@
-.TH PCRE2_SET_CHARACTER_TABLES 3 "22 October 2014" "PCRE2 10.00"
+.TH PCRE2_SET_CHARACTER_TABLES 3 "20 March 2020" "PCRE2 10.35"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -15,9 +15,14 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .rs
 .sp
 This function sets a pointer to custom character tables within a compile
-context. The second argument must be the result of a call to
-\fBpcre2_maketables()\fP or NULL to request the default tables. The result is
-always zero.
+context. The second argument must point to a set of PCRE2 character tables or
+be NULL to request the default tables. The result is always zero. Character
+tables can be created by calling \fBpcre2_maketables()\fP or by running the
+\fBpcre2_dftables\fP maintenance command in binary mode (see the
+.\" HREF
+\fBpcre2build\fP
+.\"
+documentation).
 .P
 There is a complete description of the PCRE2 native API in the
 .\" HREF
--- a/doc/pcre2_set_compile_extra_options.3
+++ b/doc/pcre2_set_compile_extra_options.3
@ -1,4 +1,4 @@
-.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "11 February 2019" "PCRE2 10.33"
+.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "31 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -18,12 +18,13 @@ This function sets additional option bits for \fBpcre2_compile()\fP that are
 housed in a compile context. It completely replaces all the bits. The extra
 options are:
 .sp
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \eK in lookarounds
 .\" JOIN
-  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \ex{df800} to \ex{dfff}
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \ex{d800} to \ex{dfff}
                                         in UTF-8 and UTF-32 modes
 .\" JOIN
-  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \eu, \eU, and \ex
-                                         handling
+  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \eu, \eU, and
+                                         \ex handling
 .\" JOIN
  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as
                                         a literal following character
--- a/doc/pcre2_substitute.3
+++ b/doc/pcre2_substitute.3
@ -1,4 +1,4 @@
-.TH PCRE2_SUBSTITUTE 3 "04 April 2017" "PCRE2 10.30"
+.TH PCRE2_SUBSTITUTE 3 "22 January 2020" "PCRE2 10.35"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -36,8 +36,8 @@ Its arguments are:
  \fIoutlengthptr\fP  Points to the length of the output buffer
 .sp
 A match data block is needed only if you want to inspect the data from the
-match that is returned in that block. A match context is needed only if you
-want to:
+final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is
+set. A match context is needed only if you want to:
 .sp
  Set up a callout function
  Set a matching offset limit
@ -45,33 +45,57 @@ want to:
  Change the backtracking depth limit
  Set custom memory management in the match context
 .sp
-The \fIlength\fP, \fIstartoffset\fP and \fIrlength\fP values are code
-units, not characters, as is the contents of the variable pointed at by
-\fIoutlengthptr\fP, which is updated to the actual length of the new string.
+The \fIlength\fP, \fIstartoffset\fP and \fIrlength\fP values are code units,
+not characters, as is the contents of the variable pointed at by
+\fIoutlengthptr\fP. This variable must contain the length of the output buffer
+when the function is called. If the function is successful, the value is
+changed to the length of the new string, excluding the trailing zero that is
+automatically added.
+.P
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 .sp
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
 .\" JOIN
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the
-                              subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
+  PCRE2_NOTBOL                       Subject is not the beginning of a
+                                      line
+  PCRE2_NOTEOL                       Subject is not the end of a line
 .\" JOIN
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement
-                              for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_NOTEMPTY                     An empty string is not a
+                                      valid match
+.\" JOIN
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of
+                                      the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+.\" JOIN
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in
+                                      the subject or replacement
+.\" JOIN
+                                      (only relevant if PCRE2_UTF was
+                                      set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+.\" JOIN
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the
+                                      subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+.\" JOIN
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for
+                                      first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
+  PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 .sp
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
+PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
+.P
+If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its
+contents must be the result of a call to \fBpcre2_match()\fP using the same
+pattern and subject.
+.P
 The function returns the number of substitutions, which may be zero if there
-were no matches. The result can be greater than one only when
+are no matches. The result may be greater than one only when
 PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
 is returned.
 .P
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "02 September 2019" "PCRE2 10.34"
+.TH PCRE2API 3 "27 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -187,7 +187,7 @@ document for an overview of all the PCRE2 documentation.
 .B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP,
 .B "  PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
 .B "  uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
-.B "  pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacementzfP,"
+.B "  pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacementz\fP,"
 .B "  PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
 .B "  PCRE2_SIZE *\fIoutlengthptr\fP);"
 .fi
@ -564,24 +564,53 @@ documentation for more details.
 .P
 In a more complicated situation, where patterns are compiled only when they are
 first needed, but are still shared between threads, pointers to compiled
-patterns must be protected from simultaneous writing by multiple threads, at
-least until a pattern has been compiled. The logic can be something like this:
+patterns must be protected from simultaneous writing by multiple threads. This
+is somewhat tricky to do correctly. If you know that writing to a pointer is
+atomic in your environment, you can use logic like this:
 .sp
  Get a read-only (shared) lock (mutex) for pointer
  if (pointer == NULL)
    {
    Get a write (unique) lock for pointer
-    pointer = pcre2_compile(...
+    if (pointer == NULL) pointer = pcre2_compile(...
    }
  Release the lock
  Use pointer in pcre2_match()
 .sp
 Of course, testing for compilation errors should also be included in the code.
 .P
-If JIT is being used, but the JIT compilation is not being done immediately,
-(perhaps waiting to see if the pattern is used often enough) similar logic is
-required. JIT compilation updates a pointer within the compiled code block, so
-a thread must gain unique write access to the pointer before calling
+The reason for checking the pointer a second time is as follows: Several
+threads may have acquired the shared lock and tested the pointer for being
+NULL, but only one of them will be given the write lock, with the rest kept
+waiting. The winning thread will compile the pattern and store the result.
+After this thread releases the write lock, another thread will get it, and if
+it does not retest pointer for being NULL, will recompile the pattern and
+overwrite the pointer, creating a memory leak and possibly causing other
+issues.
+.P
+In an environment where writing to a pointer may not be atomic, the above logic
+is not sufficient. The thread that is doing the compiling may be descheduled
+after writing only part of the pointer, which could cause other threads to use
+an invalid value. Instead of checking the pointer itself, a separate "pointer
+is valid" flag (that can be updated atomically) must be used:
+.sp
+  Get a read-only (shared) lock (mutex) for pointer
+  if (!pointer_is_valid)
+    {
+    Get a write (unique) lock for pointer
+    if (!pointer_is_valid)
+      {
+      pointer = pcre2_compile(...
+      pointer_is_valid = TRUE
+      }
+    }
+  Release the lock
+  Use pointer in pcre2_match()
+.sp
+If JIT is being used, but the JIT compilation is not being done immediately
+(perhaps waiting to see if the pattern is used often enough), similar logic is
+required. JIT compilation updates a value within the compiled code block, so a
+thread must gain unique write access to the pointer before calling
 \fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP or
 \fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the
 compiled code before calling the JIT compiler.
@ -924,7 +953,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 .P
 A value for the heap limit may also be supplied by an item at the start of a
 pattern of the form
@ -935,18 +964,18 @@ where ddd is a decimal number. However, such a setting is ignored unless ddd is
 less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
 limit is set, less than the default.
 .P
-The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The \fBpcre2_match()\fP function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+\fBpcre2_match()\fP uses the heap are given in the
+.\" HREF
+\fBpcre2perform\fP
+.\"
+documentation.
 .P
-Similarly, for \fBpcre2_dfa_match()\fP, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For \fBpcre2_dfa_match()\fP, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 .sp
 .nf
 .B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
@ -990,10 +1019,10 @@ less than the limit set by the caller of \fBpcre2_match()\fP or
 .fi
 .sp
 This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@ -1034,12 +1063,13 @@ less than the limit set by the caller of \fBpcre2_match()\fP or
 .sp
 .B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP);
 .P
-The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to
-discover which optional features have been compiled into the PCRE2 library. The
+The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to find
+the value of certain configuration parameters and to discover which optional
+features have been compiled into the PCRE2 library. The
 .\" HREF
 \fBpcre2build\fP
 .\"
-documentation has more details about these optional features.
+documentation has more details about these features.
 .P
 The first argument for \fBpcre2_config()\fP specifies which information is
 required. The second argument is a pointer to memory into which the information
@ -1152,6 +1182,16 @@ over compilation stack usage, see \fBpcre2_set_compile_recursion_guard()\fP.
 .sp
 This parameter is obsolete and should not be used in new code. The output is a
 uint32_t integer that is always set to zero.
+.sp
+  PCRE2_CONFIG_TABLES_LENGTH
+.sp
+The output is a uint32_t integer that gives the length of PCRE2's character
+processing tables in bytes. For details of these tables see the
+.\" HTML <a href="#localesupport">
+.\" </a>
+section on locale support
+.\"
+below.
 .sp
  PCRE2_CONFIG_UNICODE_VERSION
 .sp
@ -1283,8 +1323,7 @@ If \fIerrorcode\fP or \fIerroroffset\fP is NULL, \fBpcre2_compile()\fP returns
 NULL immediately. Otherwise, the variables to which these point are set to an
 error code and an offset (number of code units) within the pattern,
 respectively, when \fBpcre2_compile()\fP returns NULL because a compilation
-error has occurred. The values are not defined when compilation is successful
-and \fBpcre2_compile()\fP returns a non-NULL value.
+error has occurred.
 .P
 There are nearly 100 positive error codes that \fBpcre2_compile()\fP may return
 if it finds an error in the pattern. There are also some negative error codes
@ -1303,14 +1342,17 @@ message"
 below)
 .\"
 should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
-for both positive and negative error codes in \fBpcre2.h\fP.
+for both positive and negative error codes in \fBpcre2.h\fP. When compilation
+is successful \fIerrorcode\fP is set to a value that returns the message "no
+error" if passed to \fBpcre2_get_error_message()\fP.
 .P
 The value returned in \fIerroroffset\fP is an indication of where in the
-pattern the error occurred. It is not necessarily the furthest point in the
-pattern that was read. For example, after the error "lookbehind assertion is
-not fixed length", the error offset points to the start of the failing
-assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
-first code unit of the failing character.
+pattern an error occurred. When there is no error, zero is returned. A non-zero
+value is not necessarily the furthest point in the pattern that was read. For
+example, after the error "lookbehind assertion is not fixed length", the error
+offset points to the start of the failing assertion. For an invalid UTF-8 or
+UTF-16 string, the offset is that of the first code unit of the failing
+character.
 .P
 Some errors are not detected until the whole pattern has been scanned; in these
 cases, the offset passed back is the length of the pattern. Note that the
@ -1420,13 +1462,16 @@ documentation.
 .sp
 If this bit is set, letters in the pattern match both upper and lower case
 letters in the subject. It is equivalent to Perl's /i option, and it can be
-changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode
-properties are used for all characters with more than one other case, and for
-all characters whose code points are greater than U+007F. For lower valued
-characters with only one other case, a lookup table is used for speed. When
-PCRE2_UTF is not set, a lookup table is used for all code points less than 256,
-and higher code points (available only in 16-bit or 32-bit mode) are treated as
-not having another case.
+changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
+PCRE2_UCP is set, Unicode properties are used for all characters with more than
+one other case, and for all characters whose code points are greater than
+U+007F. Note that there are two ASCII characters, K and S, that, in addition to
+their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin
+sign) and U+017F (long S) respectively. For lower valued characters with only
+one other case, a lookup table is used for speed. When neither PCRE2_UTF nor
+PCRE2_UCP is set, a lookup table is used for all code points less than 256, and
+higher code points (available only in 16-bit or 32-bit mode) are treated as not
+having another case.
 .sp
  PCRE2_DOLLAR_ENDONLY
 .sp
@ -1751,7 +1796,7 @@ it is set, the effect of passing an invalid UTF string as a pattern is
 undefined. It may cause your program to crash or loop.
 .P
 Note that this option can also be passed to \fBpcre2_match()\fP and
-\fBpcre_dfa_match()\fP, to suppress UTF validity checking of the subject
+\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject
 string.
 .P
 Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the
@ -1769,10 +1814,11 @@ are not representable in UTF-16.
 .sp
  PCRE2_UCP
 .sp
-This option changes the way PCRE2 processes \eB, \eb, \eD, \ed, \eS, \es, \eW,
-\ew, and some of the POSIX character classes. By default, only ASCII characters
-are recognized, but if PCRE2_UCP is set, Unicode properties are used instead to
-classify characters. More details are given in the section on
+This option has two effects. Firstly, it changes the way PCRE2 processes \eB,
+\eb, \eD, \ed, \eS, \es, \eW, \ew, and some of the POSIX character classes. By
+default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode
+properties are used instead to classify characters. More details are given in
+the section on
 .\" HTML <a href="pcre2pattern.html#genericchartypes">
 .\" </a>
 generic character types
@ -1782,8 +1828,13 @@ in the
 \fBpcre2pattern\fP
 .\"
 page. If you set PCRE2_UCP, matching one of the items it affects takes much
-longer. The option is available only if PCRE2 has been compiled with Unicode
-support (which is the default).
+longer.
+.P
+The second effect of PCRE2_UCP is to force the use of Unicode properties for
+upper/lower casing operations on characters with code points greater than 127,
+even when PCRE2_UTF is not set. This makes it possible, for example, to process
+strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has
+been compiled with Unicode support (which is the default).
 .sp
  PCRE2_UNGREEDY
 .sp
@ -1826,6 +1877,13 @@ characters with code points greater than 127.
 .sp
 The option bits that can be set in a compile context by calling the
 \fBpcre2_set_compile_extra_options()\fP function are as follows:
+.sp
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+.sp
+Since release 10.38 PCRE2 has forbidden the use of \eK within lookaround
+assertions, following Perl's lead. This option is provided to re-enable the
+previous behaviour (act in positive lookarounds, ignore in negative ones) in
+case anybody is relying on it.
 .sp
  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
 .sp
@ -1957,13 +2015,18 @@ PCRE2 handles caseless matching, and determines whether characters are letters,
 digits, or whatever, by reference to a set of tables, indexed by character code
 point. However, this applies only to characters whose code points are less than
 256. By default, higher-valued code points never match escapes such as \ew or
-\ed. When PCRE2 is built with Unicode support (the default), all characters can
-be tested with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set
-when a pattern is compiled; this causes \ew and friends to use Unicode property
-support instead of the built-in tables.
+\ed.
+.P
+When PCRE2 is built with Unicode support (the default), certain Unicode
+character properties can be tested with \ep and \eP, or, alternatively, the
+PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and
+friends to use Unicode property support instead of the built-in tables.
+PCRE2_UCP also causes upper/lower casing operations on characters with code
+points greater than 127 to use Unicode properties. These effects apply even
+when PCRE2_UTF is not set.
 .P
 The use of locales with Unicode is discouraged. If you are handling characters
-with code points greater than 128, you should either use Unicode support, or
+with code points greater than 127, you should either use Unicode support, or
 use locales, but not try to mix the two.
 .P
 PCRE2 contains a built-in set of character tables that are used by default.
@ -1985,7 +2048,7 @@ the system \fBmalloc()\fP is used. The result can be passed to
 calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein.
 .P
 For example, to build and use tables that are appropriate for the French locale
-(where accented characters with values greater than 128 are treated as
+(where accented characters with values greater than 127 are treated as
 letters), the following code could be used:
 .sp
  setlocale(LC_CTYPE, "fr_FR");
@ -1998,10 +2061,10 @@ The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
 are using Windows, the name for the French locale is "french".
 .P
 The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP
-is saved with the compiled pattern, and the same tables are used by
-\fBpcre2_match()\fP and \fBpcre_dfa_match()\fP. Thus, for any single pattern,
-compilation and matching both happen in the same locale, but different patterns
-can be processed in different locales.
+is saved with the compiled pattern, and the same tables are used by the
+matching functions. Thus, for any single pattern, compilation and matching both
+happen in the same locale, but different patterns can be processed in different
+locales.
 .P
 It is the caller's responsibility to ensure that the memory containing the
 tables remains available while they are still in use. When they are no longer
@ -2010,6 +2073,26 @@ pass as its first parameter the same global context that was used to create the
 tables.
 .
 .
+.SS "Saving locale tables"
+.rs
+.sp
+The tables described above are just a sequence of binary bytes, which makes
+them independent of hardware characteristics such as endianness or whether the
+processor is 32-bit or 64-bit. A copy of the result of \fBpcre2_maketables()\fP
+can therefore be saved in a file or elsewhere and re-used later, even in a
+different program or on another computer. The size of the tables (number of
+bytes) must be obtained by calling \fBpcre2_config()\fP with the
+PCRE2_CONFIG_TABLES_LENGTH option because \fBpcre2_maketables()\fP does not
+return this value. Note that the \fBpcre2_dftables\fP program, which is part of
+the PCRE2 build system, can be used stand-alone to create a file that contains
+a set of binary tables. See the
+.\" HTML <a href="pcre2build.html#createtables">
+.\" </a>
+\fBpcre2build\fP
+.\"
+documentation for details.
+.
+.
 .\" HTML <a name="infoaboutpattern"></a>
 .SH "INFORMATION ABOUT A COMPILED PATTERN"
 .rs
@ -2020,7 +2103,7 @@ tables.
 .P
 The \fBpcre2_pattern_info()\fP function returns general information about a
 compiled pattern. For information about callouts, see the
-.\" HTML <a href="pcre2pattern.html#infoaboutcallouts">
+.\" HTML <a href="#infoaboutcallouts">
 .\" </a>
 next section.
 .\"
@ -2198,7 +2281,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable.
  PCRE2_INFO_LASTCODETYPE
 .sp
 Returns 1 if there is a rightmost literal code unit that must exist in any
-matched string, other than at its start. The third argument should  point to a
+matched string, other than at its start. The third argument should point to a
 \fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
 returned, the code unit value itself can be retrieved using
 PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is
@ -2416,19 +2499,27 @@ to an abstract format like Java or .NET serialization.
 Information about a successful or unsuccessful match is placed in a match
 data block, which is an opaque structure that is accessed by function calls. In
 particular, the match data block contains a vector of offsets into the subject
-string that define the matched part of the subject and any substrings that were
-captured. This is known as the \fIovector\fP.
+string that define the matched parts of the subject. This is known as the
+\fIovector\fP.
 .P
 Before calling \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or
 \fBpcre2_jit_match()\fP you must create a match data block by calling one of
 the creation functions above. For \fBpcre2_match_data_create()\fP, the first
-argument is the number of pairs of offsets in the \fIovector\fP. One pair of
-offsets is required to identify the string that matched the whole pattern, with
-an additional pair for each captured substring. For example, a value of 4
-creates enough space to record the matched portion of the subject plus three
-captured substrings. A minimum of at least 1 pair is imposed by
-\fBpcre2_match_data_create()\fP, so it is always possible to return the overall
-matched string.
+argument is the number of pairs of offsets in the \fIovector\fP.
+.P
+When using \fBpcre2_match()\fP, one pair of offsets is required to identify the
+string that matched the whole pattern, with an additional pair for each
+captured substring. For example, a value of 4 creates enough space to record
+the matched portion of the subject plus three captured substrings.
+.P
+When using \fBpcre2_dfa_match()\fP there may be multiple matched substrings of
+different lengths at the same point in the subject. The ovector should be made
+large enough to hold as many as are expected.
+.P
+A minimum of 1 pair is imposed by \fBpcre2_match_data_create()\fP, so
+it is always possible to return the overall matched string in the case of
+\fBpcre2_match()\fP or the longest match in the case of
+\fBpcre2_dfa_match()\fP.
 .P
 The second argument of \fBpcre2_match_data_create()\fP is a pointer to a
 general context, which can specify custom memory management for obtaining the
@ -2437,10 +2528,11 @@ pass NULL, which causes \fBmalloc()\fP to be used.
 .P
 For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a
 pointer to a compiled pattern. The ovector is created to be exactly the right
-size to hold all the substrings a pattern might capture. The second argument is
-again a pointer to a general context, but in this case if NULL is passed, the
-memory is obtained using the same allocator that was used for the compiled
-pattern (custom or default).
+size to hold all the substrings a pattern might capture when matched using
+\fBpcre2_match()\fP. You should not use this call when matching with
+\fBpcre2_dfa_match()\fP. The second argument is again a pointer to a general
+context, but in this case if NULL is passed, the memory is obtained using the
+same allocator that was used for the compiled pattern (custom or default).
 .P
 A match data block can be used many times, with the same or different compiled
 patterns. You can extract information from a match data block after a match
@ -2534,7 +2626,9 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
 \fIstartoffset\fP. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and
+\fIlength\fP is zero, the subject is assumed to be an empty string. If
+\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL.
 .P
 If \fIstartoffset\fP is greater than the length of the subject,
 \fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is
@ -2554,10 +2648,10 @@ lookbehind. For example, consider the pattern
 .sp
 which finds occurrences of "iss" in the middle of words. (\eB matches only if
 the current position in the subject is not a word boundary.) When applied to
-the string "Mississipi" the first call to \fBpcre2_match()\fP finds the first
+the string "Mississippi" the first call to \fBpcre2_match()\fP finds the first
 occurrence. If \fBpcre2_match()\fP is called again with just the remainder of
-the subject, namely "issipi", it does not match, because \eB is always false at
-the start of the subject, which is deemed to be a word boundary. However, if
+the subject, namely "issippi", it does not match, because \eB is always false
+at the start of the subject, which is deemed to be a word boundary. However, if
 \fBpcre2_match()\fP is passed the entire string again, but with
 \fIstartoffset\fP set to 4, it finds the second occurrence of "iss" because it
 is able to look behind the starting point to discover that it is preceded by a
@ -3068,11 +3162,11 @@ The backtracking match limit was reached.
 .sp
  PCRE2_ERROR_NOMEMORY
 .sp
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backtracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 .sp
  PCRE2_ERROR_NULL
 .sp
@ -3321,12 +3415,23 @@ same number causes an error at compile time.
 .B "  PCRE2_SIZE *\fIoutlengthptr\fP);"
 .fi
 .P
-This function calls \fBpcre2_match()\fP and then makes a copy of the subject
-string in \fIoutputbuffer\fP, replacing one or more parts that were matched
-with the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP.
-This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
-The default is to perform just one replacement, but there is an option that
-requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
+This function optionally calls \fBpcre2_match()\fP and then makes a copy of the
+subject string in \fIoutputbuffer\fP, replacing parts that were matched with
+the \fIreplacement\fP string, whose length is supplied in \fIrlength\fP, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the
+replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an
+error occurs if \fIreplacement\fP is NULL.
+.P
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
+.P
+If successful, \fBpcre2_substitute()\fP returns the number of substitutions
+that were carried out. This may be zero if no match was found, and is never
+greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is
+returned if an error is detected.
 .P
 Matches in which a \eK item in a lookahead in the pattern causes the match to
 end before it starts are not supported, and give rise to an error return. For
@ -3341,32 +3446,79 @@ data block is obtained and freed within this function, using memory management
 functions from the match context, if provided, or else those that were used to
 allocate memory for the compiled code.
 .P
-If an external \fImatch_data\fP block is provided, its contents afterwards
-are those set by the final call to \fBpcre2_match()\fP. For global changes,
-this will have ended in a matching error. The contents of the ovector within
-the match data block may or may not have been changed.
+If \fImatch_data\fP is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the
+provided block is used for all calls to \fBpcre2_match()\fP, and its contents
+afterwards are the result of the final call. For global changes, this will
+always be a no-match error. The contents of the ovector within the match data
+block may or may not have been changed.
 .P
-The \fIoutlengthptr\fP argument must point to a variable that contains the
-length, in code units, of the output buffer. If the function is successful, the
-value is updated to contain the length of the new string, excluding the
-trailing zero that is automatically added.
+As well as the usual options for \fBpcre2_match()\fP, a number of additional
+options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
+One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
+\fImatch_data\fP block must be provided, and it must have already been used for
+an external call to \fBpcre2_match()\fP with the same pattern and subject
+arguments. The data in the \fImatch_data\fP block (return code, offset vector)
+is then used for the first substitution instead of calling \fBpcre2_match()\fP
+from within \fBpcre2_substitute()\fP. This allows an application to check for a
+match before choosing to substitute, without having to repeat the match.
+.P
+The contents of the externally supplied match data block are not changed when
+PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
+\fBpcre2_match()\fP is called after the first substitution to check for further
+matches, but this is done using an internally obtained match data block, thus
+always leaving the external block unchanged.
+.P
+The \fIcode\fP argument is not used for matching before the first substitution
+when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when
+PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the
+UTF setting and the number of capturing parentheses in the pattern.
+.P
+The default action of \fBpcre2_substitute()\fP is to return a copy of the
+subject string with matched substrings replaced. However, if
+PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are
+returned. In the global case, multiple replacements are concatenated in the
+output buffer. Substitution callouts (see
+.\" HTML <a href="#subcallouts">
+.\" </a>
+below)
+.\"
+can be used to separate them if necessary.
+.P
+The \fIoutlengthptr\fP argument of \fBpcre2_substitute()\fP must point to a
+variable that contains the length, in code units, of the output buffer. If the
+function is successful, the value is updated to contain the length in code
+units of the new string, excluding the trailing zero that is automatically
+added.
 .P
 If the function is not successful, the value set via \fIoutlengthptr\fP depends
 on the type of error. For syntax errors in the replacement string, the value is
 the offset in the replacement string where the error was detected. For other
 errors, the value is PCRE2_UNSET by default. This includes the case of the
-output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set
-(see below), in which case the value is the minimum length needed, including
-space for the trailing zero. Note that in order to compute the required length,
-\fBpcre2_substitute()\fP has to simulate all the matching and copying, instead
-of giving an error return as soon as the buffer overflows. Note also that the
-length is in code units, not bytes.
+output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set.
 .P
-In the replacement string, which is interpreted as a UTF string in UTF mode,
-and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
-dollar character is an escape character that can specify the insertion of
-characters from capture groups or names from (*MARK) or other control verbs
-in the pattern. The following forms are always recognized:
+PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
+too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
+this option is set, however, \fBpcre2_substitute()\fP continues to go through
+the motions of matching and substituting (without, of course, writing anything)
+in order to compute the size of buffer that is needed. This value is passed
+back via the \fIoutlengthptr\fP variable, with the result of the function still
+being PCRE2_ERROR_NOMEMORY.
+.P
+Passing a buffer size of zero is a permitted way of finding out how much memory
+is needed for a given substitution. However, this does mean that the entire
+operation is carried out twice. Depending on the application, it may be more
+efficient to allocate a large buffer and free the excess afterwards, instead of
+using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
+.P
+The replacement string, which is interpreted as a UTF string in UTF mode, is
+checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF
+replacement string causes an immediate return with the relevant UTF error code.
+.P
+If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
+in any way. By default, however, a dollar character is an escape character that
+can specify the insertion of characters from capture groups and names from
+(*MARK) or other control verbs in the pattern. The following forms are always
+recognized:
 .sp
  $$                  insert a dollar character
  $<n> or ${<n>}      insert the contents of group <n>
@ -3389,9 +3541,6 @@ facility can be used to perform simple simultaneous substitutions, as this
      apple lemon
   2: pear orange
 .sp
-As well as the usual options for \fBpcre2_match()\fP, a number of additional
-options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
-.P
 PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string,
 replacing every matching substring. If this option is not set, only the first
 matching substring is replaced. The search for matches takes place in the
@ -3402,7 +3551,7 @@ set in the match context, searching stops when that limit is reached.
 .P
 You can restrict the effect of a global substitution to a portion of the
 subject string by setting either or both of \fIstartoffset\fP and an offset
-limit. Here is a \fPpcre2test\fP example:
+limit. Here is a \fBpcre2test\fP example:
 .sp
  /B/g,replace=!,use_offset_limit
  ABC ABC ABC ABC\e=offset=3,offset_limit=12
@ -3414,20 +3563,6 @@ If this is not successful, the offset is advanced by one character except when
 CRLF is a valid newline sequence and the next two characters are CR, LF. In
 this case, the offset is advanced by two characters.
 .P
-PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
-too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
-this option is set, however, \fBpcre2_substitute()\fP continues to go through
-the motions of matching and substituting (without, of course, writing anything)
-in order to compute the size of buffer that is needed. This value is passed
-back via the \fIoutlengthptr\fP variable, with the result of the function still
-being PCRE2_ERROR_NOMEMORY.
-.P
-Passing a buffer size of zero is a permitted way of finding out how much memory
-is needed for given substitution. However, this does mean that the entire
-operation is carried out twice. Depending on the application, it may be more
-efficient to allocate a large buffer and free the excess afterwards, instead of
-using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
-.P
 PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do
 not appear in the pattern to be treated as unset groups. This option should be
 used with care, because it means that a typo in a group name or number no
@ -3457,8 +3592,11 @@ and force lower case. The escape sequences change the current state: \eU and
 terminating a \eQ quoted sequence) reverts to no case forcing. The sequences
 \eu and \el force the next character (if it is a letter) to upper or lower
 case, respectively, and then the state automatically reverts to no case
-forcing. Case forcing applies to all inserted  characters, including those from
-capture groups and letters within \eQ...\eE quoted sequences.
+forcing. Case forcing applies to all inserted characters, including those from
+capture groups and letters within \eQ...\eE quoted sequences. If either
+PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
+properties are used for case forcing characters whose code points are greater
+than 127.
 .P
 Note that case forcing sequences such as \eU...\eE do not nest. For example,
 the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no
@ -3494,13 +3632,17 @@ The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
 substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
 groups in the extended syntax forms to be treated as unset.
 .P
-If successful, \fBpcre2_substitute()\fP returns the number of successful
-matches. This may be zero if no matches were found, and is never greater than 1
-unless PCRE2_SUBSTITUTE_GLOBAL is set.
-.P
-In the event of an error, a negative error code is returned. Except for
-PCRE2_ERROR_NOMATCH (which is never returned), errors from \fBpcre2_match()\fP
-are passed straight back.
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
+PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and
+are ignored.
+.
+.
+.SS "Substitution errors"
+.rs
+.sp
+In the event of an error, \fBpcre2_substitute()\fP returns a negative error
+code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from
+\fBpcre2_match()\fP are passed straight back.
 .P
 PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion,
 unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set.
@ -3514,6 +3656,11 @@ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is
 needed is returned via \fIoutlengthptr\fP. Note that this does not happen by
 default.
 .P
+PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
+\fImatch_data\fP argument is NULL or if the \fIsubject\fP or \fIreplacement\fP
+arguments are NULL. For backward compatibility reasons an exception is made for
+the \fIreplacement\fP argument if the \fIrlength\fP argument is also 0.
+.P
 PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
 replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
 (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
@ -3531,6 +3678,7 @@ above).
 .\"
 .
 .
+.\" HTML <a name="subcallouts"></a>
 .SS "Substitution callouts"
 .rs
 .sp
@ -3673,12 +3821,13 @@ other alternatives. Ultimately, when it runs out of matches,
 .P
 The function \fBpcre2_dfa_match()\fP is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-\fBpcre2_dfa_match()\fP does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does
+not support, see the
 .\" HREF
 \fBpcre2matching\fP
 .\"
@ -3710,7 +3859,7 @@ Here is an example of a simple call to \fBpcre2_dfa_match()\fP:
    wspace,         /* working space vector */
    20);            /* number of elements (NOT size in bytes) */
 .
-.SS "Option bits for \fBpcre_dfa_match()\fP"
+.SS "Option bits for \fBpcre2_dfa_match()\fP"
 .rs
 .sp
 The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must
@ -3869,7 +4018,7 @@ fail, this error is given.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -3878,6 +4027,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 02 September 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2build.3
+++ b/doc/pcre2build.3
@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "03 March 2019" "PCRE2 10.33"
+.TH PCRE2BUILD 3 "27 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .
@ -110,7 +110,7 @@ To build it without Unicode support, add
  --disable-unicode
 .sp
 to the \fBconfigure\fP command. This setting applies to all three libraries. It
-is not possible to build one library with Unicode support, and another without,
+is not possible to build one library with Unicode support and another without
 in the same configuration.
 .P
 Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16
@ -122,8 +122,9 @@ locked this out by setting PCRE2_NEVER_UTF.
 UTF support allows the libraries to process character code points up to
 0x10ffff in the strings that they handle. Unicode support also gives access to
 the Unicode properties of characters, using pattern escapes such as \eP, \ep,
-and \eX. Only the general category properties such as \fILu\fP and \fINd\fP are
-supported. Details are given in the
+and \eX. Only the general category properties such as \fILu\fP and \fINd\fP,
+script names, and some bi-directional properties are supported. Details are
+given in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@ -175,11 +176,11 @@ SELinux. This has no effect if JIT is not enabled. See the
 \fBpcre2jit\fP
 .\"
 documentation for a discussion of JIT usage. When JIT support is enabled,
-pcre2grep automatically makes use of it, unless you add
+\fBpcre2grep\fP automatically makes use of it, unless you add
 .sp
  --disable-pcre2grep-jit
 .sp
-to the "configure" command.
+to the \fBconfigure\fP command.
 .
 .
 .SH "NEWLINE RECOGNITION"
@ -277,12 +278,11 @@ to the \fBconfigure\fP command. This setting also applies to the
 \fBpcre2_dfa_match()\fP matching function, and to JIT matching (though the
 counting is done differently).
 .P
-The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The \fBpcre2_match()\fP function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 .\" HREF
 \fBpcre2api\fP
 .\"
@ -302,7 +302,7 @@ You can also explicitly limit the depth of nested backtracking in the
 for --with-match-limit. You can set a lower default limit by adding, for
 example,
 .sp
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 .sp
 to the \fBconfigure\fP command. This value can be overridden at run time. This
 depth limit indirectly limits the amount of heap memory that is used, but
@ -317,6 +317,7 @@ used for lookaround assertions, atomic groups, and recursion within patterns.
 The limit does not apply to JIT matching.
 .
 .
+.\" HTML <a name="createtables"></a>
 .SH "CREATING CHARACTER TABLES AT BUILD TIME"
 .rs
 .sp
@ -328,12 +329,33 @@ only. If you add
  --enable-rebuild-chartables
 .sp
 to the \fBconfigure\fP command, the distributed tables are no longer used.
-Instead, a program called \fBdftables\fP is compiled and run. This outputs the
-source for new set of tables, created in the default locale of your C run-time
-system. This method of replacing the tables does not work if you are cross
-compiling, because \fBdftables\fP is run on the local host. If you need to
-create alternative tables when cross compiling, you will have to do so "by
-hand".
+Instead, a program called \fBpcre2_dftables\fP is compiled and run. This
+outputs the source for a new set of tables, created in the default locale of your
+C run-time system. This method of replacing the tables does not work if you are
+cross compiling, because \fBpcre2_dftables\fP needs to be run on the local
+host and therefore must not be compiled with the cross compiler.
+.P
+If you need to create alternative tables when cross compiling, you will have to
+do so "by hand". There may also be other reasons for creating tables manually.
+To cause \fBpcre2_dftables\fP to be built on the local host, run a normal
+compiling command, and then run the program with the output file as its
+argument, for example:
+.sp
+  cc src/pcre2_dftables.c -o pcre2_dftables
+  ./pcre2_dftables src/pcre2_chartables.c
+.sp
+This builds the tables in the default locale of the local host. If you want to
+specify a locale, you must use the -L option:
+.sp
+  LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+.sp
+You can also specify -b (with or without -L). This causes the tables to be
+written in binary instead of as source code. A set of binary tables can be
+loaded into memory by an application and passed to \fBpcre2_compile()\fP in the
+same way as tables created by calling \fBpcre2_maketables()\fP. The tables are
+just a string of bytes, independent of hardware characteristics such as
+endianness. This means they can be bundled with an application that runs in
+different environments, to ensure consistent behaviour.
 .
 .
 .SH "USING EBCDIC CODE"
@ -417,7 +439,7 @@ default parameter values by adding, for example,
  --with-pcre2grep-bufsize=51200
  --with-pcre2grep-max-bufsize=2097152
 .sp
-to the \fBconfigure\fP command. The caller of \fPpcre2grep\fP can override
+to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override
 these values by using --buffer-size and --max-buffer-size on the command line.
 .
 .
@ -541,15 +563,16 @@ documentation.
 .sp
 The C99 standard defines formatting modifiers z and t for size_t and
 ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
-environments other than Microsoft Visual Studio when __STDC_VERSION__ is
-defined and has a value greater than or equal to 199901L (indicating C99).
+environments other than old versions of Microsoft Visual Studio when
+__STDC_VERSION__ is defined and has a value greater than or equal to 199901L
+(indicating support for C99).
 However, there is at least one environment that claims to be C99 but does not
 support these modifiers. If
 .sp
  --disable-percent-zt
 .sp
-is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
-%lu is used, with a cast for size_t values.
+is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
+a suitable format is used depending on the size of long for the platform.
 .
 .
 .SH "SUPPORT FOR FUZZERS"
@ -601,7 +624,7 @@ give a warning.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -610,6 +633,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 03 March 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2compat.3
+++ b/doc/pcre2compat.3
@ -1,33 +1,43 @@
-.TH PCRE2COMPAT 3 "13 July 2019" "PCRE2 10.34"
+.TH PCRE2COMPAT 3 "08 December 2021" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "DIFFERENCES BETWEEN PCRE2 AND PERL"
 .rs
 .sp
-This document describes the differences in the ways that PCRE2 and Perl handle
-regular expressions. The differences described here are with respect to Perl
-versions 5.26, but as both Perl and PCRE2 are continually changing, the
-information may sometimes be out of date.
+This document describes some of the differences in the ways that PCRE2 and Perl
+handle regular expressions. The differences described here are with respect to
+Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the
+information may at times be out of date.
 .P
-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the
+behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the
+next character unless it is the start of a newline sequence. This means that,
+if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF
+(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using
+EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline
+indicator.
+.P
+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
 have are given in the
 .\" HREF
 \fBpcre2unicode\fP
 .\"
 page.
 .P
-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
 they do not mean what you might think. For example, (?!a){3} does not assert
 that the next three characters are not "a". It just asserts that the next
 character is not "a" three times (in principle; PCRE2 optimizes this to run the
 assertion just once). Perl allows some repeat quantifiers on other assertions,
-for example, \eb* (but not \eb{3}), but these do not seem to have any use.
+for example, \eb*, but these do not seem to have any use. PCRE2 does not allow
+any kind of quantifier on non-lookaround assertions.
 .P
-3. Capture groups that occur inside negative lookaround assertions are counted,
+4. Capture groups that occur inside negative lookaround assertions are counted,
 but their entries in the offsets vector are set only when a negative assertion
 is a condition that has a matching branch (that is, the condition is false).
+Perl may set such capture groups in other circumstances.
 .P
-4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
+5. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
 \eU, and \eN when followed by a character name. \eN on its own, matching a
 non-newline character, and \eN{U+dd..}, matching a Unicode code point, are
 supported. The escapes that modify the case of following letters are
@ -37,23 +47,27 @@ generated by default. However, if either of the PCRE2_ALT_BSUX or
 PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript
 interprets them.
 .P
-5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
+6. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
 built with Unicode support (the default). The properties that can be tested
 with \ep and \eP are limited to the general category properties such as Lu and
-Nd, script names such as Greek or Han, and the derived properties Any and L&.
-PCRE2 does support the Cs (surrogate) property, which Perl does not; the Perl
-documentation says "Because Perl hides the need for the user to understand the
-internal representation of Unicode characters, there is no need to implement
-the somewhat messy concept of surrogates."
+Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the
+derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs
+(surrogate) property, but in PCRE2 its use is limited. See the
+.\" HREF
+\fBpcre2pattern\fP
+.\"
+documentation for details. The long synonyms for property names that Perl
+supports (such as \ep{Letter}) are not supported by PCRE2, nor is it permitted
+to prefix any of these properties with "Is".
 .P
-6. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters
+7. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters
 in between are treated as literals. However, this is slightly different from
 Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
-they cause variable interpolation (but of course PCRE2 does not have
-variables). Also, Perl does "double-quotish backslash interpolation" on any
-backslashes between \eQ and \eE which, its documentation says, "may lead to
-confusing results". PCRE2 treats a backslash between \eQ and \eE just like any
-other character. Note the following examples:
+they cause variable interpolation (PCRE2 does not have variables). Also, Perl
+does "double-quotish backslash interpolation" on any backslashes between \eQ
+and \eE which, its documentation says, "may lead to confusing results". PCRE2
+treats a backslash between \eQ and \eE just like any other character. Note the
+following examples:
 .sp
    Pattern            PCRE2 matches     Perl matches
 .sp
@ -65,9 +79,10 @@ other character. Note the following examples:
    \eQA\eB\eE            A\eB               A\eB
    \eQ\e\eE              \e                 \e\eE
 .sp
-The \eQ...\eE sequence is recognized both inside and outside character classes.
+The \eQ...\eE sequence is recognized both inside and outside character classes
+by both PCRE2 and Perl.
 .P
-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
 constructions. However, PCRE2 does have a "callout" feature, which allows an
 external function to be called during pattern matching. See the
 .\" HREF
@ -75,27 +90,24 @@ external function to be called during pattern matching. See the
 .\"
 documentation for details.
 .P
-8. Subroutine calls (whether recursive or not) were treated as atomic groups up
+9. Subroutine calls (whether recursive or not) were treated as atomic groups up
 to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
 into subroutine calls is now supported, as in Perl.
 .P
-9. If any of the backtracking control verbs are used in a group that is called
-as a subroutine (whether or not recursively), their effect is confined to that
-group; it does not extend to the surrounding pattern. This is not always the
-case in Perl. In particular, if (*THEN) is present in a group that is called as
-a subroutine, its action is limited to that group, even if the group does not
-contain any | characters. Note that such groups are processed as anchored
-at the point where they are tested.
+10. In PCRE2, if any of the backtracking control verbs are used in a group that
+is called as a subroutine (whether or not recursively), their effect is
+confined to that group; it does not extend to the surrounding pattern. This is
+not always the case in Perl. In particular, if (*THEN) is present in a group
+that is called as a subroutine, its action is limited to that group, even if
+the group does not contain any | characters. Note that such groups are
+processed as anchored at the point where they are tested.
 .P
-10. If a pattern contains more than one backtracking control verb, the first
+11. If a pattern contains more than one backtracking control verb, the first
 one that is backtracked onto acts. For example, in the pattern
 A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 same as PCRE2, but there are cases where it differs.
 .P
-11. Most backtracking verbs in assertions have their normal actions. They are
-not confined to the assertion.
-.P
 12. There are some differences that are concerned with the settings of captured
 strings when part of a pattern is repeated. For example, matching "aba" against
 the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
@ -104,7 +116,7 @@ the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
 13. PCRE2's handling of duplicate capture group numbers and names is not as
 general as Perl's. This is a consequence of the fact the PCRE2 works internally
 just with numbers, using an external table to translate between numbers and
-names. In particular, a pattern such as (?|(?<a>A)|(?<b>B), where the two
+names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two
 capture groups have the same number but different names, is not supported, and
 causes an error at compile time. If it were allowed, it would not be possible
 to distinguish which group matched, because both names map to capture group
@ -124,17 +136,24 @@ certainly user mistakes.
 16. In PCRE2, the upper/lower case character properties Lu and Ll are not
 affected when case-independent matching is specified. For example, \ep{Lu}
 always matches an upper case letter. I think Perl has changed in this respect;
-in the release at the time of writing (5.24), \ep{Lu} and \ep{Ll} match all
+in the release at the time of writing (5.34), \ep{Lu} and \ep{Ll} match all
 letters, regardless of case, when case independence is specified.
 .P
-17. PCRE2 provides some extensions to the Perl regular expression facilities.
-Perl 5.10 includes new features that are not in earlier versions of Perl, some
+17. From release 5.32.0, Perl locks out the use of \eK in lookaround
+assertions. From release 10.38 PCRE2 does the same by default. However, there
+is an option for re-enabling the previous behaviour. When this option is set,
+\eK is acted on when it occurs in positive assertions, but is ignored in
+negative assertions.
+.P
+18. PCRE2 provides some extensions to the Perl regular expression facilities.
+Perl 5.10 included new features that were not in earlier versions of Perl, some
 of which (such as named parentheses) were in PCRE2 for some time before. This
-list is with respect to Perl 5.26:
+list is with respect to Perl 5.34:
 .sp
 (a) Although lookbehind assertions in PCRE2 must match fixed length strings,
-each alternative branch of a lookbehind assertion can match a different length
-of string. Perl requires them all to have the same length.
+each alternative top-level branch of a lookbehind assertion can match a
+different length of string. Perl used to require them all to have the same
+length, but the latest version has some variable length support.
 .sp
 (b) From PCRE2 10.23, backreferences to groups of fixed length are supported
 in lookbehinds, provided that there is no possibility of referencing a
@ -168,18 +187,18 @@ variable interpolation, but not general hooks on every match.
 different way and is not Perl-compatible.
 .sp
 (l) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at
-the start of a pattern that set overall options that cannot be changed within
+the start of a pattern. These set overall options that cannot be changed within
 the pattern.
 .sp
 (m) PCRE2 supports non-atomic positive lookaround assertions. This is an
 extension to the lookaround facilities. The default, Perl-compatible
 lookarounds are atomic.
 .P
+19. The Perl /a modifier restricts \ed numbers to pure ascii, and the /aa
+19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
 modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode
 rules. This separation cannot be represented with PCRE2_UCP.
 .P
-19. Perl has different limits than PCRE2. See the
+20. Perl has different limits than PCRE2. See the
 .\" HREF
 \fBpcre2limit\fP
 .\"
@ -194,7 +213,7 @@ fall into any stack-overflow limit. PCRE2 made a similar change at release
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -203,6 +222,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 13 July 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 08 December 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2convert.3
+++ b/doc/pcre2convert.3
@ -116,8 +116,8 @@ permitted to match separator characters, but the double-star (**) feature
 (which does match separators) is supported.
 .P
 PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
-match separator characters. PCRE2_GLOB_NO_STARSTAR matches globs with the
-double-star feature disabled. These options may be given together.
+match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with
+the double-star feature disabled. These options may be given together.
 .
 .
 .SH "CONVERTING POSIX PATTERNS"
--- a/doc/pcre2demo.3
+++ b/doc/pcre2demo.3
@ -215,8 +215,8 @@ if (rc < 0)
  return 1;
  }

-/* Match succeded. Get a pointer to the output vector, where string offsets are
-stored. */
+/* Match succeeded. Get a pointer to the output vector, where string offsets
+are stored. */

 ovector = pcre2_get_ovector_pointer(match_data);
 printf("Match succeeded at offset %d\en", (int)ovector[0]);
@ -234,9 +234,12 @@ pcre2_match_data_create_from_pattern() above. */
 if (rc == 0)
  printf("ovector was not big enough for all the captured substrings\en");

-/* We must guard against patterns such as /(?=.\eK)/ that use \eK in an assertion
-to set the start of a match later than its end. In this demonstration program,
-we just detect this case and give up. */
+/* Since release 10.38 PCRE2 has locked out the use of \eK in lookaround
+assertions. However, there is an option to re-enable the old behaviour. If that
+is set, it is possible to run patterns such as /(?=.\eK)/ that use \eK in an
+assertion to set the start of a match later than its end. In this demonstration
+program, we show how to detect this case, but it shouldn't arise because the
+option is never set. */

 if (ovector[0] > ovector[1])
  {
@ -453,7 +456,7 @@ for (;;)
    return 1;
    }

-  /* Match succeded */
+  /* Match succeeded */

  printf("\enMatch succeeded again at offset %d\en", (int)ovector[0]);

--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "15 June 2019" "PCRE2 10.34"
+.TH PCRE2GREP 1 "30 July 2022" "PCRE2 10.41"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -43,13 +43,15 @@ For example:
 .sp
  pcre2grep some-pattern file1 - file3
 .sp
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how \fBpcre2grep\fP behaves. In
-particular, the \fB-M\fP option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-\fB-N\fP (\fB--newline\fP) option.
+However, there are options that can change how \fBpcre2grep\fP behaves. For
+example, the \fB-M\fP option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the \fB-N\fP
+(\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
+not file names are shown, and the \fB-Z\fP option changes the file name
+terminator to a zero byte.
 .P
 The amount of memory used for buffering files that are being scanned is
 controlled by parameters that can be set by the \fB--buffer-size\fP and
@ -79,8 +81,8 @@ matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
 (either shown literally, or as an offset), scanning resumes immediately
 following the match, so that further matches on the same line can be found. If
 there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier part
-of the line.
+but patterns that follow the one that matched are not tried on the earlier
+matched part of the line.
 .P
 This behaviour means that the order in which multiple patterns are specified
 can affect the output when one of the above options is used. This is no longer
@ -115,11 +117,10 @@ ignored.
 .rs
 .sp
 By default, a file that contains a binary zero byte within the first 1024 bytes
-is identified as a binary file, and is processed specially. (GNU grep
-identifies binary files in this manner.) However, if the newline type is
-specified as "nul", that is, the line terminator is a binary zero, the test for
-a binary file is not applied. See the \fB--binary-files\fP option for a means
-of changing the way binary files are handled.
+is identified as a binary file, and is processed specially. However, if the
+newline type is specified as NUL, that is, the line terminator is a binary
+zero, the test for a binary file is not applied. See the \fB--binary-files\fP
+option for a means of changing the way binary files are handled.
 .
 .
 .SH "BINARY ZEROS IN PATTERNS"
@ -150,22 +151,30 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of \fInumber\fP
-is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
+context lines (the \fB-Z\fP option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
+\fB-A\fP is ignored.
 .TP
 \fB-a\fP, \fB--text\fP
 Treat binary files as text. This is equivalent to
 \fB--binary-files\fP=\fItext\fP.
 .TP
+\fB--allow-lookaround-bsk\fP
+PCRE2 now forbids the use of \eK in lookarounds by default, in line with Perl.
+This option causes \fBpcre2grep\fP to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+option, which enables this somewhat dangerous usage.
+.TP
 \fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
 Output up to \fInumber\fP lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 \fInumber\fP lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of \fInumber\fP is expected to be relatively small. When
+instead of a colon for the context lines (the \fB-Z\fP option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of \fInumber\fP is expected to be relatively small. When
 \fB-c\fP is used, \fB-B\fP is ignored.
 .TP
 \fB--binary-files=\fP\fIword\fP
@ -352,19 +361,21 @@ shown separately. This option is mutually exclusive with \fB--output\fP,
 .TP
 \fB-H\fP, \fB--with-filename\fP
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the \fB-M\fP option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the \fB-M\fP option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
 .TP
 \fB-h\fP, \fB--no-filename\fP
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
+Suppress the output of file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The \fB-Z\fP option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
 .TP
 \fB--heap-limit\fP=\fInumber\fP
 See \fB--match-limit\fP below.
@ -383,8 +394,8 @@ Ignore upper/lower case distinctions during comparisons.
 .TP
 \fB--include\fP=\fIpattern\fP
 If any \fB--include\fP patterns are specified, the only files that are
-processed are those that match one of the patterns (and do not match an
-\fB--exclude\fP pattern). This option does not affect directories, but it
+processed are those whose names match one of the patterns and do not match an
+\fB--exclude\fP pattern. This option does not affect directories, but it
 applies to all files, whether listed on the command line, obtained from
 \fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular
 expression, and is matched against the final component of the file name, not
@ -401,8 +412,8 @@ may be given any number of times; all the files are read.
 .TP
 \fB--include-dir\fP=\fIpattern\fP
 If any \fB--include-dir\fP patterns are specified, the only directories that
-are processed are those that match one of the patterns (and do not match an
-\fB--exclude-dir\fP pattern). This applies to all directories, whether listed
+are processed are those whose names match one of the patterns and do not match
+an \fB--exclude-dir\fP pattern. This applies to all directories, whether listed
 on the command line, obtained from \fB--file-list\fP, or by scanning a parent
 directory. The pattern is a PCRE2 regular expression, and is matched against
 the final component of the directory name, not the entire path. The \fB-F\fP,
@ -413,18 +424,21 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
 \fB-L\fP, \fB--files-without-match\fP
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous \fB-H\fP,
-\fB-h\fP, or \fB-l\fP options.
+output once, on a separate line by default, but if the \fB-Z\fP option is set,
+they are separated by zero bytes instead of newlines. This option overrides any
+previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
 .TP
 \fB-l\fP, \fB--files-with-matches\fP
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the \fB-c\fP (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-\fB-c\fP is a way of suppressing the listing of files with no matches. This
-opeion overrides any previous \fB-H\fP, \fB-h\fP, or \fB-L\fP options.
+a separate line, but if the \fB-Z\fP option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches that
+occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
+\fB-h\fP, or \fB-L\fP options.
 .TP
 \fB--label\fP=\fIname\fP
 This option supplies a name to be used for the standard input when file names
@ -435,8 +449,8 @@ short form for this option.
 When this option is given, non-compressed input is read and processed line by
 line, and the output is flushed after each write. By default, input is read in
 large chunks, unless \fBpcre2grep\fP can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments or
-Windows). Output to terminal is normally automatically flushed by the operating
+terminal, which is currently possible only in Unix-like environments or
+Windows. Output to terminal is normally automatically flushed by the operating
 system. This option can be useful when the input or output is attached to a
 pipe and you do not want \fBpcre2grep\fP to buffer up large amounts of data.
 However, its use will affect performance, and the \fB-M\fP (multiline) option
@ -459,40 +473,6 @@ the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no
 locale is specified, the PCRE2 library's default (usually the "C" locale) is
 used. There is no short form for this option.
 .TP
-\fB--match-limit\fP=\fInumber\fP
-Processing some regular expression patterns may take a very long time to search
-for all possible matching strings. Others may require a very large amount of
-memory. There are three options that set resource limits for matching.
-.sp
-The \fB--match-limit\fP option provides a means of limiting computing resource
-usage when processing patterns that are not going to match, but which have a
-very large number of possibilities in their search trees. The classic example
-is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
-counter that is incremented each time around its main processing loop. If the
-value set by \fB--match-limit\fP is reached, an error occurs.
-.sp
-The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
-.sp
-The \fB--depth-limit\fP option limits the depth of nested backtracking points,
-which indirectly limits the amount of memory that is used. The amount of memory
-needed for each backtracking point depends on the number of capturing
-parentheses in the pattern, so the amount of memory that is used before this
-limit acts varies from pattern to pattern. This limit is of use only if it is
-set smaller than \fB--match-limit\fP.
-.sp
-There are no short forms for these options. The default limits can be set
-when the PCRE2 library is compiled; if they are not specified, the defaults
-are very large and so effectively unlimited.
-.TP
-\fB--max-buffer-size=\fInumber\fP
-This limits the expansion of the processing buffer, whose initial size can be
-set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
-smaller than the starting buffer size.
-.TP
 \fB-M\fP, \fB--multiline\fP
 Allow patterns to match more than one line. When this option is set, the PCRE2
 library is called in "multiline" mode. This allows a matched string to extend
@ -520,27 +500,74 @@ well as possibly handling a two-character newline sequence.
 There is a limit to the number of lines that can be matched, imposed by the way
 that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
 large processing buffer, this should not be a problem, but the \fB-M\fP option
-does not work when input is read line by line (see \fP--line-buffered\fP.)
+does not work when input is read line by line (see \fB--line-buffered\fP).
+.TP
+\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
+Stop processing after finding \fInumber\fP matching lines, or non-matching
+lines if \fB-v\fP is also set. Any trailing context lines are output after the
+final match. In multiline mode, each multiline match counts as just one line
+for this purpose. If this limit is reached when reading the standard input from
+a regular file, the file is left positioned just after the last matching line.
+If \fB-c\fP is also set, the count that is output is never greater than
+\fInumber\fP. This option has no effect if used with \fB-L\fP, \fB-l\fP, or
+\fB-q\fP, or when just checking for a match in a binary file.
+.TP
+\fB--match-limit\fP=\fInumber\fP
+Processing some regular expression patterns may take a very long time to search
+for all possible matching strings. Others may require a very large amount of
+memory. There are three options that set resource limits for matching.
+.sp
+The \fB--match-limit\fP option provides a means of limiting computing resource
+usage when processing patterns that are not going to match, but which have a
+very large number of possibilities in their search trees. The classic example
+is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
+counter that is incremented each time around its main processing loop. If the
+value set by \fB--match-limit\fP is reached, an error occurs.
+.sp
+The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
+1024 bytes), the maximum amount of heap memory that may be used for matching.
+.sp
+The \fB--depth-limit\fP option limits the depth of nested backtracking points,
+which indirectly limits the amount of memory that is used. The amount of memory
+needed for each backtracking point depends on the number of capturing
+parentheses in the pattern, so the amount of memory that is used before this
+limit acts varies from pattern to pattern. This limit is of use only if it is
+set smaller than \fB--match-limit\fP.
+.sp
+There are no short forms for these options. The default limits can be set
+when the PCRE2 library is compiled; if they are not specified, the defaults
+are very large and so effectively unlimited.
+.TP
+\fB--max-buffer-size\fP=\fInumber\fP
+This limits the expansion of the processing buffer, whose initial size can be
+set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
+smaller than the starting buffer size.
 .TP
 \fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
-The PCRE2 library supports five different conventions for indicating
-the ends of lines. They are the single-character sequences CR (carriage return)
-and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
-which recognizes any of the preceding three types, and an "any" convention, in
-which any Unicode line ending sequence is assumed to end a line. The Unicode
-sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
-(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
-PS (paragraph separator, U+2029).
+Six different conventions for indicating the ends of lines in scanned files are
+supported. For example:
+.sp
+  pcre2grep -N CRLF 'some pattern' <file>
+.sp
+The newline type may be specified in upper, lower, or mixed case. If the
+newline type is NUL, lines are separated by binary zero characters. The other
+types are the single-character sequences CR (carriage return) and LF
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
+recognizes any of the preceding three types, and an "any" type, for which any
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
+(paragraph separator, U+2029).
 .sp
 When the PCRE2 library is built, a default line-ending sequence is specified.
 This is normally the standard sequence for the operating system. Unless
 otherwise specified by this option, \fBpcre2grep\fP uses the library's default.
-The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
-makes it possible to use \fBpcre2grep\fP to scan files that have come from
-other environments without having to modify their line endings. If the data
-that is being scanned does not agree with the convention set by this option,
-\fBpcre2grep\fP may behave in strange ways. Note that this option does not
-apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
+.sp
+This option makes it possible to use \fBpcre2grep\fP to scan files that have
+come from other environments without having to modify their line endings. If
+the data that is being scanned does not agree with the convention set by this
+option, \fBpcre2grep\fP may behave in strange ways. Note that this option does
+not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
 \fB--include-from\fP options, which are expected to use the operating system's
 standard newline sequence.
 .TP
@ -559,25 +586,36 @@ use of JIT at run time. It is provided for testing and working round problems.
 It should never be needed in normal use.
 .TP
 \fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
-When there is a match, instead of outputting the whole line that matched,
-output just the given text. This option is mutually exclusive with
-\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
-sequences starting with a dollar character may be used to insert the contents
-of the matched part of the line and/or captured substrings into the text.
+When there is a match, instead of outputting the line that matched, output just
+the text specified in this option, followed by an operating-system standard
+newline. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP,
+and \fB-C\fP options are ignored. The \fB--newline\fP option has no effect on
+this option, which is mutually exclusive with \fB--only-matching\fP,
+\fB--file-offsets\fP, and \fB--line-offsets\fP. However, like
+\fB--only-matching\fP, if there is more than one match in a line, each of them
+causes a line of output.
 .sp
-$<digits> or ${<digits>} is replaced by the captured
-substring of the given decimal number; zero substitutes the whole match. If
-the number is greater than the number of capturing substrings, or if the
-capture is unset, the replacement is empty.
+Escape sequences starting with a dollar character may be used to insert the
+contents of the matched part of the line and/or captured substrings into the
+text.
+.sp
+$<digits> or ${<digits>} is replaced by the captured substring of the given
+decimal number; zero substitutes the whole match. If the number is greater than
+the number of capturing substrings, or if the capture is unset, the replacement
+is empty.
 .sp
 $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
 newline; $r by carriage return; $t by tab; $v by vertical tab.
 .sp
-$o<digits> is replaced by the character represented by the given octal
-number; up to three digits are processed.
+$o<digits> or $o{<digits>} is replaced by the character whose code point is the
+given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the
+second form must be used.
 .sp
-$x<digits> is replaced by the character represented by the given hexadecimal
-number; up to two digits are processed.
+$x<digits> or $x{<digits>} is replaced by the character represented by the
+given hexadecimal number. In the first form, up to two hexadecimal digits are
+processed. When more digits are needed in Unicode mode to specify a wide
+character, the second form must be used.
 .sp
 Any other character is substituted by itself. In particular, $$ is replaced by
 a single dollar.
@ -636,7 +674,8 @@ immediate end-of-file. This option is a shorthand for setting the \fB-d\fP
 option to "recurse".
 .TP
 \fB--recursion-limit\fP=\fInumber\fP
-See \fB--match-limit\fP above.
+This is an obsolete synonym for \fB--depth-limit\fP. See \fB--match-limit\fP
+above for details.
 .TP
 \fB-s\fP, \fB--no-messages\fP
 Suppress error messages about non-existent or unreadable files. Such files are
@ -657,14 +696,17 @@ total would always be zero.
 \fB-u\fP, \fB--utf\fP
 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
 with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
-\fB--include\fP options) and all subject lines that are scanned must be valid
-strings of UTF-8 characters.
+\fB--include\fP options) and all lines that are scanned must be valid strings
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
+occurs.
 .TP
 \fB-U\fP, \fB--utf-allow-invalid\fP
 As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
-unit sequences. These can never form part of any pattern match. This facility
-allows valid UTF-8 strings to be sought in executable or other binary files.
-For more details about matching in non-valid UTF-8 strings, see the
+unit sequences. These can never form part of any pattern match. Patterns
+themselves, however, must still be valid UTF-8 strings. This facility allows
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
+or other binary files. For more details about matching in non-valid UTF-8
+strings, see the
 .\" HREF
 \fBpcre2unicode\fP(3)
 .\"
@ -677,7 +719,9 @@ ignored.
 .TP
 \fB-v\fP, \fB--invert-match\fP
 Invert the sense of the match, so that lines which do \fInot\fP match any of
-the patterns are the ones that are found.
+the patterns are the ones that are found. When this option is set, options such
+as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match
+that are to be output, are ignored.
 .TP
 \fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP
 Force the patterns only to match "words". That is, there must be a word
@ -694,6 +738,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
 pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-Z\fP, \fB--null\fP
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
 .
 .
 .SH "ENVIRONMENT VARIABLES"
@ -709,16 +759,25 @@ by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default
 .rs
 .sp
 The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
-different newline conventions from the default. Any parts of the input files
-that are written to the standard output are copied identically, with whatever
-newline sequences they have in the input. However, the setting of this option
-affects only the way scanned files are processed. It does not affect the
-interpretation of files specified by the \fB-f\fP, \fB--file-list\fP,
-\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the
-way in which \fBpcre2grep\fP writes informational messages to the standard
-error and output streams. For these it uses the string "\en" to indicate
-newlines, relying on the C I/O library to convert this to an appropriate
-sequence.
+newline conventions that differ from the default. This option affects only the
+way scanned files are processed. It does not affect the interpretation of files
+specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or
+\fB--include-from\fP options.
+.P
+Any parts of the scanned input files that are written to the standard output
+are copied with whatever newline sequences they have in the input. However, if
+the final line of a file is output, and it does not end with a newline
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
+single LF is used.
+.P
+The newline setting does not affect the way in which \fBpcre2grep\fP writes
+newlines in informational messages to the standard output and error streams.
+Under Windows, the standard output is set to be binary, so that "\er\en" at the
+ends of output lines that are copied from the input is not converted to
+"\er\er\en" by the C I/O library. This means that any messages written to the
+standard output must end with "\er\en". For all other operating systems, and
+for all messages to the standard error stream, "\en" is used.
 .
 .
 .SH "OPTIONS COMPATIBILITY"
@ -795,12 +854,36 @@ documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
 only callouts with string arguments are useful.
 .
 .
+.SS "Echoing a specific string"
+.rs
+.sp
+Starting the callout string with a pipe character invokes an echoing facility
+that avoids calling an external program or script. This facility is always
+available, provided that callouts were not completely disabled when
+\fBpcre2grep\fP was built. The rest of the callout string is processed as a
+zero-terminated string, which means it should not contain any internal binary
+zeros. It is written to the output, having first been passed through the same
+escape processing as text from the \fB--output\fP (\fB-O\fP) option (see
+above). However, $0 cannot be used to insert a matched substring because the
+match is still in progress. Instead, the single character '0' is inserted. Any
+syntax errors in the string (for example, a dollar not followed by another
+character) cause the callout to be ignored. No terminator is added to the
+output string, so if you want a newline, you must include it explicitly using
+the escape $n. For example:
+.sp
+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
+.sp
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+pattern with (*FAIL).
+.
+.
 .SS "Calling external programs or scripts"
 .rs
 .sp
 This facility can be independently disabled when \fBpcre2grep\fP is built. It
 is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
-where \fBlib$spawn()\fP is used, and for any other Unix-like environment where
+where \fBlib$spawn()\fP is used, and for any Unix-like environment where
 \fBfork()\fP and \fBexecv()\fP are available.
 .P
 If the callout string does not start with a pipe (vertical bar) character, it
@ -811,13 +894,11 @@ arguments:
  executable_name|arg1|arg2|...
 .sp
 Any substring (including the executable name) may contain escape sequences
-started by a dollar character: $<digits> or ${<digits>} is replaced by the
-captured substring of the given decimal number, which must be greater than
-zero. If the number is greater than the number of capturing substrings, or if
-the capture is unset, the replacement is empty.
-.P
-Any other character is substituted by itself. In particular, $$ is replaced by
-a single dollar and $| is replaced by a pipe character. Here is an example:
+started by a dollar character. These are the same as for the \fB--output\fP
+(\fB-O\fP) option documented above, except that $0 cannot insert the matched
+string because the match is still in progress. Instead, the character '0'
+is inserted. If you need a literal dollar or pipe character in any
+substring, use $$ or $| respectively. Here is an example:
 .sp
  echo -e "abcde\en12345" | pcre2grep \e
    '(?x)(.)(..(.))
@ -830,28 +911,14 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345
 .sp
-The parameters for the system call that is used to run the
-program or script are zero-terminated strings. This means that binary zero
-characters in the callout argument will cause premature termination of their
-substrings, and therefore should not be present. Any syntax errors in the
-string (for example, a dollar not followed by another character) cause the
-callout to be ignored. If running the program fails for any reason (including
-the non-existence of the executable), a local matching failure occurs and the
-matcher backtracks in the normal way.
-.
-.
-.SS "Echoing a specific string"
-.rs
-.sp
-This facility is always available, provided that callouts were not completely
-disabled when \fBpcre2grep\fP was built. If the callout string starts with a
-pipe (vertical bar) character, the rest of the string is written to the output,
-having been passed through the same escape processing as text from the --output
-option. This provides a simple echoing facility that avoids calling an external
-program or script. No terminator is added to the string, so if you want a
-newline, you must include it explicitly. Matching continues normally after the
-string is output. If you want to see only the callout output but not any output
-from an actual match, you should end the relevant pattern with (*FAIL).
+The parameters for the system call that is used to run the program or script
+are zero-terminated strings. This means that binary zero characters in the
+callout argument will cause premature termination of their substrings, and
+therefore should not be present. Any syntax errors in the string (for example,
+a dollar not followed by another character) cause the callout to be ignored.
+If running the program fails for any reason (including the non-existence of the
+executable), a local matching failure occurs and the matcher backtracks in the
+normal way.
 .
 .
 .SH "MATCHING ERRORS"
@ -887,7 +954,8 @@ because VMS does not distinguish between exit(0) and exit(1).
 .SH "SEE ALSO"
 .rs
 .sp
-\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3).
+\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3),
+\fBpcre2unicode\fP(3).
 .
 .
 .SH AUTHOR
@ -895,7 +963,7 @@ because VMS does not distinguish between exit(0) and exit(1).
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -904,6 +972,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 15 June 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 30 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@ -42,13 +42,15 @@ DESCRIPTION

         pcre2grep some-pattern file1 - file3

-       Input files are searched line by  line.  By  default,  each  line  that
+       By default, input files are searched  line  by  line.  Each  line  that
       matches  a  pattern  is  copied to the standard output, and if there is
       more than one file, the file name is output at the start of each  line,
       followed  by  a  colon.  However, there are options that can change how
-       pcre2grep behaves. In particular, the -M option makes  it  possible  to
+       pcre2grep behaves. For example, the -M  option  makes  it  possible  to
       search  for  strings  that  span  line  boundaries. What defines a line
-       boundary is controlled by the -N (--newline) option.
+       boundary is controlled by the -N (--newline) option. The -h and -H  op-
+       tions  control  whether  or not file names are shown, and the -Z option
+       changes the file name terminator to a zero byte.

       The amount of memory used for buffering files that are being scanned is
       controlled  by  parameters  that  can  be  set by the --buffer-size and
@ -80,7 +82,7 @@ DESCRIPTION
       following  the  match,  so that further matches on the same line can be
       found. If there are multiple patterns, they are all tried  on  the  re-
       mainder  of the line, but patterns that follow the one that matched are
-       not tried on the earlier part of the line.
+       not tried on the earlier matched part of the line.

       This behaviour means that the order  in  which  multiple  patterns  are
       specified  can affect the output when one of the above options is used.
@ -115,10 +117,10 @@ BINARY FILES

       By  default,  a  file that contains a binary zero byte within the first
       1024 bytes is identified as a binary file, and is processed  specially.
-       (GNU grep identifies binary files in this manner.) However, if the new-
-       line type is specified as "nul", that is, the line terminator is a  bi-
-       nary zero, the test for a binary file is not applied. See the --binary-
-       files option for a means of changing the way binary files are handled.
+       However,  if  the  newline  type is specified as NUL, that is, the line
+       terminator is a binary zero, the test for a binary file is not applied.
+       See  the  --binary-files  option for a means of changing the way binary
+       files are handled.


 BINARY ZEROS IN PATTERNS
@ -149,26 +151,35 @@ OPTIONS
                 the  file  is  reached,  or if the processing buffer size has
                 been set too small. If file names and/or line numbers are be-
                 ing output, a hyphen separator is used instead of a colon for
-                 the context lines. A line containing "--" is  output  between
-                 each  group  of  lines, unless they are in fact contiguous in
-                 the input file. The value of number is expected to  be  rela-
-                 tively small. When -c is used, -A is ignored.
+                 the context lines (the -Z option can be used  to  change  the
+                 file  name terminator to a zero byte). A line containing "--"
+                 is output between each group of lines,  unless  they  are  in
+                 fact contiguous in the input file. The value of number is ex-
+                 pected to be relatively small. When -c is  used,  -A  is  ig-
+                 nored.

       -a, --text
                 Treat  binary  files as text. This is equivalent to --binary-
                 files=text.

+       --allow-lookaround-bsk
+                 PCRE2 now forbids the use of \K in lookarounds by default, in
+                 line  with  Perl.   This  option  causes pcre2grep to set the
+                 PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option, which  enables  this
+                 somewhat dangerous usage.
+
       -B number, --before-context=number
-                 Output up to number lines of  context  before  each  matching
-                 line.  Fewer  lines  are  output if the previous match or the
-                 start of the file is within number lines, or if the  process-
-                 ing  buffer size has been set too small. If file names and/or
+                 Output  up  to  number  lines of context before each matching
+                 line. Fewer lines are output if the  previous  match  or  the
+                 start  of the file is within number lines, or if the process-
+                 ing buffer size has been set too small. If file names  and/or
                 line numbers are being output, a hyphen separator is used in-
-                 stead  of  a  colon  for the context lines. A line containing
-                 "--" is output between each group of lines, unless  they  are
-                 in  fact contiguous in the input file. The value of number is
-                 expected to be relatively small. When -c is used, -B  is  ig-
-                 nored.
+                 stead of a colon for the context lines (the -Z option can  be
+                 used  to  change  the file name terminator to a zero byte). A
+                 line containing "--" is output between each group  of  lines,
+                 unless  they  are  in  fact contiguous in the input file. The
+                 value of number is expected to be relatively small.  When  -c
+                 is used, -B is ignored.

       --binary-files=word
                 Specify  how binary files are to be processed. If the word is
@ -381,88 +392,94 @@ OPTIONS

       -H, --with-filename
                 Force  the  inclusion of the file name at the start of output
-                 lines when searching a single file. By default, the file name
-                 is not shown in this case.  For matching lines, the file name
-                 is followed by a colon; for context lines, a hyphen separator
-                 is  used.  If  a line number is also being output, it follows
-                 the file name. When the -M option causes a pattern  to  match
-                 more  than  one  line, only the first is preceded by the file
-                 name. This option overrides any previous -h, -l,  or  -L  op-
-                 tions.
+                 lines when searching a single file. The file name is not nor-
+                 mally  shown  in  this case.  By default, for matching lines,
+                 the file name is followed by a colon; for  context  lines,  a
+                 hyphen separator is used. The -Z option can be used to change
+                 the terminator to a zero byte. If a line number is also being
+                 output, it follows the file name. When the -M option causes a
+                 pattern to match more than one line, only the first  is  pre-
+                 ceded  by  the  file name. This option overrides any previous
+                 -h, -l, or -L options.

       -h, --no-filename
                 Suppress the output file names when searching multiple files.
-                 By default, file names are  shown  when  multiple  files  are
-                 searched.  For matching lines, the file name is followed by a
-                 colon; for context lines, a hyphen separator is used.   If  a
-                 line  number  is also being output, it follows the file name.
-                 This option overrides any previous -H, -L, or -l options.
+                 File  names  are  normally  shown  when  multiple  files  are
+                 searched. By default, for matching lines, the  file  name  is
+                 followed by a colon; for context lines, a hyphen separator is
+                 used. The -Z option can be used to change the terminator to a
+                 zero  byte. If a line number is also being output, it follows
+                 the file name.  This option overrides any previous -H, -L, or
+                 -l options.

       --heap-limit=number
                 See --match-limit below.

-       --help    Output a help message, giving brief details  of  the  command
-                 options  and  file type support, and then exit. Anything else
+       --help    Output  a  help  message, giving brief details of the command
+                 options and file type support, and then exit.  Anything  else
                 on the command line is ignored.

-       -I        Ignore  binary  files.  This  is  equivalent   to   --binary-
+       -I        Ignore   binary   files.  This  is  equivalent  to  --binary-
                 files=without-match.

       -i, --ignore-case
                 Ignore upper/lower case distinctions during comparisons.

       --include=pattern
-                 If  any --include patterns are specified, the only files that
-                 are processed are those that match one of the  patterns  (and
-                 do  not match an --exclude pattern). This option does not af-
-                 fect directories, but it applies to all files, whether listed
-                 on  the  command line, obtained from --file-list, or by scan-
-                 ning a directory. The pattern is a PCRE2 regular  expression,
-                 and  is matched against the final component of the file name,
-                 not the entire path. The -F, -w, and -x options do not  apply
-                 to this pattern. The option may be given any number of times.
-                 If a file name matches both an  --include  and  an  --exclude
-                 pattern, it is excluded.  There is no short form for this op-
-                 tion.
+                 If any --include patterns are specified, the only files  that
+                 are processed are those whose names match one of the patterns
+                 and do not match an --exclude pattern. This option  does  not
+                 affect  directories,  but  it  applies  to all files, whether
+                 listed on the command line, obtained from --file-list, or  by
+                 scanning  a directory. The pattern is a PCRE2 regular expres-
+                 sion, and is matched against the final component of the  file
+                 name,  not the entire path. The -F, -w, and -x options do not
+                 apply to this pattern. The option may be given any number  of
+                 times.  If a file name matches both an --include and an --ex-
+                 clude pattern, it is excluded.  There is no  short  form  for
+                 this option.

       --include-from=filename
-                 Treat each non-empty line of the file  as  the  data  for  an
+                 Treat  each  non-empty  line  of  the file as the data for an
                 --include option. What constitutes a newline for this purpose
-                 is the operating system's default. The --newline  option  has
+                 is  the  operating system's default. The --newline option has
                 no effect on this option. This option may be given any number
                 of times; all the files are read.

       --include-dir=pattern
-                 If any --include-dir patterns are specified, the only  direc-
-                 tories  that  are  processed  are those that match one of the
-                 patterns (and do not match an  --exclude-dir  pattern).  This
-                 applies  to  all  directories,  whether listed on the command
-                 line, obtained from --file-list, or by scanning a parent  di-
-                 rectory.  The  pattern  is a PCRE2 regular expression, and is
-                 matched against the final component of  the  directory  name,
-                 not  the entire path. The -F, -w, and -x options do not apply
+                 If  any --include-dir patterns are specified, the only direc-
+                 tories that are processed are those whose names match one  of
+                 the  patterns and do not match an --exclude-dir pattern. This
+                 applies to all directories, whether  listed  on  the  command
+                 line,  obtained from --file-list, or by scanning a parent di-
+                 rectory. The pattern is a PCRE2 regular  expression,  and  is
+                 matched  against  the  final component of the directory name,
+                 not the entire path. The -F, -w, and -x options do not  apply
                 to this pattern. The option may be given any number of times.
-                 If  a directory matches both --include-dir and --exclude-dir,
+                 If a directory matches both --include-dir and  --exclude-dir,
                 it is excluded. There is no short form for this option.

       -L, --files-without-match
-                 Instead of outputting lines from the files, just  output  the
-                 names  of  the files that do not contain any lines that would
-                 have been output. Each file name is output once, on  a  sepa-
-                 rate  line.  This option overrides any previous -H, -h, or -l
-                 options.
+                 Instead  of  outputting lines from the files, just output the
+                 names of the files that do not contain any lines  that  would
+                 have  been  output. Each file name is output once, on a sepa-
+                 rate line by default, but if the -Z option is set,  they  are
+                 separated  by  zero  bytes  instead  of newlines. This option
+                 overrides any previous -H, -h, or -l options.

       -l, --files-with-matches
                 Instead of outputting lines from the files, just  output  the
                 names of the files containing lines that would have been out-
-                 put. Each file name is  output  once,  on  a  separate  line.
-                 Searching  normally stops as soon as a matching line is found
-                 in a file. However, if the -c (count) option  is  also  used,
-                 matching  continues in order to obtain the correct count, and
-                 those files that have at least one  match  are  listed  along
-                 with their counts. Using this option with -c is a way of sup-
-                 pressing the listing of files with no  matches.  This  opeion
-                 overrides any previous -H, -h, or -L options.
+                 put. Each file name is output once, on a separate  line,  but
+                 if the -Z option is set, they are separated by zero bytes in-
+                 stead of newlines. Searching normally  stops  as  soon  as  a
+                 matching  line is found in a file. However, if the -c (count)
+                 option is also used, matching continues in  order  to  obtain
+                 the  correct  count,  and  those files that have at least one
+                 match are listed along with their counts. Using  this  option
+                 with  -c is a way of suppressing the listing of files with no
+                 matches that occurs with -c on its own. This option overrides
+                 any previous -H, -h, or -L options.

       --label=name
                 This option supplies a name to be used for the standard input
@ -473,15 +490,15 @@ OPTIONS
                 When  this  option is given, non-compressed input is read and
                 processed line by line, and the output is flushed after  each
                 write.  By  default,  input  is  read in large chunks, unless
-                 pcre2grep can determine that it is reading  from  a  terminal
-                 (which  is  currently possible only in Unix-like environments
-                 or Windows). Output to  terminal  is  normally  automatically
-                 flushed  by  the  operating system. This option can be useful
-                 when the input or output is attached to a pipe and you do not
-                 want  pcre2grep to buffer up large amounts of data.  However,
-                 its use will affect performance, and the -M  (multiline)  op-
-                 tion  ceases  to work. When input is from a compressed .gz or
-                 .bz2 file, --line-buffered is ignored.
+                 pcre2grep can determine that it is reading from  a  terminal,
+                 which is currently possible only in Unix-like environments or
+                 Windows. Output to terminal is normally automatically flushed
+                 by  the  operating system. This option can be useful when the
+                 input or output is attached to a pipe and  you  do  not  want
+                 pcre2grep  to  buffer up large amounts of data.  However, its
+                 use will affect performance, and the  -M  (multiline)  option
+                 ceases  to  work. When input is from a compressed .gz or .bz2
+                 file, --line-buffered is ignored.

       --line-offsets
                 Instead of showing lines or parts of lines that  match,  show
@ -501,27 +518,71 @@ OPTIONS
                 brary's default (usually the "C" locale) is used. There is no
                 short form for this option.

+       -M, --multiline
+                 Allow patterns to match more than one line. When this  option
+                 is set, the PCRE2 library is called in "multiline" mode. This
+                 allows a matched string to extend past the end of a line  and
+                 continue  on one or more subsequent lines. Patterns used with
+                 -M may usefully contain literal newline characters and inter-
+                 nal  occurrences of ^ and $ characters. The output for a suc-
+                 cessful match may consist of more than one  line.  The  first
+                 line  is  the  line  in which the match started, and the last
+                 line is the line in which the match  ended.  If  the  matched
+                 string  ends  with a newline sequence, the output ends at the
+                 end of that line.  If -v is set,  none  of  the  lines  in  a
+                 multi-line  match  are output. Once a match has been handled,
+                 scanning restarts at the beginning of the line after the  one
+                 in which the match ended.
+
+                 The  newline  sequence  that separates multiple lines must be
+                 matched as part of the pattern.  For  example,  to  find  the
+                 phrase  "regular  expression" in a file where "regular" might
+                 be at the end of a line and "expression" at the start of  the
+                 next line, you could use this command:
+
+                   pcre2grep -M 'regular\s+expression' <file>
+
+                 The \s escape sequence matches any white space character, in-
+                 cluding newlines, and is followed by + so as to match  trail-
+                 ing  white  space  on the first line as well as possibly han-
+                 dling a two-character newline sequence.
+
+                 There is a limit to the number of lines that can be  matched,
+                 imposed  by  the way that pcre2grep buffers the input file as
+                 it scans it. With a  sufficiently  large  processing  buffer,
+                 this should not be a problem, but the -M option does not work
+                 when input is read line by line (see --line-buffered).
+
+       -m number, --max-count=number
+                 Stop processing after finding number matching lines, or  non-
+                 matching  lines if -v is also set. Any trailing context lines
+                 are output after the final match.  In  multiline  mode,  each
+                 multiline  match counts as just one line for this purpose. If
+                 this limit is reached when reading the standard input from  a
+                 regular file, the file is left positioned just after the last
+                 matching line.  If -c is also set, the count that  is  output
+                 is  never  greater  than number. This option has no effect if
+                 used with -L, -l, or -q, or when just checking for a match in
+                 a binary file.
+
       --match-limit=number
-                 Processing some regular expression patterns may take  a  very
+                 Processing  some  regular expression patterns may take a very
                 long time to search for all possible matching strings. Others
-                 may require a very large amount of memory.  There  are  three
+                 may  require  a  very large amount of memory. There are three
                 options that set resource limits for matching.

                 The --match-limit option provides a means of limiting comput-
-                 ing resource usage when processing patterns that are not  go-
+                 ing  resource usage when processing patterns that are not go-
                 ing to match, but which have a very large number of possibil-
                 ities in their search trees. The classic example is a pattern
-                 that  uses  nested unlimited repeats. Internally, PCRE2 has a
-                 counter that is incremented each time around  its  main  pro-
-                 cessing  loop.  If the value set by --match-limit is reached,
+                 that uses nested unlimited repeats. Internally, PCRE2  has  a
+                 counter  that  is  incremented each time around its main pro-
+                 cessing loop. If the value set by --match-limit  is  reached,
                 an error occurs.

-                 The --heap-limit option specifies, as a number  of  kibibytes
-                 (units  of 1024 bytes), the amount of heap memory that may be
-                 used for matching. Heap memory is needed only if matching the
-                 pattern  requires a significant number of nested backtracking
-                 points to be remembered. This parameter can be set to zero to
-                 forbid the use of heap memory altogether.
+                 The  --heap-limit  option specifies, as a number of kibibytes
+                 (units of 1024 bytes), the maximum amount of heap memory that
+                 may be used for matching.

                 The  --depth-limit  option  limits  the depth of nested back-
                 tracking points, which indirectly limits the amount of memory
@ -542,66 +603,37 @@ OPTIONS
                 size is silently forced to be no smaller  than  the  starting
                 buffer size.

-       -M, --multiline
-                 Allow  patterns to match more than one line. When this option
-                 is set, the PCRE2 library is called in "multiline" mode. This
-                 allows  a matched string to extend past the end of a line and
-                 continue on one or more subsequent lines. Patterns used  with
-                 -M may usefully contain literal newline characters and inter-
-                 nal occurrences of ^ and $ characters. The output for a  suc-
-                 cessful  match  may  consist of more than one line. The first
-                 line is the line in which the match  started,  and  the  last
-                 line  is  the  line  in which the match ended. If the matched
-                 string ends with a newline sequence, the output ends  at  the
-                 end  of  that  line.   If  -v  is set, none of the lines in a
-                 multi-line match are output. Once a match has  been  handled,
-                 scanning  restarts at the beginning of the line after the one
-                 in which the match ended.
-
-                 The newline sequence that separates multiple  lines  must  be
-                 matched  as  part  of  the  pattern. For example, to find the
-                 phrase "regular expression" in a file where  "regular"  might
-                 be  at the end of a line and "expression" at the start of the
-                 next line, you could use this command:
-
-                   pcre2grep -M 'regular\s+expression' <file>
-
-                 The \s escape sequence matches any white space character, in-
-                 cluding  newlines, and is followed by + so as to match trail-
-                 ing white space on the first line as well  as  possibly  han-
-                 dling a two-character newline sequence.
-
-                 There  is a limit to the number of lines that can be matched,
-                 imposed by the way that pcre2grep buffers the input  file  as
-                 it  scans  it.  With  a sufficiently large processing buffer,
-                 this should not be a problem, but the -M option does not work
-                 when input is read line by line (see --line-buffered.)
-
       -N newline-type, --newline=newline-type
-                 The PCRE2 library supports five different conventions for in-
-                 dicating the ends of lines. They are the single-character se-
-                 quences CR (carriage return) and LF (linefeed), the two-char-
-                 acter sequence CRLF, an "anycrlf"  convention,  which  recog-
-                 nizes  any of the preceding three types, and an "any" conven-
-                 tion, in which any Unicode line ending sequence is assumed to
-                 end  a  line.  The  Unicode sequences are the three just men-
-                 tioned, plus  VT  (vertical  tab,  U+000B),  FF  (form  feed,
-                 U+000C),   NEL  (next  line,  U+0085),  LS  (line  separator,
-                 U+2028), and PS (paragraph separator, U+2029).
+                 Six different conventions for indicating the ends of lines in
+                 scanned files are supported. For example:
+
+                   pcre2grep -N CRLF 'some pattern' <file>
+
+                 The newline type may be specified in upper, lower,  or  mixed
+                 case.  If the newline type is NUL, lines are separated by bi-
+                 nary zero characters. The other types are the  single-charac-
+                 ter  sequences  CR  (carriage  return) and LF (linefeed), the
+                 two-character sequence CRLF, an "anycrlf" type, which  recog-
+                 nizes  any  of  the preceding three types, and an "any" type,
+                 for which any Unicode line ending sequence is assumed to  end
+                 a  line.  The Unicode sequences are the three just mentioned,
+                 plus VT (vertical tab, U+000B), FF (form feed,  U+000C),  NEL
+                 (next  line,  U+0085),  LS  (line  separator, U+2028), and PS
+                 (paragraph separator, U+2029).

                 When the PCRE2 library is built, a  default  line-ending  se-
                 quence  is specified.  This is normally the standard sequence
                 for the operating system. Unless otherwise specified by  this
-                 option,  pcre2grep  uses the library's default.  The possible
-                 values for this option are CR, LF,  CRLF,  ANYCRLF,  or  ANY.
-                 This  makes  it  possible to use pcre2grep to scan files that
-                 have come from other environments without  having  to  modify
-                 their  line  endings.  If the data that is being scanned does
-                 not agree with the convention set by this  option,  pcre2grep
-                 may  behave  in  strange ways. Note that this option does not
-                 apply to files specified by the -f, --exclude-from, or  --in-
-                 clude-from  options,  which are expected to use the operating
-                 system's standard newline sequence.
+                 option, pcre2grep uses the library's default.
+
+                 This  option makes it possible to use pcre2grep to scan files
+                 that have come from other environments without having to mod-
+                 ify  their  line  endings.  If the data that is being scanned
+                 does not agree  with  the  convention  set  by  this  option,
+                 pcre2grep  may  behave in strange ways. Note that this option
+                 does not apply to files specified by the -f,  --exclude-from,
+                 or  --include-from options, which are expected to use the op-
+                 erating system's standard newline sequence.

       -n, --line-number
                 Precede each output line by its line number in the file, fol-
@ -619,95 +651,109 @@ OPTIONS
                 lems.  It should never be needed in normal use.

       -O text, --output=text
-                 When there is a match, instead of outputting the  whole  line
-                 that  matched, output just the given text. This option is mu-
-                 tually exclusive with  --only-matching,  --file-offsets,  and
-                 --line-offsets. Escape sequences starting with a dollar char-
-                 acter may be used to insert the contents of the matched  part
-                 of the line and/or captured substrings into the text.
+                 When there is a match, instead of outputting  the  line  that
+                 matched,  output just the text specified in this option, fol-
+                 lowed by an operating-system standard newline. In this  mode,
+                 no  context is shown. That is, the -A, -B, and -C options are
+                 ignored. The --newline option has no effect on  this  option,
+                 which is mutually exclusive with --only-matching, --file-off-
+                 sets, and --line-offsets. However, like  --only-matching,  if
+                 there is more than one match in a line, each of them causes a
+                 line of output.

-                 $<digits>  or  ${<digits>}  is  replaced by the captured sub-
-                 string of the given  decimal  number;  zero  substitutes  the
+                 Escape sequences starting with a dollar character may be used
+                 to insert the contents of the matched part of the line and/or
+                 captured substrings into the text.
+
+                 $<digits> or ${<digits>} is replaced  by  the  captured  sub-
+                 string  of  the  given  decimal  number; zero substitutes the
                 whole match. If the number is greater than the number of cap-
-                 turing substrings, or if the capture is unset,  the  replace-
+                 turing  substrings,  or if the capture is unset, the replace-
                 ment is empty.

-                 $a  is replaced by bell; $b by backspace; $e by escape; $f by
-                 form feed; $n by newline; $r by carriage return; $t  by  tab;
+                 $a is replaced by bell; $b by backspace; $e by escape; $f  by
+                 form  feed;  $n by newline; $r by carriage return; $t by tab;
                 $v by vertical tab.

-                 $o<digits>  is  replaced  by the character represented by the
-                 given octal number; up to three digits are processed.
+                 $o<digits> or $o{<digits>} is replaced by the character whose
+                 code  point  is the given octal number. In the first form, up
+                 to three octal digits are processed.  When  more  digits  are
+                 needed  in Unicode mode to specify a wide character, the sec-
+                 ond form must be used.

-                 $x<digits> is replaced by the character  represented  by  the
-                 given hexadecimal number; up to two digits are processed.
+                 $x<digits> or $x{<digits>} is replaced by the character  rep-
+                 resented  by the given hexadecimal number. In the first form,
+                 up to two hexadecimal digits are processed. When more  digits
+                 are  needed  in Unicode mode to specify a wide character, the
+                 second form must be used.

-                 Any  other character is substituted by itself. In particular,
+                 Any other character is substituted by itself. In  particular,
                 $$ is replaced by a single dollar.

       -o, --only-matching
                 Show only the part of the line that matched a pattern instead
-                 of  the  whole  line. In this mode, no context is shown. That
-                 is, the -A, -B, and -C options are ignored. If there is  more
-                 than  one  match in a line, each of them is shown separately,
-                 on a separate line of output. If -o is combined with -v  (in-
-                 vert  the  sense of the match to find non-matching lines), no
-                 output is generated, but the return  code  is  set  appropri-
-                 ately.  If  the matched portion of the line is empty, nothing
-                 is output unless the file  name  or  line  number  are  being
-                 printed,  in  which case they are shown on an otherwise empty
+                 of the whole line. In this mode, no context  is  shown.  That
+                 is,  the -A, -B, and -C options are ignored. If there is more
+                 than one match in a line, each of them is  shown  separately,
+                 on  a separate line of output. If -o is combined with -v (in-
+                 vert the sense of the match to find non-matching  lines),  no
+                 output  is  generated,  but  the return code is set appropri-
+                 ately. If the matched portion of the line is  empty,  nothing
+                 is  output  unless  the  file  name  or line number are being
+                 printed, in which case they are shown on an  otherwise  empty
                 line.  This  option  is  mutually  exclusive  with  --output,
                 --file-offsets and --line-offsets.

       -onumber, --only-matching=number
-                 Show  only  the  part  of the line that matched the capturing
+                 Show only the part of the line  that  matched  the  capturing
                 parentheses of the given number. Up to 50 capturing parenthe-
-                 ses  are  supported by default. This limit can be changed via
-                 the --om-capture option. A pattern may contain any number  of
-                 capturing  parentheses, but only those whose number is within
-                 the limit can be accessed by -o. An error occurs if the  num-
+                 ses are supported by default. This limit can be  changed  via
+                 the  --om-capture option. A pattern may contain any number of
+                 capturing parentheses, but only those whose number is  within
+                 the  limit can be accessed by -o. An error occurs if the num-
                 ber specified by -o is greater than the limit.

                 -o0 is the same as -o without a number. Because these options
-                 can be given without an argument (see above), if an  argument
-                 is  present, it must be given in the same shell item, for ex-
-                 ample, -o3 or --only-matching=2. The comments given  for  the
-                 non-argument  case  above  also  apply to this option. If the
-                 specified capturing parentheses do not exist in the  pattern,
-                 or  were  not  set in the match, nothing is output unless the
+                 can  be given without an argument (see above), if an argument
+                 is present, it must be given in the same shell item, for  ex-
+                 ample,  -o3  or --only-matching=2. The comments given for the
+                 non-argument case above also apply to  this  option.  If  the
+                 specified  capturing parentheses do not exist in the pattern,
+                 or were not set in the match, nothing is  output  unless  the
                 file name or line number are being output.

-                 If this option is given multiple times,  multiple  substrings
-                 are  output  for  each  match,  in  the order the options are
-                 given, and all on one line. For example, -o3 -o1  -o3  causes
-                 the  substrings  matched by capturing parentheses 3 and 1 and
-                 then 3 again to be output. By default, there is no  separator
+                 If  this  option is given multiple times, multiple substrings
+                 are output for each match,  in  the  order  the  options  are
+                 given,  and  all on one line. For example, -o3 -o1 -o3 causes
+                 the substrings matched by capturing parentheses 3 and  1  and
+                 then  3 again to be output. By default, there is no separator
                 (but see the next but one option).

       --om-capture=number
-                 Set  the number of capturing parentheses that can be accessed
+                 Set the number of capturing parentheses that can be  accessed
                 by -o. The default is 50.

       --om-separator=text
-                 Specify a separating string for multiple occurrences  of  -o.
-                 The  default is an empty string. Separating strings are never
+                 Specify  a  separating string for multiple occurrences of -o.
+                 The default is an empty string. Separating strings are  never
                 coloured.

       -q, --quiet
                 Work quietly, that is, display nothing except error messages.
-                 The  exit  status  indicates  whether or not any matches were
+                 The exit status indicates whether or  not  any  matches  were
                 found.

       -r, --recursive
-                 If any given path is a directory, recursively scan the  files
-                 it  contains, taking note of any --include and --exclude set-
-                 tings. By default, a directory is read as a normal  file;  in
-                 some  operating  systems this gives an immediate end-of-file.
-                 This option is a shorthand for setting the -d option to  "re-
+                 If  any given path is a directory, recursively scan the files
+                 it contains, taking note of any --include and --exclude  set-
+                 tings.  By  default, a directory is read as a normal file; in
+                 some operating systems this gives an  immediate  end-of-file.
+                 This  option is a shorthand for setting the -d option to "re-
                 curse".

       --recursion-limit=number
-                 See --match-limit above.
+                 This is an obsolete synonym for --depth-limit.  See  --match-
+                 limit above for details.

       -s, --no-messages
                 Suppress  error  messages  about  non-existent  or unreadable
@ -729,26 +775,30 @@ OPTIONS

       -u, --utf Operate in UTF-8 mode. This option is available only if PCRE2
                 has been compiled with UTF-8 support. All patterns (including
-                 those  for  any --exclude and --include options) and all sub-
-                 ject lines that are scanned must be valid  strings  of  UTF-8
-                 characters.
+                 those  for any --exclude and --include options) and all lines
+                 that are scanned must be valid strings of  UTF-8  characters.
+                 If an invalid UTF-8 string is encountered, an error occurs.

       -U, --utf-allow-invalid
                 As  --utf,  but in addition subject lines may contain invalid
                 UTF-8 code unit sequences. These can never form part  of  any
-                 pattern match. This facility allows valid UTF-8 strings to be
-                 sought in executable or other binary files.  For more details
-                 about  matching in non-valid UTF-8 strings, see the pcre2uni-
-                 code(3) documentation.
+                 pattern  match.  Patterns  themselves, however, must still be
+                 valid UTF-8 strings. This facility allows valid UTF-8 strings
+                 to be sought within arbitrary byte sequences in executable or
+                 other binary files. For more details about matching  in  non-
+                 valid UTF-8 strings, see the pcre2unicode(3) documentation.

       -V, --version
-                 Write the version numbers of pcre2grep and the PCRE2  library
-                 to  the  standard  output and then exit. Anything else on the
+                 Write  the version numbers of pcre2grep and the PCRE2 library
+                 to the standard output and then exit. Anything  else  on  the
                 command line is ignored.

       -v, --invert-match
-                 Invert the sense of the match, so that  lines  which  do  not
-                 match any of the patterns are the ones that are found.
+                 Invert  the  sense  of  the match, so that lines which do not
+                 match any of the patterns are the ones that are  found.  When
+                 this  option  is  set,  options  such  as --only-matching and
+                 --output, which specify parts of a match that are to be  out-
+                 put, are ignored.

       -w, --word-regex, --word-regexp
                 Force the patterns only to match "words". That is, there must
@ -769,6 +819,13 @@ OPTIONS
                 does  not apply to patterns specified by any of the --include
                 or --exclude options.

+       -Z, --null
+                 Terminate file names in the regular output with  a  zero  byte
+                 (the  NUL  character)  instead of what would normally appear.
+                 This is useful when file  names  contain  unusual  characters
+                 such  as  colons,  hyphens, or even newlines. The option does
+                 not apply to file names in error messages.
+

 ENVIRONMENT VARIABLES

@ -780,17 +837,27 @@ ENVIRONMENT VARIABLES

 NEWLINES

-       The -N (--newline) option allows pcre2grep to scan files with different
-       newline conventions from the default. Any parts of the input files that
-       are  written  to the standard output are copied identically, with what-
-       ever newline sequences they have in the input. However, the setting  of
-       this  option  affects only the way scanned files are processed. It does
-       not affect the interpretation of files specified  by  the  -f,  --file-
-       list, --exclude-from, or --include-from options, nor does it affect the
-       way in which pcre2grep writes informational messages  to  the  standard
-       error and output streams. For these it uses the string "\n" to indicate
-       newlines, relying on the C I/O library to convert this to an  appropri-
-       ate sequence.
+       The  -N  (--newline) option allows pcre2grep to scan files with newline
+       conventions that differ from the default. This option affects only  the
+       way  scanned files are processed. It does not affect the interpretation
+       of files specified by the -f,  --file-list,  --exclude-from,  or  --in-
+       clude-from options.
+
+       Any  parts  of the scanned input files that are written to the standard
+       output are copied with whatever newline sequences they have in the  in-
+       put.  However,  if  the final line of a file is output, and it does not
+       end with a newline sequence, a newline sequence is added. If  the  new-
+       line  setting  is  CR, LF, CRLF or NUL, that line ending is output; for
+       the other settings (ANYCRLF or ANY) a single NL is used.
+
+       The newline setting does not affect the way in which  pcre2grep  writes
+       newlines  in  informational  messages  to the standard output and error
+       streams.  Under Windows, the standard output is set to  be  binary,  so
+       that  "\r\n" at the ends of output lines that are copied from the input
+       is not converted to "\r\r\n" by the C I/O library. This means that  any
+       messages  written  to the standard output must end with "\r\n". For all
+       other operating systems, and for all messages  to  the  standard  error
+       stream, "\n" is used.


 OPTIONS COMPATIBILITY
@ -860,30 +927,49 @@ USING PCRE2'S CALLOUT FACILITY
       mentation  for  details).  Numbered  callouts are ignored by pcre2grep;
       only callouts with string arguments are useful.

+   Echoing a specific string
+
+       Starting the callout string with a pipe character  invokes  an  echoing
+       facility that avoids calling an external program or script. This facil-
+       ity is always available, provided that  callouts  were  not  completely
+       disabled  when  pcre2grep  was built. The rest of the callout string is
+       processed as a zero-terminated string, which means it should  not  con-
+       tain  any  internal  binary  zeros. It is written to the output, having
+       first been passed through the same escape processing as text  from  the
+       --output  (-O) option (see above). However, $0 cannot be used to insert
+       a matched substring because the match is still  in  progress.  Instead,
+       the  single  character '0' is inserted. Any syntax errors in the string
+       (for example, a dollar not followed by  another  character)  cause  the
+       callout  to be ignored. No terminator is added to the output string, so
+       if you want a newline, you must include it explicitly using the  escape
+       $n. For example:
+
+         pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
+
+       Matching  continues normally after the string is output. If you want to
+       see only the callout output but not any output from  an  actual  match,
+       you should end the pattern with (*FAIL).
+
   Calling external programs or scripts

       This facility can be independently disabled when pcre2grep is built. It
-       is  supported for Windows, where a call to _spawnvp() is used, for VMS,
-       where lib$spawn() is used, and  for  any  other  Unix-like  environment
-       where fork() and execv() are available.
+       is supported for Windows, where a call to _spawnvp() is used, for  VMS,
+       where  lib$spawn()  is  used,  and  for any Unix-like environment where
+       fork() and execv() are available.

       If the callout string does not start with a pipe (vertical bar) charac-
-       ter, it is parsed into a list of substrings separated by  pipe  charac-
-       ters.  The first substring must be an executable name, with the follow-
+       ter,  it  is parsed into a list of substrings separated by pipe charac-
+       ters. The first substring must be an executable name, with the  follow-
       ing substrings specifying arguments:

         executable_name|arg1|arg2|...

-       Any substring (including the executable name) may  contain  escape  se-
-       quences  started by a dollar character: $<digits> or ${<digits>} is re-
-       placed by the captured substring of the  given  decimal  number,  which
-       must  be greater than zero. If the number is greater than the number of
-       capturing substrings, or if the capture is unset,  the  replacement  is
-       empty.
-
-       Any  other character is substituted by itself. In particular, $$ is re-
-       placed by a single dollar and $| is replaced by a pipe character.  Here
-       is an example:
+       Any  substring  (including  the executable name) may contain escape se-
+       quences started by a dollar character. These are the same  as  for  the
+       --output (-O) option documented above, except that $0 cannot insert the
+       matched string because the match is still  in  progress.  Instead,  the
+       character '0' is inserted. If you need a literal dollar or pipe charac-
+       ter in any substring, use $$ or $| respectively. Here is an example:

         echo -e "abcde\n12345" | pcre2grep \
           '(?x)(.)(..(.))
@ -896,28 +982,15 @@ USING PCRE2'S CALLOUT FACILITY
           Arg1: [1] [234] [4] Arg2: |1| ()
           12345

-       The  parameters  for the system call that is used to run the program or
+       The parameters for the system call that is used to run the  program  or
       script are zero-terminated strings. This means that binary zero charac-
-       ters  in the callout argument will cause premature termination of their
-       substrings, and therefore should not be present. Any syntax  errors  in
-       the  string  (for  example, a dollar not followed by another character)
-       cause the callout to be ignored. If running the program fails  for  any
-       reason  (including the non-existence of the executable), a local match-
+       ters in the callout argument will cause premature termination of  their
+       substrings,  and  therefore should not be present. Any syntax errors in
+       the string (for example, a dollar not followed  by  another  character)
+       cause  the callout to be ignored.  If running the program fails for any
+       reason (including the non-existence of the executable), a local  match-
       ing failure occurs and the matcher backtracks in the normal way.

-   Echoing a specific string
-
-       This facility is always available, provided that callouts were not com-
-       pletely disabled when pcre2grep was built. If the callout string starts
-       with a pipe (vertical bar) character, the rest of the string is written
-       to the output, having been passed through the same escape processing as
-       text from the --output option. This provides a simple echoing  facility
-       that  avoids  calling  an  external program or script. No terminator is
-       added to the string, so if you want a newline, you must include it  ex-
-       plicitly.  Matching  continues  normally after the string is output. If
-       you want to see only the callout output but not any output from an  ac-
-       tual match, you should end the relevant pattern with (*FAIL).
-

 MATCHING ERRORS

@ -951,17 +1024,17 @@ DIAGNOSTICS

 SEE ALSO

-       pcre2pattern(3), pcre2syntax(3), pcre2callout(3).
+       pcre2pattern(3), pcre2syntax(3), pcre2callout(3), pcre2unicode(3).


 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 15 June 2019
-       Copyright (c) 1997-2019 University of Cambridge.
+       Last updated: 30 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
--- a/doc/pcre2jit.3
+++ b/doc/pcre2jit.3
@ -1,4 +1,4 @@
-.TH PCRE2JIT 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2JIT 3 "30 November 2021" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
@ -29,6 +29,7 @@ platforms:
 .sp
  ARM 32-bit (v5, v7, and Thumb2)
  ARM 64-bit
+  IBM s390x 64-bit
  Intel x86 32-bit and 64-bit
  MIPS 32-bit and 64-bit
  Power PC 32-bit and 64-bit
@ -64,7 +65,7 @@ or a negative error code.
 There is a limit to the size of pattern that JIT supports, imposed by the size
 of machine stack that it uses. The exact rules are not documented because they
 may change at any time, in particular, when new optimizations are introduced.
-If a pattern is too big, a call to \fBpcre2_jit_compile()\fB returns
+If a pattern is too big, a call to \fBpcre2_jit_compile()\fP returns
 PCRE2_ERROR_NOMEMORY.
 .P
 PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete
@ -250,11 +251,11 @@ non-sequential matches in one thread is to use callouts: if a callout function
 starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 .P
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 .P
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
 to a match context that is used by any number of patterns, as long as they are
@ -266,7 +267,7 @@ inefficient solution, and not recommended.
 This is a suggestion for how a multithreaded program that needs to set up
 non-default JIT stacks might operate:
 .sp
-  During thread initalization
+  During thread initialization
    thread_local_var = pcre2_jit_stack_create(...)
 .sp
  During thread exit
@ -315,12 +316,12 @@ stack through the JIT callback function.
 You can free a JIT stack at any time, as long as it will not be used by
 \fBpcre2_match()\fP again. When you assign the stack to a match context, only a
 pointer is set. There is no reference counting or any other magic. You can free
-compiled patterns, contexts, and stacks in any order, anytime. Just \fIdo
-not\fP call \fBpcre2_match()\fP with a match context pointing to an already
-freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
-used by \fBpcre2_match()\fP in another thread). You can also replace the stack
-in a context at any time when it is not in use. You should free the previous
-stack before assigning a replacement.
+compiled patterns, contexts, and stacks in any order, anytime.
+Just \fIdo not\fP call \fBpcre2_match()\fP with a match context pointing to an
+already freed stack, as that will cause SEGFAULT. (Also, do not free a stack
+currently used by \fBpcre2_match()\fP in another thread). You can also replace
+the stack in a context at any time when it is not in use. You should free the
+previous stack before assigning a replacement.
 .P
 (5) Should I allocate/free a stack every time before/after calling
 \fBpcre2_match()\fP?
@ -354,8 +355,8 @@ out this complicated API.
 .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
 .fi
 .P
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@ -415,10 +416,10 @@ that was not compiled.
 .P
 When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path, and if invalid data is passed, the result is undefined.
 .P
 Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give
 speedups of more than 10%.
@ -444,6 +445,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 May 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 30 November 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2limits.3
+++ b/doc/pcre2limits.3
@ -1,4 +1,4 @@
-.TH PCRE2LIMITS 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2LIMITS 3 "26 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "SIZE AND OTHER LIMITATIONS"
@ -51,6 +51,10 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 .P
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
+.P
+The maximum amount of heap memory used for matching is controlled by the heap 
+limit, which can be set in a pattern or in a match context. The default is a 
+very large number, effectively unlimited.
 .
 .
 .SH AUTHOR
@ -58,7 +62,7 @@ The maximum length of a string argument to a callout is the largest number a
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -67,6 +71,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 02 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 26 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2matching.3
+++ b/doc/pcre2matching.3
@ -1,4 +1,4 @@
-.TH PCRE2MATCHING 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2MATCHING 3 "28 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 MATCHING ALGORITHMS"
@ -61,8 +61,9 @@ tried is controlled by the greedy or ungreedy nature of the quantifier.
 If a leaf node is reached, a matching string has been found, and at that point
 the algorithm stops. Thus, if there is more than one possible match, this
 algorithm returns the first one that it finds. Whether this is the shortest,
-the longest, or some intermediate length depends on the way the greedy and
-ungreedy repetition quantifiers are specified in the pattern.
+the longest, or some intermediate length depends on the way the alternations
+and the greedy or ungreedy repetition quantifiers are specified in the
+pattern.
 .P
 Because it ends up with a single path through the tree, it is relatively
 straightforward for this algorithm to keep track of the substrings that are
@ -91,10 +92,15 @@ no more unterminated paths. At this point, terminated paths represent the
 different matching possibilities (if there are none, the match has failed).
 Thus, if there is more than one possible match, this algorithm finds all of
 them, and in particular, it finds the longest. The matches are returned in
-decreasing order of length. There is an option to stop the algorithm after the
-first match (which is necessarily the shortest) is found.
+the output vector in decreasing order of length. There is an option to stop the
+algorithm after the first match (which is necessarily the shortest) is found.
 .P
-Note that all the matches that are found start at the same point in the
+Note that the size of vector needed to contain all the results depends on the
+number of simultaneous matches, not on the number of parentheses in the
+pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the match
+data block is therefore not advisable when doing DFA matching.
+.P
+Note also that all the matches that are found start at the same point in the
 subject. If the pattern
 .sp
  cat(er(pillar)?)?
@ -165,19 +171,13 @@ supported by \fBpcre2_dfa_match()\fP.
 .SH "ADVANTAGES OF THE ALTERNATIVE ALGORITHM"
 .rs
 .sp
-Using the alternative matching algorithm provides the following advantages:
+The main advantage of the alternative algorithm is that all possible matches
+(at a single point in the subject) are automatically found, and in particular,
+the longest match is found. To find more than one match at the same point using
+the standard algorithm, you have to do kludgy things with callouts.
 .P
-1. All possible matches (at a single point in the subject) are automatically
-found, and in particular, the longest match is found. To find more than one
-match using the standard algorithm, you have to do kludgy things with
-callouts.
-.P
-2. Because the alternative algorithm scans the subject string just once, and
-never needs to backtrack (except for lookbehinds), it is possible to pass very
-long subject strings to the matching function in several pieces, checking for
-partial matching each time. Although it is also possible to do multi-segment
-matching using the standard algorithm, by retaining partially matched
-substrings, it is more complicated. The
+Partial matching is possible with this algorithm, though it has some
+limitations. The
 .\" HREF
 \fBpcre2partial\fP
 .\"
@ -199,6 +199,8 @@ invalid UTF string are not supported.
 .P
 3. Although atomic groups are supported, their use does not provide the
 performance advantage that it does for the standard algorithm.
+.P
+4. JIT optimization is not supported.
 .
 .
 .SH AUTHOR
@ -206,7 +208,7 @@ performance advantage that it does for the standard algorithm.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -215,6 +217,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 May 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 28 August 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2partial.3
+++ b/doc/pcre2partial.3
@ -261,7 +261,7 @@ these characters with '<' if the \fBallusedtext\fP modifier is set:
  Partial match: 123ab
                 <<<
 .sp
-However, the \fPallusedtext\fP modifier is not available for JIT matching,
+However, the \fBallusedtext\fP modifier is not available for JIT matching,
 because JIT matching does not record the first (or last) consulted characters.
 For this reason, this information is not available via the API. It is therefore
 not possible in general to obtain the exact number of characters that must be
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@ -1,4 +1,4 @@
-.TH PCRE2PATTERN 3 "29 July 2019" "PCRE2 10.34"
+.TH PCRE2PATTERN 3 "12 January 2022" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION DETAILS"
@ -75,7 +75,8 @@ Another special sequence that may appear at the start of a pattern is (*UCP).
 This has the same effect as setting the PCRE2_UCP option: it causes sequences
 such as \ed and \ew to use Unicode properties to determine character types,
 instead of recognizing only characters with codes less than 256 via a lookup
-table.
+table. It also causes upper/lower casing operations to use Unicode properties
+for characters with code points greater than 127, even when UTF is not set.
 .P
 Some applications that allow their users to supply patterns may wish to
 restrict them for security reasons. If the PCRE2_NEVER_UCP option is passed to
@ -262,8 +263,11 @@ corresponding characters in the subject. As a trivial example, the pattern
  The quick brown fox
 .sp
 matches a portion of a subject string that is identical to itself. When
-caseless matching is specified (the PCRE2_CASELESS option), letters are matched
-independently of case.
+caseless matching is specified (the PCRE2_CASELESS option or (?i) within the
+pattern), letters are matched independently of case. Note that there are two
+ASCII characters, K and S, that, in addition to their lower case ASCII
+equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F
+(long S) respectively when either PCRE2_UTF or PCRE2_UCP is set.
 .P
 The power of regular expressions comes from the ability to include wild cards,
 character classes, alternatives, and repetitions in the pattern. These are
@ -297,6 +301,22 @@ a character class the only metacharacters are:
  [      POSIX character class (if followed by POSIX syntax)
  ]      terminates the character class
 .sp
+If a pattern is compiled with the PCRE2_EXTENDED option, most white space in
+the pattern, other than in a character class, and characters between a #
+outside a character class and the next newline, inclusive, are ignored. An
+escaping backslash can be used to include a white space or a # character as
+part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same
+applies, but in addition unescaped space and horizontal tab characters are
+ignored inside a character class. Note: only these two characters are ignored,
+not the full set of pattern white space characters that are ignored outside a
+character class. Option settings can be changed within a pattern; see the
+section entitled
+.\" HTML <a href="#internaloptions">
+.\" </a>
+"Internal Option Setting"
+.\"
+below.
+.P
 The following sections describe the use of each of the metacharacters.
 .
 .
@ -314,15 +334,9 @@ would otherwise be interpreted as a metacharacter, so it is always safe to
 precede a non-alphanumeric with backslash to specify that it stands for itself.
 In particular, if you want to match a backslash, you write \e\e.
 .P
-In a UTF mode, only ASCII digits and letters have any special meaning after a
-backslash. All other characters (in particular, those whose code points are
-greater than 127) are treated as literals.
-.P
-If a pattern is compiled with the PCRE2_EXTENDED option, most white space in
-the pattern (other than in a character class), and characters between a #
-outside a character class and the next newline, inclusive, are ignored. An
-escaping backslash can be used to include a white space or # character as part
-of the pattern.
+Only ASCII digits and letters have any special meaning after a backslash. All
+other characters (in particular, those whose code points are greater than 127)
+are treated as literals.
 .P
 If you want to treat all characters in a sequence as literals, you can do so by
 putting them between \eQ and \eE. This is different from Perl in that $ and @
@ -495,7 +509,6 @@ for themselves. For example, outside a character class:
 .\" JOIN
  \e377   might be a backreference, otherwise
            the value 255 (decimal)
-.\" JOIN
  \e81    is always a backreference
 .sp
 Note that octal values of 100 or greater that are specified using this syntax
@ -727,7 +740,7 @@ Unicode support is not needed for these characters to be recognized.
 .P
 It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the
 complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF
-at compile time. (BSR is an abbrevation for "backslash R".) This can be made
+at compile time. (BSR is an abbreviation for "backslash R".) This can be made
 the default when PCRE2 is built; if this is the case, the other behaviour can
 be requested via the PCRE2_BSR_UNICODE option. It is also possible to specify
 these settings by starting a pattern string with one of the following
@ -759,191 +772,64 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
 sequences are of course limited to testing characters whose code points are
 less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
 greater than 0x10ffff (the Unicode limit) may be encountered. These are all
-treated as being in the Unknown script and with an unassigned type. The extra
-escape sequences are:
+treated as being in the Unknown script and with an unassigned type.
+.P
+Matching characters by Unicode property is not fast, because PCRE2 has to do a
+multistage table lookup in order to find a character's property. That is why
+the traditional escape sequences such as \ed and \ew do not use Unicode
+properties in PCRE2 by default, though you can make them do so by setting the
+PCRE2_UCP option or by starting the pattern with (*UCP).
+.P
+The extra escape sequences that provide property support are:
 .sp
  \ep{\fIxx\fP}   a character with the \fIxx\fP property
  \eP{\fIxx\fP}   a character without the \fIxx\fP property
  \eX       a Unicode extended grapheme cluster
 .sp
-The property names represented by \fIxx\fP above are case-sensitive. There is
-support for Unicode script names, Unicode general category properties, "Any",
-which matches any character (including newline), and some special PCRE2
-properties (described in the
+The property names represented by \fIxx\fP above are not case-sensitive, and in
+accordance with Unicode's "loose matching" rules, spaces, hyphens, and
+underscores are ignored. There is support for Unicode script names, Unicode
+general category properties, "Any", which matches any character (including
+newline), Bidi_Class, a number of binary (yes/no) properties, and some special
+PCRE2 properties (described
 .\" HTML <a href="#extraprops">
 .\" </a>
-next section).
+below).
 .\"
-Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2.
-Note that \eP{Any} does not match any characters, so always causes a match
-failure.
+Certain other Perl properties such as "InMusicalSymbols" are not supported by
+PCRE2. Note that \eP{Any} does not match any characters, so always causes a
+match failure.
+.
+.
+.
+.SS "Script properties for \ep and \eP"
+.rs
+.sp
+There are three different syntax forms for matching a script. Each Unicode
+character has a basic script and, optionally, a list of other scripts ("Script
+Extensions") with which it is commonly used. Using the Adlam script as an
+example, \ep{sc:Adlam} matches characters whose basic script is Adlam, whereas
+\ep{scx:Adlam} matches, in addition, characters that have Adlam in their
+extensions list. The full names "script" and "script extensions" for the
+property types are recognized, and an equals sign is an alternative to the
+colon. If a script name is given without a property type, for example,
+\ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl changed to this
+interpretation at release 5.26 and PCRE2 changed at release 10.40.
 .P
-Sets of Unicode characters are defined as belonging to certain scripts. A
-character from one of these sets can be matched using a script name. For
-example:
-.sp
-  \ep{Greek}
-  \eP{Han}
-.sp
 Unassigned characters (and in non-UTF 32-bit mode, characters with code points
 greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
 part of an identified script are lumped together as "Common". The current list
-of scripts is:
-.P
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Unknown,
-Vai,
-Wancho,
-Warang_Citi,
-Yi,
-Zanabazar_Square.
-.P
+of recognized script names and their 4-character abbreviations can be obtained
+by running this command:
+.sp
+  pcre2test -LS
+.sp
+.
+.
+.
+.SS "The general category property for \ep and \eP"
+.rs
+.sp
 Each character has exactly one Unicode general category property, specified by
 a two-letter abbreviation. For compatibility with Perl, negation can be
 specified by including a circumflex between the opening brace and the property
@ -1003,9 +889,9 @@ The following general category property codes are supported:
  Zp    Paragraph separator
  Zs    Space separator
 .sp
-The special property L& is also supported: it matches a character that has
-the Lu, Ll, or Lt property, in other words, a letter that is not classified as
-a modifier or "other".
+The special property LC, which has the synonym L&, is also supported: it
+matches a character that has the Lu, Ll, or Lt property, in other words, a
+letter that is not classified as a modifier or "other".
 .P
 The Cs (Surrogate) property applies only to characters whose code points are in
 the range U+D800 to U+DFFF. These characters are no different to any other
@ -1029,12 +915,53 @@ Unicode table.
 Specifying caseless matching does not affect these escape sequences. For
 example, \ep{Lu} always matches only upper case letters. This is different from
 the behaviour of current versions of Perl.
-.P
-Matching characters by Unicode property is not fast, because PCRE2 has to do a
-multistage table lookup in order to find a character's property. That is why
-the traditional escape sequences such as \ed and \ew do not use Unicode
-properties in PCRE2 by default, though you can make them do so by setting the
-PCRE2_UCP option or by starting the pattern with (*UCP).
+.
+.
+.SS "Binary (yes/no) properties for \ep and \eP"
+.rs
+.sp
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\ep and \eP, along with their abbreviations, by running this command:
+.sp
+  pcre2test -LP
+.sp
+.
+.
+.SS "The Bidi_Class property for \ep and \eP"
+.rs
+.sp
+  \ep{Bidi_Class:<class>}   matches a character with the given class
+  \ep{BC:<class>}           matches a character with the given class
+.sp
+The recognized classes are:
+.sp
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+.sp
+An equals sign may be used instead of a colon. The class names are
+case-insensitive; only the short names listed above are recognized.
 .
 .
 .SS Extended grapheme clusters
@ -1064,7 +991,7 @@ additional characters according to the following rules for ending a cluster:
 3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
 are of five types: L, V, T, LV, and LVT. An L character may be followed by an
 L, V, LV, or LVT character; an LV or V character may be followed by a V or T
-character; an LVT or T character may be follwed only by a T character.
+character; an LVT or T character may be followed only by a T character.
 .P
 4. Do not end before extending characters or spacing marks or the "zero-width
 joiner" character. Characters with the "mark" property always have the
@ -1150,8 +1077,11 @@ For example, when the pattern
 .sp
 matches "foobar", the first substring is still set to "foo".
 .P
-Perl documents that the use of \eK within assertions is "not well defined". In
-PCRE2, \eK is acted upon when it occurs inside positive assertions, but is
+From version 5.32.0 Perl forbids the use of \eK in lookaround assertions. From
+release 10.38 PCRE2 also forbids this by default. However, the
+PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling
+\fBpcre2_compile()\fP to re-enable the previous behaviour. When this option is
+set, \eK is acted upon when it occurs inside positive assertions, but is
 ignored in negative assertions. Note that when a pattern such as (?=ab\eK)
 matches, the reported start of the match can be greater than the end of the
 match. Using \eK in a lookbehind assertion at the start of a pattern can also
@ -1310,14 +1240,19 @@ end of the subject in both modes, and if all branches of a pattern start with
 .sp
 Outside a character class, a dot in the pattern matches any one character in
 the subject string except (by default) a character that signifies the end of a
-line.
+line. One or more characters may be specified as line terminators (see
+.\" HTML <a href="#newlines">
+.\" </a>
+"Newline conventions"
+.\"
+above).
 .P
-When a line ending is defined as a single character, dot never matches that
-character; when the two-character sequence CRLF is used, dot does not match CR
-if it is immediately followed by LF, but otherwise it matches all characters
-(including isolated CRs and LFs). When any Unicode line endings are being
-recognized, dot does not match CR or LF or any of the other line ending
-characters.
+Dot never matches a single line-ending character. When the two-character
+sequence CRLF is the only line ending, dot does not match CR if it is
+immediately followed by LF, but otherwise it matches all characters (including
+isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurrences
+of CR or LF match dot. When all Unicode line endings are being recognized, dot
+does not match CR or LF or any of the other line ending characters.
 .P
 The behaviour of dot with regard to newlines can be changed. If the
 PCRE2_DOTALL option is set, a dot matches any one character, without exception.
@ -1431,7 +1366,10 @@ Characters in a class may be specified by their code points using \eo, \ex, or
 \eN{U+hh..} in the usual way. When caseless matching is set, any letters in a
 class represent both their upper case and lower case versions, so for example,
 a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not
-match "A", whereas a caseful version would.
+match "A", whereas a caseful version would. Note that there are two ASCII
+characters, K and S, that, in addition to their lower case ASCII equivalents,
+are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S)
+respectively when either PCRE2_UTF or PCRE2_UCP is set.
 .P
 Characters that might indicate line breaks are never treated in any special way
 when matching character classes, whatever line-ending sequence is in use, and
@ -1643,6 +1581,7 @@ that succeeds is used. If the alternatives are within a group
 alternative in the group.
 .
 .
+.\" HTML <a name="internaloptions"></a>
 .SH "INTERNAL OPTION SETTING"
 .rs
 .sp
@ -1901,12 +1840,21 @@ are permitted for groups with the same number, for example:
  (?|(?<AA>aa)|(?<AA>bb))
 .sp
 The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
-option at compile time, or by the use of (?J) within the pattern. Duplicate
-names can be useful for patterns where only one instance of the named capture
-group can match. Suppose you want to match the name of a weekday, either as a
-3-letter abbreviation or as the full name, and in both cases you want to
-extract the abbreviation. This pattern (ignoring the line breaks) does the job:
+option at compile time, or by the use of (?J) within the pattern, as described
+in the section entitled
+.\" HTML <a href="#internaloptions">
+.\" </a>
+"Internal Option Setting"
+.\"
+above.
+.P
+Duplicate names can be useful for patterns where only one instance of the named
+capture group can match. Suppose you want to match the name of a weekday,
+either as a 3-letter abbreviation or as the full name, and in both cases you
+want to extract the abbreviation. This pattern (ignoring the line breaks) does
+the job:
 .sp
+  (?J)
  (?<DN>Mon|Fri|Sun)(?:day)?|
  (?<DN>Tue)(?:sday)?|
  (?<DN>Wed)(?:nesday)?|
@ -1926,7 +1874,7 @@ they appear in the overall pattern. The first one that is set is used for the
 reference. For example, this pattern matches both "foofoo" and "barbar" but not
 "foobar" or "barfoo":
 .sp
-  (?:(?<n>foo)|(?<n>bar))\ek<n>
+  (?J)(?:(?<n>foo)|(?<n>bar))\ek<n>
 .sp
 .P
 If you make a subroutine call to a non-unique named group, the one that
@ -1965,7 +1913,7 @@ items:
  an escape such as \ed or \epL that matches a single character
  a character class
  a backreference
-  a parenthesized group (including most assertions)
+  a parenthesized group (including lookaround assertions)
  a subroutine call (recursive or otherwise)
 .sp
 The general repetition quantifier specifies a minimum and maximum number of
@ -2147,10 +2095,10 @@ be easier to remember:
 .sp
  (*atomic:\ed+)foo
 .sp
-This kind of parenthesized group "locks up" the  part of the pattern it
-contains once it has matched, and a failure further into the pattern is
-prevented from backtracking into it. Backtracking past it to previous items,
-however, works as normal.
+This kind of parenthesized group "locks up" the part of the pattern it contains
+once it has matched, and a failure further into the pattern is prevented from
+backtracking into it. Backtracking past it to previous items, however, works as
+normal.
 .P
 An alternative description is that a group of this type matches exactly the
 string of characters that an identical standalone pattern would match, if
@ -2346,14 +2294,14 @@ the first iteration does not need to match the backreference. This can be done
 using alternation, as in the example above, or by a quantifier with a minimum
 of zero.
 .P
-Backreferences of this type cause the group that they reference to be treated
-as an
+For versions of PCRE2 less than 10.25, backreferences of this type used to
+cause the group that they reference to be treated as an
 .\" HTML <a href="#atomicgroup">
 .\" </a>
 atomic group.
 .\"
-Once the whole group has been matched, a subsequent matching failure cannot
-cause backtracking into the middle of the group.
+This restriction no longer applies, and backtracking into such groups can occur
+as normal.
 .
 .
 .\" HTML <a name="bigassertions"></a>
@ -2421,26 +2369,13 @@ the "no" branch of the condition. For other failing negative assertions,
 control passes to the previous backtracking point, thus discarding any captured
 strings within the assertion.
 .P
-For compatibility with Perl, most assertion groups may be repeated; though it
-makes no sense to assert the same thing several times, the side effect of
-capturing may occasionally be useful. However, an assertion that forms the
-condition for a conditional group may not be quantified. In practice, for
-other assertions, there only three cases:
-.sp
-(1) If the quantifier is {0}, the assertion is never obeyed during matching.
-However, it may contain internal capture groups that are called from elsewhere
-via the
-.\" HTML <a href="#groupsassubroutines">
-.\" </a>
-subroutine mechanism.
-.\"
-.sp
-(2) If quantifier is {0,n} where n is greater than zero, it is treated as if it
-were {0,1}. At run time, the rest of the pattern match is tried with and
-without the assertion, the order depending on the greediness of the quantifier.
-.sp
-(3) If the minimum repetition is greater than zero, the quantifier is ignored.
-The assertion is obeyed just once when encountered during matching.
+Most assertion groups may be repeated; though it makes no sense to assert the
+same thing several times, the side effect of capturing in positive assertions
+may occasionally be useful. However, an assertion that forms the condition for
+a conditional group may not be quantified. PCRE2 used to restrict the
+repetition of assertions, but from release 10.35 the only restriction is that
+an unlimited maximum repetition is changed to be one more than the minimum. For
+example, {3,} is treated as {3,4}.
 .
 .
 .SS "Alphabetic assertion names"
@ -2637,8 +2572,8 @@ backtracking into the assertion. However, there are some cases where non-atomic
 positive assertions can be useful. PCRE2 provides these using the following
 syntax:
 .sp
-  (*non_atomic_positive_lookahead:  or (*napla:
-  (*non_atomic_positive_lookbehind: or (*naplb:
+  (*non_atomic_positive_lookahead:  or (*napla: or (?*
+  (*non_atomic_positive_lookbehind: or (*naplb: or (?<*
 .sp
 Consider the problem of finding the right-most word in a string that also
 appears earlier in the string, that is, it must appear at least twice in total.
@ -2674,9 +2609,14 @@ pattern. If this is not the case, the rest of the pattern match fails exactly
 as before because nothing has changed, so using a non-atomic assertion just
 wastes resources.
 .P
+There is one exception to backtracking into a non-atomic assertion. If an
+(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
+is, a subsequent match failure cannot backtrack into the assertion.
+.P
 Non-atomic assertions are not supported by the alternative matching function
-\fBpcre2_dfa_match()\fP. They are also not supported by JIT (but may be in
-future). Note that assertions that appear as conditions for
+\fBpcre2_dfa_match()\fP. They are supported by JIT, but only if they do not
+contain any control verbs such as (*ACCEPT). (This may change in future.) Note
+that assertions that appear as conditions for
 .\" HTML <a href="#conditions">
 .\" </a>
 conditional groups
@ -2904,7 +2844,7 @@ breaks):
  (?(DEFINE) (?<byte> 2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) )
  \eb (?&byte) (\e.(?&byte)){3} \eb
 .sp
-The first part of the pattern is a DEFINE group inside which a another group
+The first part of the pattern is a DEFINE group inside which another group
 named "byte" is defined. This matches an individual component of an IPv4
 address (a number less than 256). When matching takes place, this part of the
 pattern is skipped because DEFINE acts like a false condition. The rest of the
@ -3634,7 +3574,7 @@ successful match if there is a later mismatch. Consider:
 .sp
 If the subject is "aaaac...", after the first match attempt fails (starting at
 the first character in the string), the starting point skips on to start the
-next attempt at "c". Note that a possessive quantifer does not have the same
+next attempt at "c". Note that a possessive quantifier does not have the same
 effect as this example; although it would suppress backtracking during the
 first match attempt, the second attempt would start at the second character
 instead of skipping on to "c".
@ -3865,7 +3805,7 @@ there is a backtrack at the outer level.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -3874,6 +3814,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 29 July 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 12 January 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2perform.3
+++ b/doc/pcre2perform.3
@ -1,4 +1,4 @@
-.TH PCRE2PERFORM 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2PERFORM 3 "27 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 PERFORMANCE"
@ -69,12 +69,28 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code. 
+.P
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+.P
+Until release 10.41, an initial 20KiB frames vector was allocated on the system 
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to \fBpcre2_match()\fP. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+.P
+The size of the initial block is the larger of 20KiB or ten times the pattern's 
+frame size, unless the heap limit is less than this, in which case the heap 
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is 
+checked only when a new block is to be allocated. Reducing the heap limit 
+between calls to \fBpcre2_match()\fP with the same match data block does not 
+affect the saved block.
 .P
 In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
 function calls, but only for processing atomic groups, lookaround assertions,
@ -230,7 +246,7 @@ pattern to match. This is done by repeatedly matching with different limits.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -239,6 +255,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 03 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2posix.3
+++ b/doc/pcre2posix.3
@ -1,4 +1,4 @@
-.TH PCRE2POSIX 3 "30 January 2019" "PCRE2 10.33"
+.TH PCRE2POSIX 3 "26 April 2021" "PCRE2 10.37"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "SYNOPSIS"
@ -44,11 +44,14 @@ can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an
 application. Because the POSIX functions call the native ones, it is also
 necessary to add \fB-lpcre2-8\fP.
 .P
-Although they are not defined as protypes in \fBpcre2posix.h\fP, the library
-does contain functions with the POSIX names \fBregcomp()\fP etc. These simply
-pass their arguments to the PCRE2 functions. These functions are provided for
-backwards compatibility with earlier versions of PCRE2, so that existing
-programs do not have to be recompiled.
+Although they were not defined as prototypes in \fBpcre2posix.h\fP, releases
+10.33 to 10.36 of the library contained functions with the POSIX names
+\fBregcomp()\fP etc. These simply passed their arguments to the PCRE2
+functions. These functions were provided for backwards compatibility with
+earlier versions of PCRE2, which had only POSIX names. However, this has proved
+troublesome in situations where a program links with several libraries, some of
+which use PCRE2's POSIX interface while others use the real POSIX functions.
+For this reason, the POSIX names have been removed since release 10.37.
 .P
 Calling the header file \fBpcre2posix.h\fP avoids any conflict with other POSIX
 libraries. It can, of course, be renamed or aliased as \fBregex.h\fP, which is
@ -321,6 +324,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 January 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 26 April 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2serialize.3
+++ b/doc/pcre2serialize.3
@ -6,11 +6,11 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
 .nf
 .B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP,
-.B "  int32_t \fInumber_of_codes\fP, const uint32_t *\fIbytes\fP,"
+.B "  int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP,"
 .B "  pcre2_general_context *\fIgcontext\fP);"
 .sp
-.B int32_t pcre2_serialize_encode(pcre2_code **\fIcodes\fP,
-.B "  int32_t \fInumber_of_codes\fP, uint32_t **\fIserialized_bytes\fP,"
+.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP,
+.B "  int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP,"
 .B "  PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);"
 .sp
 .B void pcre2_serialize_free(uint8_t *\fIbytes\fP);
@ -81,7 +81,7 @@ of serialized patterns, or one of the following negative error codes:
 .sp
  PCRE2_ERROR_BADDATA      the number of patterns is zero or less
  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
-  PCRE2_ERROR_MEMORY       memory allocation failed
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
  PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
 .sp
@ -141,7 +141,6 @@ mangagement functions for the decoded patterns. If this argument is NULL,
 \fBmalloc()\fP and \fBfree()\fP are used. After deserialization, the byte
 stream is no longer needed and can be discarded.
 .sp
-  int32_t number_of_codes;
  pcre2_code *list_of_codes[2];
  uint8_t *bytes = <serialized data>;
  int32_t number_of_codes =
--- a/doc/pcre2syntax.3
+++ b/doc/pcre2syntax.3
@ -1,4 +1,4 @@
-.TH PCRE2SYNTAX 3 "29 July 2019" "PCRE2 10.34"
+.TH PCRE2SYNTAX 3 "12 January 2022" "PCRE2 10.40"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
@ -102,6 +102,10 @@ happening, \es and \ew may also match characters with code points in the range
 128-255. If the PCRE2_UCP option is set, the behaviour of these escape
 sequences is changed to use Unicode properties and they match many more
 characters.
+.P
+Property descriptions in \ep and \eP are matched caselessly; hyphens,
+underscores, and white space are ignored, in accordance with Unicode's "loose
+matching" rules.
 .
 .
 .SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP"
@ -120,6 +124,7 @@ characters.
  Lo         Other letter
  Lt         Title case letter
  Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
  L&         Ll, Lu, or Lt
 .sp
  M          Mark
@ -167,161 +172,59 @@ Perl and POSIX space are now the same. Perl added VT to its space character set
 at release 5.18.
 .
 .
-.SH "SCRIPT NAMES FOR \ep AND \eP"
+.SH "BINARY PROPERTIES FOR \ep AND \eP"
 .rs
 .sp
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Vai,
-Wancho,
-Warang_Citi,
-Yi,
-Zanabazar_Square.
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\ep and \eP, along with their abbreviations, by running this command:
+.sp
+  pcre2test -LP
+.
+.
+.
+.SH "SCRIPT MATCHING WITH \ep AND \eP"
+.rs
+.sp
+Many script names and their 4-letter abbreviations are recognized in
+\ep{sc:...} or \ep{scx:...} items, or on their own with \ep (and also \eP of
+course). You can obtain a list of these scripts by running this command:
+.sp
+  pcre2test -LS
+.
+.
+.
+.SH "THE BIDI_CLASS PROPERTY FOR \ep AND \eP"
+.rs
+.sp
+  \ep{Bidi_Class:<class>}   matches a character with the given class
+  \ep{BC:<class>}           matches a character with the given class
+.sp
+The recognized classes are:
+.sp
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
 .
 .
 .SH "CHARACTER CLASSES"
@ -397,6 +300,9 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
 .sp
  \eK          set reported start of match
 .sp
+From release 10.38 \eK is not permitted by default in lookaround assertions,
+for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+option is set, the previous behaviour is re-enabled. When this option is set,
 \eK is honoured in positive assertions, but ignored in negative ones.
 .
 .
@ -441,7 +347,7 @@ Changes of these options within a group are automatically cancelled at the end
 of the group.
 .sp
  (?i)            caseless
-  (?J)            allow duplicate names
+  (?J)            allow duplicate named groups
  (?m)            multiline
  (?n)            no auto capture
  (?s)            single line (dotall)
@ -531,11 +437,13 @@ Each top-level branch of a lookbehind must be of a fixed length.
 .sp
 These assertions are specific to PCRE2 and are not Perl-compatible.
 .sp
-  (*napla:...)
-  (*non_atomic_positive_lookahead:...)
+  (?*...)                                )
+  (*napla:...)                           ) synonyms
+  (*non_atomic_positive_lookahead:...)   )
 .sp
-  (*naplb:...)
-  (*non_atomic_positive_lookbehind:...)
+  (?<*...)                               )
+  (*naplb:...)                           ) synonyms
+  (*non_atomic_positive_lookbehind:...)  )
 .
 .
 .SH "SCRIPT RUNS"
@ -661,7 +569,7 @@ delimiter }. To encode the ending delimiter within the string, double it.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -670,6 +578,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 29 July 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 12 January 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "30 July 2019" "PCRE 10.34"
+.TH PCRE2TEST 1 "27 July 2022" "PCRE 10.41"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -27,12 +27,7 @@ each match attempt. Modifiers on external or internal command lines, the
 patterns, and the subject lines specify PCRE2 function options, control how the
 subject is processed, and what output is produced.
 .P
-As the original fairly simple PCRE library evolved, it acquired many different
-features, and as a result, the original \fBpcretest\fP program ended up with a
-lot of options in a messy, arcane syntax for testing all the features. The
-move to the new PCRE2 API provided an opportunity to re-implement the test
-program as \fBpcre2test\fP, with a cleaner modifier syntax. Nevertheless, there
-are still many obscure modifiers, some of which are specifically designed for
+There are many obscure modifiers, some of which are specifically designed for
 use in conjunction with the test script and data files that are distributed as
 part of PCRE2. All the modifiers are documented here, some without much
 justification, but many of them are unlikely to be of use except when testing
@ -52,7 +47,7 @@ format before being passed to the library functions. Results are converted back
 to 8-bit code units for output.
 .P
 In the rest of this document, the names of library functions and structures
-are given in generic form, for example, \fBpcre_compile()\fP. The actual
+are given in generic form, for example, \fBpcre2_compile()\fP. The actual
 names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 .
 .
@ -61,10 +56,10 @@ names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 .rs
 .sp
 Input to \fBpcre2test\fP is processed line by line, either by calling the C
-library's \fBfgets()\fP function, or via the \fBlibreadline\fP library. In some
-Windows environments character 26 (hex 1A) causes an immediate end of file, and
-no further data is read, so this character should be avoided unless you really
-want that action.
+library's \fBfgets()\fP function, or via the \fBlibreadline\fP or \fBlibedit\fP
+library. In some Windows environments character 26 (hex 1A) causes an immediate
+end of file, and no further data is read, so this character should be avoided
+unless you really want that action.
 .P
 The input is processed using using C's string functions, so must not
 contain binary zeros, even though in Unix-like environments, \fBfgets()\fP
@ -216,9 +211,19 @@ available, and the use of JIT for matching is verified.
 \fB-LM\fP
 List modifiers: write a list of available pattern and subject modifiers to the
 standard output, then exit with zero exit code. All other options are ignored.
-If both -C and -LM are present, whichever is first is recognized.
+If both -C and any -Lx options are present, whichever is first is recognized.
 .TP 10
-\fB-pattern\fB \fImodifier-list\fP
+\fB-LP\fP
+List properties: write a list of recognized Unicode properties to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+.TP 10
+\fB-LS\fP
+List scripts: write a list of recognized Unicode script names to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+.TP 10
+\fB-pattern\fP \fImodifier-list\fP
 Behave as if each pattern line contains the given modifiers.
 .TP 10
 \fB-q\fP
@ -273,7 +278,7 @@ test data, command lines that begin with # may appear. This file format, with
 some restrictions, can also be processed by the \fBperltest.sh\fP script that
 is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
 and Perl is the same. For a specification of \fBperltest.sh\fP, see the
-comments near its beginning.
+comments near its beginning. See also the #perltest command below.
 .P
 When the input is a terminal, \fBpcre2test\fP prompts for each line of input,
 using "re>" to prompt for regular expression patterns, and "data>" to prompt
@ -326,6 +331,12 @@ described in the section entitled "Saving and restoring compiled patterns"
 .\" </a>
 below.
 .\"
+.sp
+  #loadtables <filename>
+.sp
+This command is used to load a set of binary character tables that can be
+accessed by the tables=3 qualifier. Such tables can be created by the
+\fBpcre2_dftables\fP program with the -b option.
 .sp
  #newline_default [<newline-list>]
 .sp
@ -363,14 +374,19 @@ patterns. Modifiers on a pattern can change these settings.
 .sp
  #perltest
 .sp
-The appearance of this line causes all subsequent modifier settings to be
-checked for compatibility with the \fBperltest.sh\fP script, which is used to
-confirm that Perl gives the same results as PCRE2. Also, apart from comment
-lines, #pattern commands, and #subject commands that set or unset "mark", no
-command lines are permitted, because they and many of the modifiers are
-specific to \fBpcre2test\fP, and should not be used in test files that are also
-processed by \fBperltest.sh\fP. The \fB#perltest\fP command helps detect tests
-that are accidentally put in the wrong file.
+This line is used in test files that can also be processed by \fBperltest.sh\fP
+to confirm that Perl gives the same results as PCRE2. Subsequent tests are
+checked for the use of \fBpcre2test\fP features that are incompatible with the
+\fBperltest.sh\fP script.
+.P
+Patterns must use '/' as their delimiter, and only certain modifiers are
+supported. Comment lines, #pattern commands, and #subject commands that set or
+unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and
+#newline_default commands, which are needed in the relevant pcre2test files,
+are silently ignored. All other command lines are ignored, but give a warning
+message. The \fB#perltest\fP command helps detect tests that are accidentally
+put in the wrong file or use the wrong delimiter. For more details of the
+\fBperltest.sh\fP script see the comments it contains.
 .sp
  #pop [<modifiers>]
  #popcopy [<modifiers>]
@ -432,15 +448,17 @@ excluding pattern meta-characters):
 .sp
 This is interpreted as the pattern's delimiter. A regular expression may be
 continued over several input lines, in which case the newline characters are
-included within it. It is possible to include the delimiter within the pattern
-by escaping it with a backslash, for example
+included within it. It is possible to include the delimiter as a literal within
+the pattern by escaping it with a backslash, for example
 .sp
  /abc\e/def/
 .sp
 If you do this, the escape and the delimiter form part of the pattern, but
-since the delimiters are all non-alphanumeric, this does not affect its
-interpretation. If the terminating delimiter is immediately followed by a
-backslash, for example,
+since the delimiters are all non-alphanumeric, the inclusion of the backslash
+does not affect the pattern's interpretation. Note, however, that this trick
+does not work within \eQ...\eE literal bracketing because the backslash will
+itself be interpreted as a literal. If the terminating delimiter is immediately
+followed by a backslash, for example,
 .sp
  /abc/\e
 .sp
@ -459,11 +477,11 @@ A pattern can be followed by a modifier list (details below).
 .SH "SUBJECT LINE SYNTAX"
 .rs
 .sp
-Before each subject line is passed to \fBpcre2_match()\fP or
-\fBpcre2_dfa_match()\fP, leading and trailing white space is removed, and the
-line is scanned for backslash escapes, unless the \fBsubject_literal\fP
-modifier was set for the pattern. The following provide a means of encoding
-non-printing characters in a visible way:
+Before each subject line is passed to \fBpcre2_match()\fP,
+\fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP, leading and trailing white
+space is removed, and the line is scanned for backslash escapes, unless the
+\fBsubject_literal\fP modifier was set for the pattern. The following provide a
+means of encoding non-printing characters in a visible way:
 .sp
  \ea         alarm (BEL, \ex07)
  \eb         backspace (\ex08)
@ -559,6 +577,7 @@ way \fBpcre2_compile()\fP behaves. See
 for a description of the effects of these options.
 .sp
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      allow_lookaround_bsk      set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
      allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
      alt_bsux                  set PCRE2_ALT_BSUX
      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
@ -638,7 +657,7 @@ heavily used in the test files.
      pushcopy                  push a copy onto the stack
      stackguard=<number>       test the stackguard feature
      subject_literal           treat all subject lines as literal
-      tables=[0|1|2]            select internal tables
+      tables=[0|1|2|3]          select internal tables
      use_length                do not zero-terminate the pattern
      utf8_input                treat input as UTF-8
 .sp
@ -988,18 +1007,20 @@ be aborted.
 .rs
 .sp
 The value specified for the \fBtables\fP modifier must be one of the digits 0,
-1, or 2. It causes a specific set of built-in character tables to be passed to
-\fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour with
-different character tables. The digit specifies the tables as follows:
+1, 2, or 3. It causes a specific set of built-in character tables to be passed
+to \fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour
+with different character tables. The digit specifies the tables as follows:
 .sp
  0   do not pass any special character tables
  1   the default ASCII tables, as distributed in
        pcre2_chartables.c.dist
  2   a set of tables defining ISO 8859 characters
+  3   a set of tables loaded by the #loadtables command
 .sp
-In table 2, some characters whose codes are greater than 128 are identified as
-letters, digits, spaces, etc. Setting alternate character tables and a locale
-are mutually exclusive.
+In tables 2, some characters whose codes are greater than 128 are identified as
+letters, digits, spaces, etc. Tables 3 can be used only after a
+\fB#loadtables\fP command has loaded them from a binary file. Setting alternate
+character tables and a locale are mutually exclusive.
 .
 .
 .SS "Setting certain match controls"
@ -1011,24 +1032,27 @@ modifier list, in which case they are applied to every subject line that is
 processed with that pattern. These modifiers do not affect the compilation
 process.
 .sp
-      aftertext                  show text after match
-      allaftertext               show text after captures
-      allcaptures                show all captures
-      allvector                  show the entire ovector
-      allusedtext                show all consulted text
-      altglobal                  alternative global matching
-  /g  global                     global matching
-      jitstack=<n>               set size of JIT stack
-      mark                       show mark values
-      replace=<string>           specify a replacement string
-      startchar                  show starting character when relevant
-      substitute_callout         use substitution callouts
-      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
-      substitute_skip=<n>        skip substitution number n
-      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
-      substitute_stop=<n>        skip substitution number n and greater
-      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
-      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
+      aftertext                   show text after match
+      allaftertext                show text after captures
+      allcaptures                 show all captures
+      allvector                   show the entire ovector
+      allusedtext                 show all consulted text
+      altglobal                   alternative global matching
+  /g  global                      global matching
+      jitstack=<n>                set size of JIT stack
+      mark                        show mark values
+      replace=<string>            specify a replacement string
+      startchar                   show starting character when relevant
+      substitute_callout          use substitution callouts
+      substitute_extended         use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal          use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched          use PCRE2_SUBSTITUTE_MATCHED
+      substitute_overflow_length  use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=<n>         skip substitution <n>
+      substitute_stop=<n>         skip substitution <n> and following
+      substitute_unknown_unset    use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+      substitute_unset_empty      use PCRE2_SUBSTITUTE_UNSET_EMPTY
 .sp
 These modifiers may not appear in a \fB#pattern\fP command. If you want them as
 defaults, set them in a \fB#subject\fP command.
@ -1164,7 +1188,7 @@ its input), you must use \fBposix_startend\fP to specify its length.
 The following modifiers affect the matching process or request additional
 information. Some of them may also be specified on a pattern line (see above),
 in which case they apply to every subject line that is matched against that
-pattern.
+pattern, but can be overridden by modifiers on the subject.
 .sp
      aftertext                  show text after match
      allaftertext               show text after captures
@ -1182,7 +1206,8 @@ pattern.
      copy=<number or name>      copy captured substring
      depth_limit=<n>            set a depth limit
      dfa                        use \fBpcre2_dfa_match()\fP
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
      get=<number or name>       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
@ -1192,6 +1217,8 @@ pattern.
      match_limit=<n>            set a match limit
      memory                     show heap memory usage
      null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
      offset=<n>                 set starting offset
      offset_limit=<n>           set offset limit
      ovector=<n>                set size of output vector
@ -1201,8 +1228,11 @@ pattern.
      startoffset=<n>            same as offset=<n>
      substitute_callout         use substitution callouts
      substitute_extedded        use PCRE2_SUBSTITUTE_EXTENDED
-      substitute_skip=<n>        skip substitution number n
+      substitute_literal         use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched         use PCRE2_SUBSTITUTE_MATCHED
      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=<n>        skip substitution number n
      substitute_stop=<n>        skip substitution number n and greater
      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
@ -1365,9 +1395,14 @@ by name.
 .rs
 .sp
 If the \fBreplace\fP modifier is set, the \fBpcre2_substitute()\fP function is
-called instead of one of the matching functions. Note that replacement strings
-cannot contain commas, because a comma signifies the end of a modifier. This is
-not thought to be an issue in a test program.
+called instead of one of the matching functions (or after one call of
+\fBpcre2_match()\fP in the case of PCRE2_SUBSTITUTE_MATCHED). Note that
+replacement strings cannot contain commas, because a comma signifies the end of
+a modifier. This is not thought to be an issue in a test program.
+.P
+Specifying a completely empty replacement string disables this modifier.
+However, it is possible to specify an empty replacement by providing a buffer
+length, as described below, for an otherwise empty replacement.
 .P
 Unlike subject strings, \fBpcre2test\fP does not process replacement strings
 for escape sequences. In UTF mode, a replacement string is checked to see if it
@ -1381,10 +1416,18 @@ for \fBpcre2_substitute()\fP:
 .sp
  global                      PCRE2_SUBSTITUTE_GLOBAL
  substitute_extended         PCRE2_SUBSTITUTE_EXTENDED
+  substitute_literal          PCRE2_SUBSTITUTE_LITERAL
+  substitute_matched          PCRE2_SUBSTITUTE_MATCHED
  substitute_overflow_length  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+  substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
  substitute_unknown_unset    PCRE2_SUBSTITUTE_UNKNOWN_UNSET
  substitute_unset_empty      PCRE2_SUBSTITUTE_UNSET_EMPTY
 .sp
+See the
+.\" HREF
+\fBpcre2api\fP
+.\"
+documentation for details of these options.
 .P
 After a successful substitution, the modified string is output, preceded by the
 number of replacements. This may be zero if there were no matches. Here is a
@ -1486,7 +1529,7 @@ value that was set on the pattern.
 .sp
 The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
 the appropriate limits in the match context. These values are ignored when the
-\fBfind_limits\fP modifier is specified.
+\fBfind_limits\fP or \fBfind_limits_noheap\fP modifier is specified.
 .
 .
 .SS "Finding minimum limits"
@ -1496,8 +1539,12 @@ If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
 calls the relevant matching function several times, setting different values in
 the match context via \fBpcre2_set_heap_limit()\fP,
 \fBpcre2_set_match_limit()\fP, or \fBpcre2_set_depth_limit()\fP until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, \fBfind_limits_noheap\fP, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 .P
 When using this modifier, the pattern should not contain any limit settings
 such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
@ -1521,9 +1568,7 @@ and non-recursive, to the internal matching function, thus controlling the
 overall amount of computing resource that is used.
 .P
 For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 .
 .
 .SS "Showing MARK names"
@ -1542,12 +1587,10 @@ is added to the non-match message.
 .sp
 The \fBmemory\fP modifier causes \fBpcre2test\fP to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(\fBpcre2_match()\fP) or for internal workspace (\fBpcre2_dfa_match()\fP). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the \fBmemory\fP modifier never has any effect. For this modifier to work, the
+\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. In the latter case, heap memory
+is used only when a match requires more internal workspace than the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 \fBnull_context\fP modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 .
@ -1599,7 +1642,7 @@ When testing \fBpcre2_substitute()\fP, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 .
 .
-.SS "Passing a NULL context"
+.SS "Passing a NULL context, subject, or replacement"
 .rs
 .sp
 Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP,
@ -1607,7 +1650,12 @@ Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP,
 If the \fBnull_context\fP modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-\fBfind_limits\fP or \fBsubstitute_callout\fP modifiers.
+\fBfind_limits\fP, \fBfind_limits_noheap\fP, or \fBsubstitute_callout\fP
+modifiers.
+.P
+Similarly, for testing purposes, if the \fBnull_subject\fP or
+\fBnull_replacement\fP modifier is set, the subject or replacement string
+pointers are passed as NULL, respectively, to the relevant functions.
 .
 .
 .SH "THE ALTERNATIVE MATCHING FUNCTION"
@ -2064,7 +2112,7 @@ on the stack.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -2073,6 +2121,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 July 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -24,17 +24,11 @@ SYNOPSIS
       tion  options, control how the subject is processed, and what output is
       produced.

-       As the original fairly simple PCRE library evolved,  it  acquired  many
-       different  features,  and  as  a  result, the original pcretest program
-       ended up with a lot of options in a messy, arcane  syntax  for  testing
-       all the features. The move to the new PCRE2 API provided an opportunity
-       to re-implement the test program as pcre2test, with a cleaner  modifier
-       syntax.  Nevertheless,  there are still many obscure modifiers, some of
-       which are specifically designed for use in conjunction  with  the  test
-       script  and  data  files that are distributed as part of PCRE2. All the
-       modifiers are documented here, some  without  much  justification,  but
-       many  of  them  are  unlikely  to be of use except when testing the li-
-       braries.
+       There are many obscure modifiers, some of which  are  specifically  de-
+       signed  for use in conjunction with the test script and data files that
+       are distributed as part of PCRE2.  All  the  modifiers  are  documented
+       here, some without much justification, but many of them are unlikely to
+       be of use except when testing the libraries.


 PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
@ -50,7 +44,7 @@ PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
       output.

       In the rest of this document, the names of library functions and struc-
-       tures  are  given in generic form, for example, pcre_compile(). The ac-
+       tures  are given in generic form, for example, pcre2_compile(). The ac-
       tual names used in the libraries have a suffix _8, _16, or _32, as  ap-
       propriate.

@ -58,10 +52,10 @@ PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
 INPUT ENCODING

       Input  to  pcre2test is processed line by line, either by calling the C
-       library's fgets() function, or via the  libreadline  library.  In  some
-       Windows  environments  character 26 (hex 1A) causes an immediate end of
-       file, and no further data is read, so this character should be  avoided
-       unless you really want that action.
+       library's fgets() function, or via the libreadline or libedit  library.
+       In  some Windows environments character 26 (hex 1A) causes an immediate
+       end of file, and no further data is read, so this character  should  be
+       avoided unless you really want that action.

       The  input  is  processed using using C's string functions, so must not
       contain binary zeros, even though in  Unix-like  environments,  fgets()
@ -203,7 +197,17 @@ COMMAND LINE OPTIONS

       -LM       List modifiers: write a list of available pattern and subject
                 modifiers to the standard output, then exit  with  zero  exit
-                 code.  All other options are ignored.  If both -C and -LM are
+                 code.  All other options are ignored.  If both -C and any -Lx
+                 options are present, whichever is first is recognized.
+
+       -LP       List properties: write a list of recognized  Unicode  proper-
+                 ties  to  the standard output, then exit with zero exit code.
+                 All other options are ignored. If both -C and any -Lx options
+                 are present, whichever is first is recognized.
+
+       -LS       List scripts: write a list of recognized Unicode script names
+                 to the standard output, then exit with zero  exit  code.  All
+                 other options are ignored. If both -C and any -Lx options are
                 present, whichever is first is recognized.

       -pattern modifier-list
@ -257,121 +261,134 @@ DESCRIPTION
       appear. This file format, with some restrictions, can also be processed
       by the perltest.sh script that is distributed with PCRE2 as a means  of
       checking that the behaviour of PCRE2 and Perl is the same. For a speci-
-       fication of perltest.sh, see the comments near its beginning.
+       fication of perltest.sh, see the comments near its beginning. See  also
+       the #perltest command below.

       When the input is a terminal, pcre2test prompts for each line of input,
-       using  "re>"  to prompt for regular expression patterns, and "data>" to
-       prompt for subject lines. Command lines starting with # can be  entered
+       using "re>" to prompt for regular expression patterns, and  "data>"  to
+       prompt  for subject lines. Command lines starting with # can be entered
       only in response to the "re>" prompt.

-       Each  subject line is matched separately and independently. If you want
+       Each subject line is matched separately and independently. If you  want
       to do multi-line matches, you have to use the \n escape sequence (or \r
-       or  \r\n,  etc.,  depending on the newline setting) in a single line of
-       input to encode the newline sequences. There is no limit on the  length
-       of  subject  lines; the input buffer is automatically extended if it is
-       too small. There are replication features that  makes  it  possible  to
-       generate  long  repetitive  pattern  or subject lines without having to
+       or \r\n, etc., depending on the newline setting) in a  single  line  of
+       input  to encode the newline sequences. There is no limit on the length
+       of subject lines; the input buffer is automatically extended if  it  is
+       too  small.  There  are  replication  features that make it possible to
+       generate long repetitive pattern or subject  lines  without  having  to
       supply them explicitly.

-       An empty line or the end of the file signals the  end  of  the  subject
-       lines  for  a test, at which point a new pattern or command line is ex-
+       An  empty  line  or  the end of the file signals the end of the subject
+       lines for a test, at which point a new pattern or command line  is  ex-
       pected if there is still input to be read.


 COMMAND LINES

-       In between sets of test data, a line that begins with # is  interpreted
+       In  between sets of test data, a line that begins with # is interpreted
       as a command line. If the first character is followed by white space or
-       an exclamation mark, the line is treated as  a  comment,  and  ignored.
+       an  exclamation  mark,  the  line is treated as a comment, and ignored.
       Otherwise, the following commands are recognized:

         #forbid_utf

-       Subsequent   patterns   automatically   have  the  PCRE2_NEVER_UTF  and
-       PCRE2_NEVER_UCP options set, which locks out the use of  the  PCRE2_UTF
-       and  PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start of
-       patterns. This command also forces an error  if  a  subsequent  pattern
-       contains  any  occurrences  of \P, \p, or \X, which are still supported
-       when PCRE2_UTF is not set, but which require Unicode  property  support
+       Subsequent  patterns  automatically  have   the   PCRE2_NEVER_UTF   and
+       PCRE2_NEVER_UCP  options  set, which locks out the use of the PCRE2_UTF
+       and PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start  of
+       patterns.  This  command  also  forces an error if a subsequent pattern
+       contains any occurrences of \P, \p, or \X, which  are  still  supported
+       when  PCRE2_UTF  is not set, but which require Unicode property support
       to be included in the library.

-       This  is  a trigger guard that is used in test files to ensure that UTF
-       or Unicode property tests are not accidentally added to files that  are
-       used  when  Unicode  support  is  not  included in the library. Setting
-       PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as a default can also  be  obtained
-       by  the  use  of #pattern; the difference is that #forbid_utf cannot be
-       unset, and the automatic options are not displayed in pattern  informa-
+       This is a trigger guard that is used in test files to ensure  that  UTF
+       or  Unicode property tests are not accidentally added to files that are
+       used when Unicode support is  not  included  in  the  library.  Setting
+       PCRE2_NEVER_UTF  and  PCRE2_NEVER_UCP as a default can also be obtained
+       by the use of #pattern; the difference is that  #forbid_utf  cannot  be
+       unset,  and the automatic options are not displayed in pattern informa-
       tion, to avoid cluttering up test output.

         #load <filename>

       This command is used to load a set of precompiled patterns from a file,
-       as described in the section entitled  "Saving  and  restoring  compiled
+       as  described  in  the  section entitled "Saving and restoring compiled
       patterns" below.

+         #loadtables <filename>
+
+       This command is used to load a set of binary character tables that  can
+       be  accessed  by  the tables=3 qualifier. Such tables can be created by
+       the pcre2_dftables program with the -b option.
+
         #newline_default [<newline-list>]

-       When  PCRE2  is  built,  a default newline convention can be specified.
-       This determines which characters and/or character pairs are  recognized
+       When PCRE2 is built, a default newline  convention  can  be  specified.
+       This  determines which characters and/or character pairs are recognized
       as indicating a newline in a pattern or subject string. The default can
-       be overridden when a pattern is compiled. The standard test files  con-
-       tain  tests  of  various  newline  conventions, but the majority of the
-       tests expect a single linefeed to be recognized as  a  newline  by  de-
-       fault.  Without  special action the tests would fail when PCRE2 is com-
+       be  overridden when a pattern is compiled. The standard test files con-
+       tain tests of various newline conventions,  but  the  majority  of  the
+       tests  expect  a  single  linefeed to be recognized as a newline by de-
+       fault. Without special action the tests would fail when PCRE2  is  com-
       piled with either CR or CRLF as the default newline.

       The #newline_default command specifies a list of newline types that are
-       acceptable  as the default. The types must be one of CR, LF, CRLF, ANY-
+       acceptable as the default. The types must be one of CR, LF, CRLF,  ANY-
       CRLF, ANY, or NUL (in upper or lower case), for example:

         #newline_default LF Any anyCRLF

       If the default newline is in the list, this command has no effect. Oth-
-       erwise,  except  when  testing  the  POSIX API, a newline modifier that
+       erwise, except when testing the POSIX  API,  a  newline  modifier  that
       specifies the first newline convention in the list (LF in the above ex-
-       ample)  is  added  to  any pattern that does not already have a newline
+       ample) is added to any pattern that does not  already  have  a  newline
       modifier. If the newline list is empty, the feature is turned off. This
       command is present in a number of the standard test input files.

-       When  the POSIX API is being tested there is no way to override the de-
+       When the POSIX API is being tested there is no way to override the  de-
       fault newline convention, though it is possible to set the newline con-
-       vention  from  within  the  pattern. A warning is given if the posix or
-       posix_nosub modifier is used when #newline_default would set a  default
+       vention from within the pattern. A warning is given  if  the  posix  or
+       posix_nosub  modifier is used when #newline_default would set a default
       for the non-POSIX API.

         #pattern <modifier-list>

-       This  command  sets  a default modifier list that applies to all subse-
+       This command sets a default modifier list that applies  to  all  subse-
       quent patterns. Modifiers on a pattern can change these settings.

         #perltest

-       The appearance of this line causes all subsequent modifier settings  to
-       be checked for compatibility with the perltest.sh script, which is used
-       to confirm that Perl gives the same results as PCRE2. Also, apart  from
-       comment lines, #pattern commands, and #subject commands that set or un-
-       set "mark", no command lines are permitted, because they  and  many  of
-       the modifiers are specific to pcre2test, and should not be used in test
-       files that are also processed by  perltest.sh.  The  #perltest  command
-       helps detect tests that are accidentally put in the wrong file.
+       This  line  is  used  in test files that can also be processed by perl-
+       test.sh to confirm that Perl gives the same results  as  PCRE2.  Subse-
+       quent  tests are checked for the use of pcre2test features that are in-
+       compatible with the perltest.sh script.
+
+       Patterns must use '/' as their delimiter, and  only  certain  modifiers
+       are  supported. Comment lines, #pattern commands, and #subject commands
+       that set or unset "mark" are recognized and acted  on.  The  #perltest,
+       #forbid_utf,  and  #newline_default  commands,  which are needed in the
+       relevant pcre2test files, are silently ignored. All other command lines
+       are  ignored,  but  give a warning message. The #perltest command helps
+       detect tests that are accidentally put in the wrong  file  or  use  the
+       wrong  delimiter.  For  more  details of the perltest.sh script see the
+       comments it contains.

         #pop [<modifiers>]
         #popcopy [<modifiers>]

-       These  commands  are used to manipulate the stack of compiled patterns,
-       as described in the section entitled  "Saving  and  restoring  compiled
+       These commands are used to manipulate the stack of  compiled  patterns,
+       as  described  in  the  section entitled "Saving and restoring compiled
       patterns" below.

         #save <filename>

-       This  command  is used to save a set of compiled patterns to a file, as
-       described in the section entitled "Saving and restoring  compiled  pat-
+       This command is used to save a set of compiled patterns to a  file,  as
+       described  in  the section entitled "Saving and restoring compiled pat-
       terns" below.

         #subject <modifier-list>

-       This  command  sets  a default modifier list that applies to all subse-
-       quent subject lines. Modifiers on a subject line can change these  set-
+       This command sets a default modifier list that applies  to  all  subse-
+       quent  subject lines. Modifiers on a subject line can change these set-
       tings.


@ -379,47 +396,50 @@ MODIFIER SYNTAX

       Modifier lists are used with both pattern and subject lines. Items in a
       list are separated by commas followed by optional white space. Trailing
-       whitespace  in  a modifier list is ignored. Some modifiers may be given
-       for both patterns and subject lines, whereas others are valid only  for
-       one  or  the  other.  Each  modifier  has a long name, for example "an-
-       chored", and some of them must be followed by  an  equals  sign  and  a
-       value,  for  example,  "offset=12". Values cannot contain comma charac-
-       ters, but may contain spaces. Modifiers that do not take values may  be
+       whitespace in a modifier list is ignored. Some modifiers may  be  given
+       for  both patterns and subject lines, whereas others are valid only for
+       one or the other. Each modifier has  a  long  name,  for  example  "an-
+       chored",  and  some  of  them  must be followed by an equals sign and a
+       value, for example, "offset=12". Values cannot  contain  comma  charac-
+       ters,  but may contain spaces. Modifiers that do not take values may be
       preceded by a minus sign to turn off a previous setting.

       A few of the more common modifiers can also be specified as single let-
-       ters, for example "i" for "caseless". In documentation,  following  the
+       ters,  for  example "i" for "caseless". In documentation, following the
       Perl convention, these are written with a slash ("the /i modifier") for
-       clarity. Abbreviated modifiers must all be concatenated  in  the  first
-       item  of a modifier list. If the first item is not recognized as a long
-       modifier name, it is interpreted as a sequence of these  abbreviations.
+       clarity.  Abbreviated  modifiers  must all be concatenated in the first
+       item of a modifier list. If the first item is not recognized as a  long
+       modifier  name, it is interpreted as a sequence of these abbreviations.
       For example:

         /abc/ig,newline=cr,jit=3

-       This  is  a pattern line whose modifier list starts with two one-letter
-       modifiers (/i and /g). The lower-case  abbreviated  modifiers  are  the
+       This is a pattern line whose modifier list starts with  two  one-letter
+       modifiers  (/i  and  /g).  The lower-case abbreviated modifiers are the
       same as used in Perl.


 PATTERN SYNTAX

-       A  pattern line must start with one of the following characters (common
+       A pattern line must start with one of the following characters  (common
       symbols, excluding pattern meta-characters):

         / ! " ' ` - = _ : ; , % & @ ~

-       This is interpreted as the pattern's delimiter.  A  regular  expression
-       may  be  continued  over several input lines, in which case the newline
+       This  is  interpreted  as the pattern's delimiter. A regular expression
+       may be continued over several input lines, in which  case  the  newline
       characters are included within it. It is possible to include the delim-
-       iter within the pattern by escaping it with a backslash, for example
+       iter as a literal within the pattern by escaping it with  a  backslash,
+       for example

         /abc\/def/

       If  you do this, the escape and the delimiter form part of the pattern,
-       but since the delimiters are all non-alphanumeric, this does not affect
-       its  interpretation.  If  the terminating delimiter is immediately fol-
-       lowed by a backslash, for example,
+       but since the delimiters are all non-alphanumeric, the inclusion of the
+       backslash  does not affect the pattern's interpretation. Note, however,
+       that this trick does not work within \Q...\E literal bracketing because
+       the backslash will itself be interpreted as a literal. If the terminat-
+       ing delimiter is immediately followed by a backslash, for example,

         /abc/\

@ -438,11 +458,11 @@ PATTERN SYNTAX

 SUBJECT LINE SYNTAX

-       Before   each   subject   line   is   passed   to   pcre2_match()    or
-       pcre2_dfa_match(), leading and trailing white space is removed, and the
-       line is scanned for backslash escapes, unless the subject_literal modi-
-       fier was set for the pattern. The following provide a means of encoding
-       non-printing characters in a visible way:
+       Before each subject line is passed to pcre2_match(), pcre2_dfa_match(),
+       or  pcre2_jit_match(), leading and trailing white space is removed, and
+       the line is scanned for backslash escapes, unless  the  subject_literal
+       modifier  was set for the pattern. The following provide a means of en-
+       coding non-printing characters in a visible way:

         \a         alarm (BEL, \x07)
         \b         backspace (\x08)
@ -537,6 +557,7 @@ PATTERN MODIFIERS
       options.

             allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+             allow_lookaround_bsk      set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
             allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
             alt_bsux                  set PCRE2_ALT_BSUX
             alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
@ -613,7 +634,7 @@ PATTERN MODIFIERS
             pushcopy                  push a copy onto the stack
             stackguard=<number>       test the stackguard feature
             subject_literal           treat all subject lines as literal
-             tables=[0|1|2]            select internal tables
+             tables=[0|1|2|3]          select internal tables
             use_length                do not zero-terminate the pattern
             utf8_input                treat input as UTF-8

@ -914,80 +935,85 @@ PATTERN MODIFIERS
   Using alternative character tables

       The value specified for the tables modifier must be one of  the  digits
-       0, 1, or 2. It causes a specific set of built-in character tables to be
-       passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
-       haviour with different character tables. The digit specifies the tables
-       as follows:
+       0, 1, 2, or 3. It causes a specific set of built-in character tables to
+       be passed to pcre2_compile(). This is used in the PCRE2 tests to  check
+       behaviour  with different character tables. The digit specifies the ta-
+       bles as follows:

         0   do not pass any special character tables
         1   the default ASCII tables, as distributed in
               pcre2_chartables.c.dist
         2   a set of tables defining ISO 8859 characters
+         3   a set of tables loaded by the #loadtables command

-       In table 2, some characters whose codes are greater than 128 are  iden-
-       tified as letters, digits, spaces, etc. Setting alternate character ta-
-       bles and a locale are mutually exclusive.
+       In tables 2, some characters whose codes are greater than 128 are iden-
+       tified as letters, digits, spaces, etc. Tables 3 can be used only after
+       a #loadtables command has loaded them from a binary file.  Setting  al-
+       ternate character tables and a locale are mutually exclusive.

   Setting certain match controls

       The following modifiers are really subject modifiers, and are described
-       under  "Subject  Modifiers"  below.  However, they may be included in a
-       pattern's modifier list, in which case they are applied to  every  sub-
-       ject  line  that is processed with that pattern. These modifiers do not
+       under "Subject Modifiers" below. However, they may  be  included  in  a
+       pattern's  modifier  list, in which case they are applied to every sub-
+       ject line that is processed with that pattern. These modifiers  do  not
       affect the compilation process.

-             aftertext                  show text after match
-             allaftertext               show text after captures
-             allcaptures                show all captures
-             allvector                  show the entire ovector
-             allusedtext                show all consulted text
-             altglobal                  alternative global matching
-         /g  global                     global matching
-             jitstack=<n>               set size of JIT stack
-             mark                       show mark values
-             replace=<string>           specify a replacement string
-             startchar                  show starting character when relevant
-             substitute_callout         use substitution callouts
-             substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
-             substitute_skip=<n>        skip substitution number n
-             substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
-             substitute_stop=<n>        skip substitution number n and greater
-             substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
-             substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
+             aftertext                   show text after match
+             allaftertext                show text after captures
+             allcaptures                 show all captures
+             allvector                   show the entire ovector
+             allusedtext                 show all consulted text
+             altglobal                   alternative global matching
+         /g  global                      global matching
+             jitstack=<n>                set size of JIT stack
+             mark                        show mark values
+             replace=<string>            specify a replacement string
+             startchar                   show starting character when relevant
+             substitute_callout          use substitution callouts
+             substitute_extended         use PCRE2_SUBSTITUTE_EXTENDED
+             substitute_literal          use PCRE2_SUBSTITUTE_LITERAL
+             substitute_matched          use PCRE2_SUBSTITUTE_MATCHED
+             substitute_overflow_length  use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+             substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+             substitute_skip=<n>         skip substitution <n>
+             substitute_stop=<n>         skip substitution <n> and following
+             substitute_unknown_unset    use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+             substitute_unset_empty      use PCRE2_SUBSTITUTE_UNSET_EMPTY

-       These modifiers may not appear in a #pattern command. If you want  them
+       These  modifiers may not appear in a #pattern command. If you want them
       as defaults, set them in a #subject command.

   Specifying literal subject lines

-       If  the  subject_literal modifier is present on a pattern, all the sub-
+       If the subject_literal modifier is present on a pattern, all  the  sub-
       ject lines that it matches are taken as literal strings, with no inter-
-       pretation  of  backslashes. It is not possible to set subject modifiers
-       on such lines, but any that are set as defaults by a  #subject  command
+       pretation of backslashes. It is not possible to set  subject  modifiers
+       on  such  lines, but any that are set as defaults by a #subject command
       are recognized.

   Saving a compiled pattern

-       When  a  pattern with the push modifier is successfully compiled, it is
-       pushed onto a stack of compiled patterns,  and  pcre2test  expects  the
-       next  line to contain a new pattern (or a command) instead of a subject
+       When a pattern with the push modifier is successfully compiled,  it  is
+       pushed  onto  a  stack  of compiled patterns, and pcre2test expects the
+       next line to contain a new pattern (or a command) instead of a  subject
       line. This facility is used when saving compiled patterns to a file, as
-       described  in  the section entitled "Saving and restoring compiled pat-
-       terns" below.  If pushcopy is used instead of push, a copy of the  com-
-       piled  pattern  is  stacked,  leaving the original as current, ready to
-       match the following input lines. This provides a  way  of  testing  the
-       pcre2_code_copy()  function.   The push and pushcopy  modifiers are in-
-       compatible with compilation modifiers such as global that act at  match
+       described in the section entitled "Saving and restoring  compiled  pat-
+       terns"  below.  If pushcopy is used instead of push, a copy of the com-
+       piled pattern is stacked, leaving the original  as  current,  ready  to
+       match  the  following  input  lines. This provides a way of testing the
+       pcre2_code_copy() function.  The push and pushcopy  modifiers  are  in-
+       compatible  with compilation modifiers such as global that act at match
       time. Any that are specified are ignored (for the stacked copy), with a
-       warning message, except for replace, which causes an error.  Note  that
-       jitverify,  which  is allowed, does not carry through to any subsequent
+       warning  message,  except for replace, which causes an error. Note that
+       jitverify, which is allowed, does not carry through to  any  subsequent
       matching that uses a stacked pattern.

   Testing foreign pattern conversion

-       The experimental foreign pattern conversion functions in PCRE2  can  be
-       tested  by  setting the convert modifier. Its argument is a colon-sepa-
-       rated list  of  options,  which  set  the  equivalent  option  for  the
+       The  experimental  foreign pattern conversion functions in PCRE2 can be
+       tested by setting the convert modifier. Its argument is  a  colon-sepa-
+       rated  list  of  options,  which  set  the  equivalent  option  for the
       pcre2_pattern_convert() function:

         glob                    PCRE2_CONVERT_GLOB
@ -999,19 +1025,19 @@ PATTERN MODIFIERS

       The "unset" value is useful for turning off a default that has been set
       by a #pattern command. When one of these options is set, the input pat-
-       tern  is  passed  to pcre2_pattern_convert(). If the conversion is suc-
-       cessful, the result is reflected in  the  output  and  then  passed  to
+       tern is passed to pcre2_pattern_convert(). If the  conversion  is  suc-
+       cessful,  the  result  is  reflected  in  the output and then passed to
       pcre2_compile(). The normal utf and no_utf_check options, if set, cause
-       the PCRE2_CONVERT_UTF  and  PCRE2_CONVERT_NO_UTF_CHECK  options  to  be
+       the  PCRE2_CONVERT_UTF  and  PCRE2_CONVERT_NO_UTF_CHECK  options  to be
       passed to pcre2_pattern_convert().

       By default, the conversion function is allowed to allocate a buffer for
-       its output. However, if the convert_length modifier is set to  a  value
-       greater  than zero, pcre2test passes a buffer of the given length. This
+       its  output.  However, if the convert_length modifier is set to a value
+       greater than zero, pcre2test passes a buffer of the given length.  This
       makes it possible to test the length check.

-       The convert_glob_escape and  convert_glob_separator  modifiers  can  be
-       used  to  specify the escape and separator characters for glob process-
+       The  convert_glob_escape  and  convert_glob_separator  modifiers can be
+       used to specify the escape and separator characters for  glob  process-
       ing, overriding the defaults, which are operating-system dependent.


@ -1022,7 +1048,7 @@ SUBJECT MODIFIERS

   Setting match options

-       The    following   modifiers   set   options   for   pcre2_match()   or
+       The   following   modifiers   set   options   for   pcre2_match()    or
       pcre2_dfa_match(). See pcre2api for a description of their effects.

             anchored                  set PCRE2_ANCHORED
@ -1038,35 +1064,36 @@ SUBJECT MODIFIERS
             partial_hard (or ph)      set PCRE2_PARTIAL_HARD
             partial_soft (or ps)      set PCRE2_PARTIAL_SOFT

-       The partial matching modifiers are provided with abbreviations  because
+       The  partial matching modifiers are provided with abbreviations because
       they appear frequently in tests.

-       If  the posix or posix_nosub modifier was present on the pattern, caus-
+       If the posix or posix_nosub modifier was present on the pattern,  caus-
       ing the POSIX wrapper API to be used, the only option-setting modifiers
       that have any effect are notbol, notempty, and noteol, causing REG_NOT-
-       BOL, REG_NOTEMPTY,  and  REG_NOTEOL,  respectively,  to  be  passed  to
+       BOL,  REG_NOTEMPTY,  and  REG_NOTEOL,  respectively,  to  be  passed to
       regexec(). The other modifiers are ignored, with a warning message.

-       There  is one additional modifier that can be used with the POSIX wrap-
+       There is one additional modifier that can be used with the POSIX  wrap-
       per. It is ignored (with a warning) if used for non-POSIX matching.

             posix_startend=<n>[:<m>]

-       This causes the subject string to be  passed  to  regexec()  using  the
-       REG_STARTEND  option,  which  uses offsets to specify which part of the
-       string is searched. If only one number is  given,  the  end  offset  is
-       passed  as  the end of the subject string. For more detail of REG_STAR-
-       TEND, see the pcre2posix documentation. If the subject string  contains
-       binary  zeros  (coded  as escapes such as \x{00} because pcre2test does
+       This  causes  the  subject  string  to be passed to regexec() using the
+       REG_STARTEND option, which uses offsets to specify which  part  of  the
+       string  is  searched.  If  only  one number is given, the end offset is
+       passed as the end of the subject string. For more detail  of  REG_STAR-
+       TEND,  see the pcre2posix documentation. If the subject string contains
+       binary zeros (coded as escapes such as \x{00}  because  pcre2test  does
       not support actual binary zeros in its input), you must use posix_star-
       tend to specify its length.

   Setting match controls

-       The  following  modifiers  affect the matching process or request addi-
-       tional information. Some of them may also be  specified  on  a  pattern
-       line  (see  above), in which case they apply to every subject line that
-       is matched against that pattern.
+       The following modifiers affect the matching process  or  request  addi-
+       tional  information.  Some  of  them may also be specified on a pattern
+       line (see above), in which case they apply to every subject  line  that
+       is  matched against that pattern, but can be overridden by modifiers on
+       the subject.

             aftertext                  show text after match
             allaftertext               show text after captures
@ -1084,7 +1111,8 @@ SUBJECT MODIFIERS
             copy=<number or name>      copy captured substring
             depth_limit=<n>            set a depth limit
             dfa                        use pcre2_dfa_match()
-             find_limits                find match and depth limits
+             find_limits                find heap, match and depth limits
+             find_limits_noheap         find match and depth limits
             get=<number or name>       extract captured substring
             getall                     extract all captured substrings
         /g  global                     global matching
@ -1094,6 +1122,8 @@ SUBJECT MODIFIERS
             match_limit=<n>            set a match limit
             memory                     show heap memory usage
             null_context               match with a NULL context
+             null_replacement           substitute with NULL replacement
+             null_subject               match with NULL subject
             offset=<n>                 set starting offset
             offset_limit=<n>           set offset limit
             ovector=<n>                set size of output vector
@ -1103,8 +1133,11 @@ SUBJECT MODIFIERS
             startoffset=<n>            same as offset=<n>
             substitute_callout         use substitution callouts
             substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
-             substitute_skip=<n>        skip substitution number n
+             substitute_literal         use PCRE2_SUBSTITUTE_LITERAL
+             substitute_matched         use PCRE2_SUBSTITUTE_MATCHED
             substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+             substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+             substitute_skip=<n>        skip substitution number n
             substitute_stop=<n>        skip substitution number n and greater
             substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
             substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
@ -1249,29 +1282,40 @@ SUBJECT MODIFIERS
   Testing the substitution function

       If  the  replace  modifier  is  set, the pcre2_substitute() function is
-       called instead of one of the matching functions. Note that  replacement
-       strings  cannot  contain commas, because a comma signifies the end of a
-       modifier. This is not thought to be an issue in a test program.
+       called instead of one of the matching functions (or after one  call  of
+       pcre2_match()  in  the case of PCRE2_SUBSTITUTE_MATCHED). Note that re-
+       placement strings cannot contain commas, because a comma signifies  the
+       end  of  a  modifier. This is not thought to be an issue in a test pro-
+       gram.

-       Unlike subject strings, pcre2test does not process replacement  strings
-       for  escape  sequences. In UTF mode, a replacement string is checked to
-       see if it is a valid UTF-8 string. If so, it is correctly converted  to
-       a  UTF  string of the appropriate code unit width. If it is not a valid
-       UTF-8 string, the individual code units are copied directly. This  pro-
+       Specifying a completely empty replacement string  disables  this  modi-
+       fier.   However, it is possible to specify an empty replacement by pro-
+       viding a buffer length, as described below, for an otherwise empty  re-
+       placement.
+
+       Unlike  subject strings, pcre2test does not process replacement strings
+       for escape sequences. In UTF mode, a replacement string is  checked  to
+       see  if it is a valid UTF-8 string. If so, it is correctly converted to
+       a UTF string of the appropriate code unit width. If it is not  a  valid
+       UTF-8  string, the individual code units are copied directly. This pro-
       vides a means of passing an invalid UTF-8 string for testing purposes.

-       The  following modifiers set options (in additional to the normal match
+       The following modifiers set options (in addition to the  normal  match
       options) for pcre2_substitute():

         global                      PCRE2_SUBSTITUTE_GLOBAL
         substitute_extended         PCRE2_SUBSTITUTE_EXTENDED
+         substitute_literal          PCRE2_SUBSTITUTE_LITERAL
+         substitute_matched          PCRE2_SUBSTITUTE_MATCHED
         substitute_overflow_length  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+         substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
         substitute_unknown_unset    PCRE2_SUBSTITUTE_UNKNOWN_UNSET
         substitute_unset_empty      PCRE2_SUBSTITUTE_UNSET_EMPTY

+       See the pcre2api documentation for details of these options.

-       After a successful substitution, the modified string  is  output,  pre-
-       ceded  by the number of replacements. This may be zero if there were no
+       After  a  successful  substitution, the modified string is output, pre-
+       ceded by the number of replacements. This may be zero if there were  no
       matches. Here is a simple example of a substitution test:

         /abc/replace=xxx
@ -1280,12 +1324,12 @@ SUBJECT MODIFIERS
             =abc=abc=\=global
          2: =xxx=xxx=

-       Subject and replacement strings should be kept relatively short  (fewer
-       than  256 characters) for substitution tests, as fixed-size buffers are
-       used. To make it easy to test for buffer overflow, if  the  replacement
-       string  starts  with a number in square brackets, that number is passed
-       to pcre2_substitute() as the size of the output buffer,  with  the  re-
-       placement  string  starting  at  the next character. Here is an example
+       Subject  and replacement strings should be kept relatively short (fewer
+       than 256 characters) for substitution tests, as fixed-size buffers  are
+       used.  To  make it easy to test for buffer overflow, if the replacement
+       string starts with a number in square brackets, that number  is  passed
+       to  pcre2_substitute()  as  the size of the output buffer, with the re-
+       placement string starting at the next character.  Here  is  an  example
       that tests the edge case:

         /abc/
@ -1295,12 +1339,12 @@ SUBJECT MODIFIERS
         Failed: error -47: no more memory

       The  default  action  of  pcre2_substitute()  is  to  return  PCRE2_ER-
-       ROR_NOMEMORY  when  the  output  buffer  is  too small. However, if the
-       PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by  using  the  substi-
+       ROR_NOMEMORY when the output buffer  is  too  small.  However,  if  the
+       PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  option  is  set (by using the substi-
       tute_overflow_length  modifier),  pcre2_substitute()  continues  to  go
-       through the motions of matching and substituting  (but  not  doing  any
-       callouts),  in  order  to  compute the size of buffer that is required.
-       When this happens, pcre2test shows the required  buffer  length  (which
+       through  the  motions  of  matching and substituting (but not doing any
+       callouts), in order to compute the size of  buffer  that  is  required.
+       When  this  happens,  pcre2test shows the required buffer length (which
       includes space for the trailing zero) as part of the error message. For
       example:

@ -1309,15 +1353,15 @@ SUBJECT MODIFIERS
         Failed: error -47: no more memory: 10 code units are needed

       A replacement string is ignored with POSIX and DFA matching. Specifying
-       partial  matching  provokes  an  error return ("bad option value") from
+       partial matching provokes an error return  ("bad  option  value")  from
       pcre2_substitute().

   Testing substitute callouts

       If the substitute_callout modifier is set, a substitution callout func-
-       tion  is set up. The null_context modifier must not be set, because the
-       address of the callout function is passed in a match context. When  the
-       callout  function  is  called (after each substitution), details of the
+       tion is set up. The null_context modifier must not be set, because  the
+       address  of the callout function is passed in a match context. When the
+       callout function is called  (after  each  substitution),  details  of
       the input and output strings are output. For example:

         /abc/g,replace=<$0>,substitute_callout
@ -1326,19 +1370,19 @@ SUBJECT MODIFIERS
          2(1) Old 6 9 "abc" New 8 13 "<abc>"
          2: <abc>def<abc>pqr

-       The first number on each callout line is  the  count  of  matches.  The
+       The  first  number  on  each  callout line is the count of matches. The
       parenthesized number is the number of pairs that are set in the ovector
-       (that is, one more than the number of capturing groups that were  set).
+       (that  is, one more than the number of capturing groups that were set).
       Then are listed the offsets of the old substring, its contents, and the
       same for the replacement.

-       By default, the substitution callout function returns zero,  which  ac-
-       cepts  the  replacement and causes matching to continue if /g was used.
-       Two further modifiers can be used to test other return values. If  sub-
-       stitute_skip  is  set to a value greater than zero the callout function
-       returns +1 for the match of that number, and similarly  substitute_stop
-       returns  -1.  These cause the replacement to be rejected, and -1 causes
-       no further matching to take place. If either of them are  set,  substi-
+       By  default,  the substitution callout function returns zero, which ac-
+       cepts the replacement and causes matching to continue if /g  was  used.
+       Two  further modifiers can be used to test other return values. If sub-
+       stitute_skip is set to a value greater than zero the  callout  function
+       returns  +1 for the match of that number, and similarly substitute_stop
+       returns -1. These cause the replacement to be rejected, and  -1  causes
+       no  further  matching  to take place. If either of them is set, substi-
       tute_callout is assumed. For example:

         /abc/g,replace=<$0>,substitute_skip=1
@ -1356,126 +1400,131 @@ SUBJECT MODIFIERS

   Setting the JIT stack size

-       The  jitstack modifier provides a way of setting the maximum stack size
-       that is used by the just-in-time optimization code. It  is  ignored  if
-       JIT  optimization is not being used. The value is a number of kibibytes
-       (units of 1024 bytes). Setting zero reverts to the  default  of  32KiB.
+       The jitstack modifier provides a way of setting the maximum stack  size
+       that  is  used  by the just-in-time optimization code. It is ignored if
+       JIT optimization is not being used. The value is a number of  kibibytes
+       (units  of  1024  bytes). Setting zero reverts to the default of 32KiB.
       Providing a stack that is larger than the default is necessary only for
-       very complicated patterns. If jitstack is set  non-zero  on  a  subject
+       very  complicated  patterns.  If  jitstack is set non-zero on a subject
       line it overrides any value that was set on the pattern.

   Setting heap, match, and depth limits

-       The  heap_limit,  match_limit, and depth_limit modifiers set the appro-
-       priate limits in the match context. These values are ignored  when  the
-       find_limits modifier is specified.
+       The heap_limit, match_limit, and depth_limit modifiers set  the  appro-
+       priate  limits  in the match context. These values are ignored when the
+       find_limits or find_limits_noheap modifier is specified.

   Finding minimum limits

-       If  the  find_limits  modifier  is present on a subject line, pcre2test
-       calls the relevant matching function several times,  setting  different
-       values    in    the    match    context   via   pcre2_set_heap_limit(),
-       pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds  the
-       minimum  values  for  each  parameter that allows the match to complete
-       without error. If JIT is being used, only the match limit is relevant.
+       If the find_limits modifier is present on  a  subject  line,  pcre2test
+       calls  the  relevant matching function several times, setting different
+       values   in   the    match    context    via    pcre2_set_heap_limit(),
+       pcre2_set_match_limit(),  or pcre2_set_depth_limit() until it finds the
+       smallest value for each parameter that allows  the  match  to  complete
+       without a "limit exceeded" error. The match itself may succeed or fail.
+       An alternative modifier, find_limits_noheap, omits the heap limit. This
+       is  used  in  the standard tests, because the minimum heap limit varies
+       between systems. If JIT is being used, only the match  limit  is  rele-
+       vant, and the other two are automatically omitted.

       When using this modifier, the pattern should not contain any limit set-
-       tings  such  as  (*LIMIT_MATCH=...)  within  it.  If  such a setting is
+       tings such as (*LIMIT_MATCH=...)  within  it.  If  such  a  setting  is
       present and is lower than the minimum matching value, the minimum value
-       cannot  be  found because pcre2_set_match_limit() etc. are only able to
+       cannot be found because pcre2_set_match_limit() etc. are only  able  to
       reduce the value of an in-pattern limit; they cannot increase it.

-       For non-DFA matching, the minimum depth_limit number is  a  measure  of
+       For  non-DFA  matching,  the minimum depth_limit number is a measure of
       how much nested backtracking happens (that is, how deeply the pattern's
-       tree is searched). In the case of DFA  matching,  depth_limit  controls
-       the  depth of recursive calls of the internal function that is used for
+       tree  is  searched).  In the case of DFA matching, depth_limit controls
+       the depth of recursive calls of the internal function that is used  for
       handling pattern recursion, lookaround assertions, and atomic groups.

       For non-DFA matching, the match_limit number is a measure of the amount
       of backtracking that takes place, and learning the minimum value can be
-       instructive. For most simple matches, the number is  quite  small,  but
-       for  patterns with very large numbers of matching possibilities, it can
-       become large very quickly with increasing length of subject string.  In
-       the  case  of  DFA  matching,  match_limit controls the total number of
+       instructive.  For  most  simple matches, the number is quite small, but
+       for patterns with very large numbers of matching possibilities, it  can
+       become  large very quickly with increasing length of subject string. In
+       the case of DFA matching, match_limit  controls  the  total  number  of
       calls, both recursive and non-recursive, to the internal matching func-
       tion, thus controlling the overall amount of computing resource that is
       used.

-       For both  kinds  of  matching,  the  heap_limit  number,  which  is  in
-       kibibytes  (units of 1024 bytes), limits the amount of heap memory used
-       for matching. A value of zero disables the use of any heap memory; many
-       simple  pattern  matches can be done without using the heap, so zero is
-       not an unreasonable setting.
+       For  both  kinds  of  matching,  the  heap_limit  number,  which  is in
+       kibibytes (units of 1024 bytes), limits the amount of heap memory  used
+       for matching.

   Showing MARK names


       The mark modifier causes the names from backtracking control verbs that
-       are  returned from calls to pcre2_match() to be displayed. If a mark is
-       returned for a match, non-match, or partial match, pcre2test shows  it.
-       For  a  match, it is on a line by itself, tagged with "MK:". Otherwise,
+       are returned from calls to pcre2_match() to be displayed. If a mark  is
+       returned  for a match, non-match, or partial match, pcre2test shows it.
+       For a match, it is on a line by itself, tagged with  "MK:".  Otherwise,
       it is added to the non-match message.

   Showing memory usage

-       The memory modifier causes pcre2test to log the sizes of all heap  mem-
-       ory   allocation  and  freeing  calls  that  occur  during  a  call  to
-       pcre2_match() or pcre2_dfa_match(). These occur only when a  match  re-
-       quires  a  bigger  vector than the default for remembering backtracking
-       points (pcre2_match()) or for internal  workspace  (pcre2_dfa_match()).
-       In  many cases there will be no heap memory used and therefore no addi-
-       tional output. No heap memory is allocated during matching with JIT, so
-       in  that  case the memory modifier never has any effect. For this modi-
-       fier to work, the null_context modifier must not be  set  on  both  the
+       The  memory modifier causes pcre2test to log the sizes of all heap mem-
+       ory  allocation  and  freeing  calls  that  occur  during  a  call   to
+       pcre2_match()  or pcre2_dfa_match(). In the latter case, heap memory is
+       used only when a match requires more internal workspace  than  the  de-
+       fault  allocation  on the stack, so in many cases there will be no out-
+       put. No heap memory is allocated during matching  with  JIT.  For  this
+       modifier to work, the null_context modifier must not be set on both the
       pattern and the subject, though it can be set on one or the other.

   Setting a starting offset

-       The  offset  modifier  sets  an  offset  in the subject string at which
+       The offset modifier sets an offset  in  the  subject  string  at  which
       matching starts. Its value is a number of code units, not characters.

   Setting an offset limit

-       The offset_limit modifier sets a limit for  unanchored  matches.  If  a
+       The  offset_limit  modifier  sets  a limit for unanchored matches. If a
       match cannot be found starting at or before this offset in the subject,
       a "no match" return is given. The data value is a number of code units,
-       not  characters. When this modifier is used, the use_offset_limit modi-
+       not characters. When this modifier is used, the use_offset_limit  modi-
       fier must have been set for the pattern; if not, an error is generated.

   Setting the size of the output vector

-       The ovector modifier applies only to the subject line in which  it  ap-
+       The  ovector  modifier applies only to the subject line in which it ap-
       pears, though of course it can also be used to set a default in a #sub-
-       ject command. It specifies the number of  pairs  of  offsets  that  are
+       ject  command.  It  specifies  the  number of pairs of offsets that are
       available for storing matching information. The default is 15.

-       A  value of zero is useful when testing the POSIX API because it causes
+       A value of zero is useful when testing the POSIX API because it  causes
       regexec() to be called with a NULL capture vector. When not testing the
-       POSIX  API,  a  value  of  zero  is used to cause pcre2_match_data_cre-
-       ate_from_pattern() to be called, in order to create a  match  block  of
+       POSIX API, a value of  zero  is  used  to  cause  pcre2_match_data_cre-
+       ate_from_pattern()  to  be  called, in order to create a match block of
       exactly the right size for the pattern. (It is not possible to create a
-       match block with a zero-length ovector; there is always  at  least  one
+       match  block  with  a zero-length ovector; there is always at least one
       pair of offsets.)

   Passing the subject as zero-terminated

       By default, the subject string is passed to a native API matching func-
       tion with its correct length. In order to test the facility for passing
-       a  zero-terminated  string, the zero_terminate modifier is provided. It
-       causes the length to be passed as PCRE2_ZERO_TERMINATED. When  matching
+       a zero-terminated string, the zero_terminate modifier is  provided.  It
+       causes  the length to be passed as PCRE2_ZERO_TERMINATED. When matching
       via the POSIX interface, this modifier is ignored, with a warning.

-       When  testing  pcre2_substitute(), this modifier also has the effect of
+       When testing pcre2_substitute(), this modifier also has the  effect  of
       passing the replacement string as zero-terminated.

-   Passing a NULL context
+   Passing a NULL context, subject, or replacement

-       Normally,  pcre2test  passes  a   context   block   to   pcre2_match(),
-       pcre2_dfa_match(),  pcre2_jit_match()  or  pcre2_substitute().   If the
-       null_context modifier is set, however, NULL  is  passed.  This  is  for
-       testing  that  the matching and substitution functions behave correctly
-       in this case (they use default values). This modifier  cannot  be  used
-       with the find_limits or substitute_callout modifiers.
+       Normally,   pcre2test   passes   a   context  block  to  pcre2_match(),
+       pcre2_dfa_match(), pcre2_jit_match()  or  pcre2_substitute().   If  the
+       null_context  modifier  is  set,  however,  NULL is passed. This is for
+       testing that the matching and substitution functions  behave  correctly
+       in  this  case  (they use default values). This modifier cannot be used
+       with the find_limits, find_limits_noheap, or  substitute_callout  modi-
+       fiers.
+
+       Similarly,  for  testing purposes, if the null_subject or null_replace-
+       ment modifier is set, the subject or replacement  string  pointers  are
+       passed as NULL, respectively, to the relevant functions.


 THE ALTERNATIVE MATCHING FUNCTION
@ -1896,11 +1945,11 @@ SEE ALSO
 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 30 July 2019
-       Copyright (c) 1997-2019 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "24 May 2019" "PCRE2 10.34"
+.TH PCRE2UNICODE 3 "22 December 2021" "PCRE2 10.40"
 .SH NAME
 PCRE - Perl-compatible regular expressions (revised API)
 .SH "UNICODE AND UTF SUPPORT"
@ -7,7 +7,7 @@ PCRE - Perl-compatible regular expressions (revised API)
 PCRE2 is normally built with Unicode support, though if you do not need it, you
 can build it without, in which case the library will be smaller. With Unicode
 support, PCRE2 has knowledge of Unicode character properties and can process
-text strings in UTF-8, UTF-16, or UTF-32 format (depending on the code unit
+strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit
 width), but this is not the default. Unless specifically requested, PCRE2
 treats each code unit in a string as one character.
 .P
@ -40,10 +40,11 @@ handled, as documented below.
 .sp
 When PCRE2 is built with Unicode support, the escape sequences \ep{..},
 \eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+The Unicode properties that can be tested are a subset of those that Perl
+supports. Currently they are limited to the general category properties such as
+Lu for an upper case letter or Nd for a decimal number, the Unicode script
+names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived
+properties Any and LC (synonym L&). Full lists are given in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@ -51,10 +52,10 @@ and
 .\" HREF
 \fBpcre2syntax\fP
 .\"
-documentation. Only the short names for properties are supported. For example,
-\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE2 does not support this.
+documentation. In general, only the short names for properties are supported.
+For example, \ep{L} matches a letter. Its longer synonym, \ep{Letter}, is not
+supported. Furthermore, in Perl, many properties may optionally be prefixed by
+"Is", for compatibility with Perl 5.6. PCRE2 does not support this.
 .
 .
 .SH "WIDE CHARACTERS AND UTF MODES"
@ -126,14 +127,16 @@ However, the special horizontal and vertical white space matching escapes (\eh,
 not PCRE2_UCP is set.
 .
 .
-.SH "CASE-EQUIVALENCE IN UTF MODE"
+.SH "UNICODE CASE-EQUIVALENCE"
 .rs
 .sp
-Case-insensitive matching in UTF mode makes use of Unicode properties except
-for characters whose code points are less than 128 and that have at most two
-case-equivalent values. For these, a direct table lookup is used for speed. A
-few Unicode characters such as Greek sigma have more than two code points that
-are case-equivalent, and these are treated specially.
+If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use
+of Unicode properties except for characters whose code points are less than 128
+and that have at most two case-equivalent values. For these, a direct table
+lookup is used for speed. A few Unicode characters such as Greek sigma have
+more than two code points that are case-equivalent, and these are treated
+specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
+processing for non-UTF character encodings such as UCS-2.
 .
 .
 .\" HTML <a name="scriptruns"></a>
@ -446,7 +449,7 @@ can be useful when searching for UTF text in executable or other binary files.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@ -455,6 +458,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 24 May 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 22 December 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/index.md
+++ b/index.md
@ -0,0 +1,56 @@
+# PCRE2 - Perl-Compatible Regular Expressions
+
+The PCRE2 library is a set of C functions that implement regular expression
+pattern matching using the same syntax and semantics as Perl 5. PCRE2 has its
+own native API, as well as a set of wrapper functions that correspond to the
+POSIX regular expression API. The PCRE2 library is free, even for building 
+proprietary software. It comes in three forms, for processing 8-bit, 16-bit,
+or 32-bit code units, in either literal or UTF encoding.
+
+PCRE2 was first released in 2015 to replace the API in the original PCRE 
+library, which is now obsolete and no longer maintained. As well as a more
+flexible API, the code of PCRE2 has been much improved since the fork.
+ 
+## Download
+
+As well as downloading from the 
+[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2 
+or the older, unmaintained PCRE1 library from an 
+[*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge.
+
+You can check out the PCRE2 source code via Git or Subversion:
+
+    git clone https://github.com/PCRE2Project/pcre2.git
+    svn co    https://github.com/PCRE2Project/pcre2.git
+
+## Contributed Ports
+
+If you just need the command-line PCRE2 tools on Windows, precompiled binary
+versions are available at this 
+[Rexegg page](http://www.rexegg.com/pcregrep-pcretest.html).
+
+A PCRE2 port for z/OS, a mainframe operating system which uses EBCDIC as its
+default character encoding, can be found at 
+[http://www.cbttape.org](http://www.cbttape.org/) (File 939).
+
+## Documentation
+
+You can read the PCRE2 documentation 
+[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html).
+
+Comparisons to Perl's regular expression semantics can be found in the
+community-authored Wikipedia entry for PCRE.
+
+There is a curated summary of changes for each PCRE release, copies of
+documentation from older releases, and other useful information from the
+third-party-authored
+[RexEgg PCRE Documentation and Change Log page](http://www.rexegg.com/pcre-documentation.html).
+
+## Contact
+
+To report a problem with the PCRE2 library, or to make a feature request, please
+use the PCRE2 GitHub issues tracker. There is a mailing list for discussion of
+ PCRE2 issues and development at pcre2-dev@googlegroups.com, which is where any
+announcements will be made. You can browse the 
+[list archives](https://groups.google.com/g/pcre2-dev).
+
--- a/libpcre2-16.pc.in
+++ b/libpcre2-16.pc.in
@ -8,6 +8,6 @@ includedir=@includedir@
 Name: libpcre2-16
 Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support
 Version: @PACKAGE_VERSION@
-Libs: -L${libdir} -lpcre2-16
+Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@
 Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
 Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
--- a/libpcre2-32.pc.in
+++ b/libpcre2-32.pc.in
@ -8,6 +8,6 @@ includedir=@includedir@
 Name: libpcre2-32
 Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support
 Version: @PACKAGE_VERSION@
-Libs: -L${libdir} -lpcre2-32
+Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@
 Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
 Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
--- a/libpcre2-8.pc.in
+++ b/libpcre2-8.pc.in
@ -8,6 +8,6 @@ includedir=@includedir@
 Name: libpcre2-8
 Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support
 Version: @PACKAGE_VERSION@
-Libs: -L${libdir} -lpcre2-8
+Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@
 Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
 Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
--- a/libpcre2-posix.pc.in
+++ b/libpcre2-posix.pc.in
@ -8,6 +8,6 @@ includedir=@includedir@
 Name: libpcre2-posix
 Description: Posix compatible interface to libpcre2-8
 Version: @PACKAGE_VERSION@
-Libs: -L${libdir} -lpcre2-posix
+Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@
 Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
 Requires.private: libpcre2-8
--- a/maint/GenerateCommon.py
+++ b/maint/GenerateCommon.py
@ -0,0 +1,355 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+
+# This file is a Python module containing common lists and functions for the
+# GenerateXXX scripts that create various .c and .h files from Unicode data
+# files. It was created as part of a re-organization of these scripts in
+# December 2021.
+
+
+import re
+
+
+# ---------------------------------------------------------------------------
+#                             DATA LISTS
+# ---------------------------------------------------------------------------
+
+# BIDI classes in the DerivedBidiClass.txt file, with comments.
+
+# The list is flat: each class abbreviation is immediately followed by the
+# string describing it, so the entries come in pairs.
+bidi_classes = [
+  'AL',  'Arabic letter',
+  'AN',  'Arabic number',
+  'B',   'Paragraph separator',
+  'BN',  'Boundary neutral',
+  'CS',  'Common separator',
+  'EN',  'European number',
+  'ES',  'European separator',
+  'ET',  'European terminator',
+  'FSI', 'First strong isolate',
+  'L',   'Left to right',
+  'LRE', 'Left to right embedding',
+  'LRI', 'Left to right isolate',
+  'LRO', 'Left to right override',
+  'NSM', 'Non-spacing mark',
+  'ON',  'Other neutral',
+  'PDF', 'Pop directional format',
+  'PDI', 'Pop directional isolate',
+  'R',   'Right to left',
+  'RLE', 'Right to left embedding',
+  'RLI', 'Right to left isolate',
+  'RLO', 'Right to left override',
+  'S',   'Segment separator',
+  'WS',  'White space'
+  ]
+
+# Particular category property names, with comments. NOTE: If ever this list
+# is changed, the table called "catposstab" in the pcre2_auto_possess.c file
+# must be edited to keep in step.
+
+# The list is flat: each two-letter abbreviation is immediately followed by
+# its description. Code later in this file steps through it two entries at a
+# time and uses the first letter of each abbreviation.
+category_names = [
+  'Cc', 'Control',
+  'Cf', 'Format',
+  'Cn', 'Unassigned',
+  'Co', 'Private use',
+  'Cs', 'Surrogate',
+  'Ll', 'Lower case letter',
+  'Lm', 'Modifier letter',
+  'Lo', 'Other letter',
+  'Lt', 'Title case letter',
+  'Lu', 'Upper case letter',
+  'Mc', 'Spacing mark',
+  'Me', 'Enclosing mark',
+  'Mn', 'Non-spacing mark',
+  'Nd', 'Decimal number',
+  'Nl', 'Letter number',
+  'No', 'Other number',
+  'Pc', 'Connector punctuation',
+  'Pd', 'Dash punctuation',
+  'Pe', 'Close punctuation',
+  'Pf', 'Final punctuation',
+  'Pi', 'Initial punctuation',
+  'Po', 'Other punctuation',
+  'Ps', 'Open punctuation',
+  'Sc', 'Currency symbol',
+  'Sk', 'Modifier symbol',
+  'Sm', 'Mathematical symbol',
+  'So', 'Other symbol',
+  'Zl', 'Line separator',
+  'Zp', 'Paragraph separator',
+  'Zs', 'Space separator'
+  ]
+
+# The Extended_Pictographic property is not found in the file where all the
+# others are (GraphemeBreakProperty.txt). It comes from the emoji-data.txt
+# file, but we list it here so that the name has the correct index value.
+
+# The list is flat: each property name is immediately followed by a string
+# that starts with the property's index value and, for the Hangul entries,
+# continues with a short description.
+break_properties = [
+  'CR',                    ' 0',
+  'LF',                    ' 1',
+  'Control',               ' 2',
+  'Extend',                ' 3',
+  'Prepend',               ' 4',
+  'SpacingMark',           ' 5',
+  'L',                     ' 6 Hangul syllable type L',
+  'V',                     ' 7 Hangul syllable type V',
+  'T',                     ' 8 Hangul syllable type T',
+  'LV',                    ' 9 Hangul syllable type LV',
+  'LVT',                   '10 Hangul syllable type LVT',
+  'Regional_Indicator',    '11',
+  'Other',                 '12',
+  'ZWJ',                   '13',
+  'Extended_Pictographic', '14'
+  ]
+
+# List of files from which the names of Boolean properties are obtained, along
+# with a list of regex patterns for properties to be ignored, and a list of
+# extra pattern names to add.
+
+# Data files, looked up under Unicode.tables/, that are scanned for names.
+bool_propsfiles = [
+  'PropList.txt',
+  'DerivedCoreProperties.txt',
+  'emoji-data.txt'
+  ]
+
+# A property is dropped when re.match() succeeds for any of these patterns.
+bool_propsignore = [
+  r'^Other_',
+  r'^Hyphen$'
+  ]
+
+# Names added to the list in addition to those found in the files.
+bool_propsextras = [
+  'ASCII',
+  'Bidi_Mirrored'
+  ]
+
+
+# ---------------------------------------------------------------------------
+#                   GET BOOLEAN PROPERTY NAMES
+# ---------------------------------------------------------------------------
+
+# Get a list of Boolean property names from a number of files.
+
+def getbpropslist():
+  # Returns a sorted list of Boolean property names. The names are the second
+  # semicolon-separated field of each data line in the files listed in
+  # bool_propsfiles, minus any name matched by a pattern in bool_propsignore,
+  # plus the names in bool_propsextras. Exits with status 1 if a file cannot
+  # be opened.
+
+  # Imported locally because this function is called while the module is
+  # still loading, before the file-level "import sys" further down has run.
+  import sys
+
+  bplist = []
+  bplast = ""
+
+  for filename in bool_propsfiles:
+    path = 'Unicode.tables/' + filename
+    try:
+      # The Unicode data files are UTF-8 and may hold non-ASCII characters in
+      # their comments, so do not depend on the locale's default encoding.
+      file = open(path, 'r', encoding='utf-8')
+    except IOError:
+      print(f"** Couldn't open {path}\n")
+      sys.exit(1)
+
+    # The with statement closes the file even if parsing raises.
+    with file:
+      for line in file:
+        line = re.sub(r'#.*', '', line)
+        data = list(map(str.strip, line.split(';')))
+        # Skip lines without a property field, and repeats of the property
+        # seen on the previous data line.
+        if len(data) <= 1 or data[1] == bplast:
+          continue
+        bplast = data[1]
+        if not any(re.match(pat, bplast) is not None for pat in bool_propsignore):
+          bplist.append(bplast)
+
+  bplist.extend(bool_propsextras)
+  bplist.sort()
+  return bplist
+
+# Built once, when this module is loaded; collect_property_names() below
+# consults it.
+bool_properties = getbpropslist()
+# Ceiling of len(bool_properties) / 32. Presumably the number of 32-bit words
+# needed for a bit set with one bit per Boolean property -- confirm against
+# the scripts that use this value.
+bool_props_list_item_size = (len(bool_properties) + 31) // 32
+
+
+
+# ---------------------------------------------------------------------------
+#                  COLLECTING PROPERTY NAMES AND ALIASES
+# ---------------------------------------------------------------------------
+
+script_names = ['Unknown']
+abbreviations = {}
+
+def collect_property_names():
+  global script_names
+  global abbreviations
+
+  # Script names are appended to script_names in the order they first occur
+  # in Scripts.txt; a run of consecutive lines for one script adds it once.
+  script_line = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_]+) #')
+
+  previous = ""
+  with open("Unicode.tables/Scripts.txt") as scripts:
+    for text in scripts:
+      found = script_line.match(text)
+      if found is not None and found.group(1) != previous:
+        previous = found.group(1)
+        script_names.append(previous)
+
+  # A line may carry a trailing comment, so the fields are picked out with a
+  # regex rather than by a plain split on semicolons.
+  value_alias = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_ ]+))?')
+
+  # For "sc" lines the third field is the key. The value is a tuple holding
+  # the second field (left out when it equals the key) and, if present, the
+  # fourth field.
+  with open("Unicode.tables/PropertyValueAliases.txt") as aliases:
+    for text in aliases:
+      found = value_alias.match(text)
+      if found is None:
+        continue
+
+      prop, short, full, extra = found.groups()
+      if prop != "sc":
+        continue
+
+      if short == full:
+        abbreviations[full] = ()
+      elif extra is None:
+        abbreviations[full] = (short,)
+      else:
+        abbreviations[full] = (short, extra)
+
+  # Abbreviations of the Boolean properties go into the same dictionary,
+  # keyed by the second field of each PropertyAliases.txt line.
+  bin_alias = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_]+))?')
+  with open("Unicode.tables/PropertyAliases.txt") as aliases:
+    for text in aliases:
+      found = bin_alias.match(text)
+      if found is None:
+        continue
+
+      short, full, extra = found.groups()
+      if full in bool_properties:
+        abbreviations[full] = (short,) if extra is None else (short, extra)
+
+collect_property_names()
+
+
+
+# ---------------------------------------------------------------------------
+#                      REORDERING SCRIPT NAMES
+# ---------------------------------------------------------------------------
+
+script_abbrevs = []
+
+def reorder_scripts():
+  global script_names
+  global script_abbrevs
+  global abbreviations
+
+  # Each script is represented by the first abbreviation collected for it,
+  # or by its own name when none was collected.
+  for script in script_names:
+    known = abbreviations[script]
+    script_abbrevs.append(known[0] if len(known) > 0 else script)
+
+  # Gather every abbreviation that is mentioned in ScriptExtensions.txt.
+  extended_script_abbrevs = set()
+  with open("Unicode.tables/ScriptExtensions.txt") as f:
+    names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+) #')
+
+    for line in f:
+      found = names_re.match(line)
+      if found is not None:
+        extended_script_abbrevs.update(found.group(1).split(" "))
+
+  # Stable partition: sorted() keeps equal keys in their existing order and
+  # False sorts before True, so scripts that have extensions come first and
+  # both groups keep their previous relative order.
+  order = sorted(range(len(script_abbrevs)),
+                 key=lambda pos: script_abbrevs[pos] not in extended_script_abbrevs)
+
+  script_names = [script_names[pos] for pos in order]
+  script_abbrevs = [script_abbrevs[pos] for pos in order]
+
+reorder_scripts()
+# Ceiling of (index of 'Unknown') / 32. reorder_scripts() places every script
+# whose abbreviation occurs in ScriptExtensions.txt ahead of the others, so,
+# assuming 'Unknown' itself has no extensions, its index is the number of
+# scripts that do. Presumably this sizes a bit set in 32-bit words -- confirm
+# against the scripts that use this value.
+script_list_item_size = (script_names.index('Unknown') + 31) // 32
+
+
+# ---------------------------------------------------------------------------
+#                         DERIVED LISTS
+# ---------------------------------------------------------------------------
+
+# Create general character property names from the first letters of the
+# particular categories.
+
+# Every second entry of category_names, starting with the first, is a
+# category abbreviation; its first letter names the general category.
+gcn_set = {abbrev[0] for abbrev in category_names[::2]}
+general_category_names = sorted(gcn_set)
+
+
+# ---------------------------------------------------------------------------
+#                           FUNCTIONS
+# ---------------------------------------------------------------------------
+
+import sys
+
+# Open an output file, using the command's argument or a default. Write common
+# preliminary header information.
+
+def open_output(default):
+  # Returns a file object, opened for writing, to which the standard header
+  # comment has already been written. The file name is the single command
+  # line argument if one was given, otherwise "default". Exits with status 1
+  # if more than one argument was given or the file cannot be opened.
+  extra_args = sys.argv[1:]
+  if len(extra_args) > 1:
+    print('** Too many arguments: just give a file name')
+    sys.exit(1)
+  output_name = extra_args[0] if extra_args else default
+
+  try:
+    file = open(output_name, "w")
+  except IOError:
+    print("** Couldn't open %s" % output_name)
+    sys.exit(1)
+
+  # Only the last path component of the running script's name is quoted in
+  # the generated header.
+  script_name = sys.argv[0].rsplit('/', 1)[-1]
+
+  file.write("""\
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+""")
+
+  regenerate_note = ("Instead, modify the maint/%s script and run it to generate\n"
+                     "a new version of this code.\n\n")
+  file.write(regenerate_note % script_name)
+
+  file.write("""\
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+\n""")
+  return file
+
+# End of GenerateCommon.py
--- a/maint/GenerateTest26.py
+++ b/maint/GenerateTest26.py
@ -0,0 +1,188 @@
+#! /usr/bin/python
+
+#                   PCRE2 UNICODE PROPERTY SUPPORT
+#                   ------------------------------
+#
+# This file auto-generates unicode property tests and their expected output.
+# It is recommended to re-run this generator after the unicode files are
+# updated. The names of the generated files are `testinput26` and `testoutput26`
+
+import re
+import sys
+
+from GenerateCommon import \
+  script_names, \
+  script_abbrevs
+
+def write_both(text):
+  # Send the same text to the test input file and then to the expected
+  # output file.
+  for stream in (input_file, output_file):
+    stream.write(text)
+
+def to_string_char(ch_idx):
+  # Code points from 32 up to 127 are returned as the character itself.
+  # Everything else becomes a \x{...} hex escape; values below 16 get a
+  # leading zero so that the escape shows two hex digits.
+  if 32 <= ch_idx < 128:
+    return chr(ch_idx)
+  template = "\\x{0%x}" if ch_idx < 16 else "\\x{%x}"
+  return template % ch_idx
+
+if len(sys.argv) > 2:
+  print('** Too many arguments: just give a directory name')
+  sys.exit(1)
+
+# Without an argument the file names are used as they are. Otherwise the
+# given directory name is prepended to them, with a slash added if it does
+# not already end with one.
+if len(sys.argv) == 2:
+  output_directory = sys.argv[1]
+  if output_directory[-1:] != "/":
+    output_directory += "/"
+else:
+  output_directory = ""
+
+try:
+  input_file = open(output_directory + "testinput26", "w")
+  output_file = open(output_directory + "testoutput26", "w")
+except IOError:
+  print("** Couldn't open output files")
+  sys.exit(1)
+
+write_both("# These tests are generated by maint/GenerateTest26.py, do not edit.\n\n")
+
+# ---------------------------------------------------------------------------
+#                      UNICODE SCRIPT EXTENSION TESTS
+# ---------------------------------------------------------------------------
+
+write_both("# Unicode Script Extension tests.\n\n")
+
+def gen_script_tests():
+  # Write the script and script-extension tests. Data is gathered from
+  # Unicode.tables/Scripts.txt and Unicode.tables/ScriptExtensions.txt, then
+  # one group of tests is written for every script except "Unknown".
+  #
+  # script_data[i] is the record for script_names[i]:
+  #   [0] low end of the first range listed for the script in Scripts.txt
+  #   [1] high end of the last range listed for it in Scripts.txt
+  #   [2] lowest code point of any ScriptExtensions.txt range that names it
+  #   [3] highest code point of any such range
+  #   [4] first code point found in those ranges whose Scripts.txt script is
+  #       a different one (or that is not listed there at all)
+  # Entries [2] to [4] stay None for scripts that never appear in
+  # ScriptExtensions.txt; [4] also stays None if no such code point is found.
+  #
+  # char_data[cp] is the Scripts.txt script name of code point cp, or None.
+  script_data = [None] * len(script_names)
+  char_data = [None] * 0x110000
+
+  # Used for both files. Group 3 admits spaces because a ScriptExtensions.txt
+  # line can name several abbreviations, which are split on spaces below.
+  property_re = re.compile("^([0-9A-F]{4,6})(?:\\.\\.([0-9A-F]{4,6}))? +; ([A-Za-z_ ]+) #")
+  prev_name = ""
+  script_idx = -1
+
+  with open("Unicode.tables/Scripts.txt") as f:
+    for line in f:
+      match_obj = property_re.match(line)
+
+      if match_obj == None:
+        continue
+
+      # The index is looked up again only when the script name changes from
+      # one matching line to the next.
+      name = match_obj.group(3)
+      if name != prev_name:
+        script_idx = script_names.index(name)
+        prev_name = name
+
+      low = int(match_obj.group(1), 16)
+      high = low
+      char_data[low] = name
+
+      # Group 2 is present only when the line gives a low..high range.
+      if match_obj.group(2) != None:
+        high = int(match_obj.group(2), 16)
+        for idx in range(low + 1, high + 1):
+           char_data[idx] = name
+
+      # [0] is fixed when the record is created; [1] is overwritten by every
+      # later line for the same script.
+      if script_data[script_idx] == None:
+        script_data[script_idx] = [low, None, None, None, None]
+      script_data[script_idx][1] = high
+
+  # Maps each abbreviation already seen in ScriptExtensions.txt to its index
+  # in script_abbrevs.
+  extended_script_indicies = {}
+
+  with open("Unicode.tables/ScriptExtensions.txt") as f:
+    for line in f:
+      match_obj = property_re.match(line)
+
+      if match_obj == None:
+        continue
+
+      low = int(match_obj.group(1), 16)
+      high = low
+      if match_obj.group(2) != None:
+        high = int(match_obj.group(2), 16)
+
+      for abbrev in match_obj.group(3).split(" "):
+        if abbrev not in extended_script_indicies:
+          # First sighting: start the extension range with this line's range.
+          # NOTE(review): assumes the script already has a record from
+          # Scripts.txt; rec would be None otherwise.
+          idx = script_abbrevs.index(abbrev)
+          extended_script_indicies[abbrev] = idx
+          rec = script_data[idx]
+          rec[2] = low
+          rec[3] = high
+        else:
+          # Seen before: widen the recorded extension range if necessary.
+          idx = extended_script_indicies[abbrev]
+          rec = script_data[idx]
+          if rec[2] > low:
+            rec[2] = low
+          if rec[3] < high:
+            rec[3] = high
+
+        # Look for a code point in this range that Scripts.txt does not
+        # assign to this script. idx is reused as the loop variable here; it
+        # is set afresh for the next abbreviation.
+        if rec[4] == None:
+          name = script_names[idx]
+          for idx in range(low, high + 1):
+            if char_data[idx] != name:
+              rec[4] = idx
+              break
+
+  # Switches between "scx" and "Script_Extensions" from one script that has
+  # extensions to the next.
+  long_property_name = False
+
+  # In each test the pattern and subject lines go to both files, while the
+  # expected result line goes to the output file only.
+  # NOTE(review): assumes every script other than "Unknown" has a record;
+  # rec[0] would fail on None.
+  for idx, rec in enumerate(script_data):
+    script_name = script_names[idx]
+
+    if script_name == "Unknown":
+      continue
+
+    script_abbrev = script_abbrevs[idx]
+
+    write_both("# Base script check\n")
+    write_both("/^\\p{sc=%s}/utf\n" % script_name)
+    write_both("  %s\n" % to_string_char(rec[0]))
+    output_file.write(" 0: %s\n" % to_string_char(rec[0]))
+    write_both("\n")
+
+    write_both("/^\\p{Script=%s}/utf\n" % script_abbrev)
+    write_both("  %s\n" % to_string_char(rec[1]))
+    output_file.write(" 0: %s\n" % to_string_char(rec[1]))
+    write_both("\n")
+
+    # The remaining positive tests apply only to scripts that appeared in
+    # ScriptExtensions.txt.
+    if rec[2] != None:
+      property_name = "scx"
+      if long_property_name:
+        property_name = "Script_Extensions"
+
+      write_both("# Script extension check\n")
+      write_both("/^\\p{%s}/utf\n" % script_name)
+      write_both("  %s\n" % to_string_char(rec[2]))
+      output_file.write(" 0: %s\n" % to_string_char(rec[2]))
+      write_both("\n")
+
+      write_both("/^\\p{%s=%s}/utf\n" % (property_name, script_abbrev))
+      write_both("  %s\n" % to_string_char(rec[3]))
+      output_file.write(" 0: %s\n" % to_string_char(rec[3]))
+      write_both("\n")
+
+      long_property_name = not long_property_name
+
+      # A code point that is in the script by extension only is expected to
+      # match the plain script name but not the sc= form.
+      if rec[4] != None:
+        write_both("# Script extension only character\n")
+        write_both("/^\\p{%s}/utf\n" % script_name)
+        write_both("  %s\n" % to_string_char(rec[4]))
+        output_file.write(" 0: %s\n" % to_string_char(rec[4]))
+        write_both("\n")
+
+        write_both("/^\\p{sc=%s}/utf\n" % script_name)
+        write_both("  %s\n" % to_string_char(rec[4]))
+        output_file.write("No match\n")
+        write_both("\n")
+      else:
+        print("External character has not found for %s" % script_name)
+
+    # The negative test uses the code point just above the larger of [1] and
+    # [3]. NOTE(review): nothing here checks that this code point really is
+    # outside the script.
+    high = rec[1]
+    if rec[3] != None and rec[3] > rec[1]:
+      high = rec[3]
+    write_both("# Character not in script\n")
+    write_both("/^\\p{%s}/utf\n" % script_name)
+    write_both("  %s\n" % to_string_char(high + 1))
+    output_file.write("No match\n")
+    write_both("\n")
+
+
+gen_script_tests()
+
+write_both("# End of testinput26\n")
--- a/Show More
+++ b/Show More