Cleanup of Makefile.os4, added release rule and a README file for this release

Implement -Z in pcre2grep and update documentation
Added some special heap tests
2022-07-31 20:34:33 +01:00 · 2022-07-30 17:41:49 +01:00 · 2022-07-28 17:58:19 +01:00 · 2022-07-27 18:00:40 +01:00 · 2022-07-27 17:44:55 +01:00 · 2022-07-15 17:18:11 +01:00
324 changed files with 99694 additions and 45579 deletions
--- a/.bazelrc
+++ b/.bazelrc
@ -0,0 +1,3 @@
+common --experimental_enable_bzlmod
+build --incompatible_enable_cc_toolchain_resolution
+build --incompatible_strict_action_env
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -0,0 +1,77 @@
+
+name: Build
+on: [push, pull_request]
+
+jobs:
+  linux:
+    name: Linux
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        
+      - name: Autogen
+        run: ./autogen.sh
+        
+      - name: Configure
+        run: ./configure --enable-jit --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32
+        
+      - name: Build
+        run: make
+        
+      - name: Test (main test script)
+        run: ./RunTest
+
+      - name: Test (JIT test program)
+        run: ./pcre2_jit_test
+
+      - name: Test (pcre2grep test script)
+        run: ./RunGrepTest
+    
+  alpine:
+    name: alpine
+    runs-on: ubuntu-latest
+    container: alpine 
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        
+      - name: Autotools
+        run: apk add --no-cache automake autoconf gcc libtool make musl-dev 
+        
+      - name: Autogen
+        run: ./autogen.sh
+        
+      - name: Configure
+        run: ./configure --enable-jit --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32
+        
+      - name: Build
+        run: make
+        
+      - name: Test (main test script)
+        run: ./RunTest
+
+      - name: Test (JIT test program)
+        run: ./pcre2_jit_test
+
+      - name: Test (pcre2grep test script)
+        run: ./RunGrepTest
+        
+  windows:      
+    name: 32bit Windows
+    runs-on: windows-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Configure
+        run: cmake -DPCRE2_SUPPORT_JIT=ON -DPCRE2_BUILD_PCRE2_16=ON -DPCRE2_BUILD_PCRE2_32=ON -B build -A Win32
+
+      - name: Build
+        run: cmake --build build
+
+      - name: Test
+        run: |
+          cd build\Debug
+          ..\..\RunTest.bat
+           
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@ -0,0 +1,73 @@
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+#
+# You may wish to alter this file to override the set of languages analyzed,
+# or to provide custom queries or build logic.
+#
+# ******** NOTE ********
+# We have attempted to detect the languages in your repository. Please check
+# the `language` matrix defined below to confirm you have the correct set of
+# supported CodeQL languages.
+#
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: [ master ]
+  schedule:
+    - cron: '27 6 * * 4'
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'cpp', 'python' ]
+        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        # Learn more about CodeQL language support at https://git.io/codeql-language-support
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v2
+
+    # Initializes the CodeQL tools for scanning.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v1
+      with:
+        languages: ${{ matrix.language }}
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+        # queries: ./path/to/local/query, your-org/your-repo/queries@main
+
+    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
+    # If this step fails, then you should remove it and run the build manually (see below)
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v1
+
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 https://git.io/JvXDl
+
+    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
+    #    and modify them (or add more) to build your code if your project
+    #    uses a compiled language
+
+    #- run: |
+    #   make bootstrap
+    #   make release
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v1
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@ -0,0 +1,55 @@
+name: Scorecards supply-chain security
+on:
+  # Only the default branch is supported.
+  branch_protection_rule:
+  schedule:
+    - cron: '23 17 * * 1'
+  push:
+    branches: [ master ]
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+  analysis:
+    name: Scorecards analysis
+    runs-on: ubuntu-latest
+    permissions:
+      # Needed to upload the results to code-scanning dashboard.
+      security-events: write
+      actions: read
+      contents: read
+
+    steps:
+      - name: "Checkout code"
+        uses: actions/checkout@ec3a7ce113134d7a93b817d10a8272cb61118579 # v2.4.0
+        with:
+          persist-credentials: false
+
+      - name: "Run analysis"
+        uses: ossf/scorecard-action@c1aec4ac820532bab364f02a81873c555a0ba3a1 # v1.0.4
+        with:
+          results_file: results.sarif
+          results_format: sarif
+          # Read-only PAT token. To create it,
+          # follow the steps in https://github.com/ossf/scorecard-action#pat-token-creation.
+          repo_token: ${{ secrets.SCORECARD_READ_TOKEN }}
+          # Publish the results to enable scorecard badges. For more details, see
+          # https://github.com/ossf/scorecard-action#publishing-results.
+          # For private repositories, `publish_results` will automatically be set to `false`,
+          # regardless of the value entered here.
+          publish_results: true
+
+      # Upload the results as artifacts (optional).
+      - name: "Upload artifact"
+        uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2.3.1
+        with:
+          name: SARIF file
+          path: results.sarif
+          retention-days: 5
+
+      # Upload the results to GitHub's code scanning dashboard.
+      - name: "Upload to code-scanning"
+        uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # v1.0.26
+        with:
+          sarif_file: results.sarif
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,82 @@
+# Public .gitignore file for PCRE2
+
+*.a
+*.lo
+*.la
+*.pc
+*.o
+*~
+*.lha
+
+__pycache__
+.deps
+.libs
+
+INSTALL
+Makefile
+Makefile.in
+RunGrepTest.log
+RunGrepTest.trs
+RunTest.log
+RunTest.trs
+
+aclocal.m4
+ar-lib
+compile
+config.guess
+config.log
+config.status
+config.sub
+configure
+depcomp
+install-sh
+libtool
+ltmain.sh
+missing
+pcre2-config
+pcre2_dftables
+pcre2_jit_test
+pcre2_jit_test.log
+pcre2_jit_test.trs
+pcre2demo
+pcre2fuzzcheck
+pcre2grep
+pcre2test
+test-driver
+test-suite.log
+test3input
+test3output
+testNinput
+testNinputgrep
+teststderr
+teststderrM
+teststderrgrep
+teststdout
+teststdoutM
+testtemp1
+testtemp1grep
+testtemp2
+testtemp2grep
+testtry
+testtrygrep
+
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+
+maint/ucptest
+maint/utf8
+
+src/.deps
+src/.dirstamp
+src/config.h
+src/pcre2.h
+src/pcre2_chartables.c
+src/stamp-h1
+
+/bazel-*
+
+# End
+
--- a/12
+++ b/12
@ -2,13 +2,13 @@ THE MAIN PCRE2 LIBRARY CODE
 ---------------------------

 Written by:       Philip Hazel
-Email local part: ph10
-Email domain:     cam.ac.uk
+Email local part: Philip.Hazel
+Email domain:     gmail.com

-University of Cambridge Computing Service,
+Retired from University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2018 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved


@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu

-Copyright(c) 2010-2018 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.


@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu

-Copyright(c) 2009-2018 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.

 ####
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -0,0 +1,72 @@
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
+
+copy_file(
+    name = "config_h_generic",
+    src = "src/config.h.generic",
+    out = "src/config.h",
+)
+
+copy_file(
+    name = "pcre2_h_generic",
+    src = "src/pcre2.h.generic",
+    out = "src/pcre2.h",
+)
+
+copy_file(
+    name = "pcre2_chartables_c",
+    src = "src/pcre2_chartables.c.dist",
+    out = "src/pcre2_chartables.c",
+)
+
+cc_library(
+    name = "pcre2",
+    srcs = [
+        "src/pcre2_auto_possess.c",
+        "src/pcre2_compile.c",
+        "src/pcre2_config.c",
+        "src/pcre2_context.c",
+        "src/pcre2_convert.c",
+        "src/pcre2_dfa_match.c",
+        "src/pcre2_error.c",
+        "src/pcre2_extuni.c",
+        "src/pcre2_find_bracket.c",
+        "src/pcre2_maketables.c",
+        "src/pcre2_match.c",
+        "src/pcre2_match_data.c",
+        "src/pcre2_newline.c",
+        "src/pcre2_ord2utf.c",
+        "src/pcre2_pattern_info.c",
+        "src/pcre2_script_run.c",
+        "src/pcre2_serialize.c",
+        "src/pcre2_string_utils.c",
+        "src/pcre2_study.c",
+        "src/pcre2_substitute.c",
+        "src/pcre2_substring.c",
+        "src/pcre2_tables.c",
+        "src/pcre2_ucd.c",
+        "src/pcre2_ucptables.c",
+        "src/pcre2_valid_utf.c",
+        "src/pcre2_xclass.c",
+        ":pcre2_chartables_c",
+    ],
+    hdrs = glob(["src/*.h"]) + [
+        ":config_h_generic",
+        ":pcre2_h_generic",
+    ],
+    defines = [
+        "HAVE_CONFIG_H",
+        "PCRE2_CODE_UNIT_WIDTH=8",
+        "PCRE2_STATIC",
+    ],
+    includes = ["src"],
+    strip_include_prefix = "src",
+    visibility = ["//visibility:public"],
+)
+
+cc_binary(
+    name = "pcre2demo",
+    srcs = ["src/pcre2demo.c"],
+    visibility = ["//visibility:public"],
+    deps = [":pcre2"],
+)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,5 @@
 # CMakeLists.txt
 #
-#
 # This file enables PCRE2 to be built with the CMake configuration and build
 # tool. Download CMake in source or binary form from http://www.cmake.org/
 # Converted to support PCRE2 from the original PCRE file, August 2014.
@ -80,18 +79,49 @@
 # 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
 # 2017-04-08 PH added HEAP_LIMIT
 # 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support
+# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed)
+# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC
+# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
+# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
+# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
+# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
+# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
+# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
+# 2020-04-16 enh added check for __attribute__((uninitialized))
+# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
+#            library versioning.
+# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
+# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
+# 2020-05-25 PH added a check for Intel CET
+# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel
+# 2021-06-29 JWSB added the option to build static library with PIC.
+# 2021-07-05 JWSB modified such both the static and shared library can be
+#            build in one go.
+# 2021-08-28 PH increased minimum version
+# 2021-08-28 PH added test for realpath()

 PROJECT(PCRE2 C)

-# Increased minimum to 2.8.0 to support newer add_test features. Set policy
-# CMP0026 to avoid warnings for the use of LOCATION in GET_TARGET_PROPERTY.
+# Increased minimum to 2.8.5 to support GNUInstallDirs.
+# Increased minimum to 3.1 to support imported targets.
+CMAKE_MINIMUM_REQUIRED(VERSION 3.1)

-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
-CMAKE_POLICY(SET CMP0026 OLD)
+# Set policy CMP0026 to avoid warnings for the use of LOCATION in
+# GET_TARGET_PROPERTY. This should no longer be required.
+# CMAKE_POLICY(SET CMP0026 OLD)

-SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
+# With a recent cmake, you can provide a rootdir to look for non
+# standard installed library dependencies, but to do so, the policy
+# needs to be set to new (by uncommenting the following)
+# CMAKE_POLICY(SET CMP0074 NEW)

-SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
+# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
+# on the command line.
+# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)

 # external packages
 FIND_PACKAGE( BZip2 )
@ -101,29 +131,66 @@ FIND_PACKAGE( Editline )

 # Configuration checks

-INCLUDE(CheckIncludeFile)
+INCLUDE(CheckCSourceCompiles)
 INCLUDE(CheckFunctionExists)
+INCLUDE(CheckSymbolExists)
+INCLUDE(CheckIncludeFile)
 INCLUDE(CheckTypeSize)
+INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR

 CHECK_INCLUDE_FILE(dirent.h     HAVE_DIRENT_H)
-CHECK_INCLUDE_FILE(stdint.h     HAVE_STDINT_H)
-CHECK_INCLUDE_FILE(inttypes.h   HAVE_INTTYPES_H)
 CHECK_INCLUDE_FILE(sys/stat.h   HAVE_SYS_STAT_H)
 CHECK_INCLUDE_FILE(sys/types.h  HAVE_SYS_TYPES_H)
 CHECK_INCLUDE_FILE(unistd.h     HAVE_UNISTD_H)
 CHECK_INCLUDE_FILE(windows.h    HAVE_WINDOWS_H)

-CHECK_FUNCTION_EXISTS(bcopy     HAVE_BCOPY)
-CHECK_FUNCTION_EXISTS(memmove   HAVE_MEMMOVE)
-CHECK_FUNCTION_EXISTS(strerror  HAVE_STRERROR)
+CHECK_SYMBOL_EXISTS(bcopy         "strings.h"  HAVE_BCOPY)
+CHECK_SYMBOL_EXISTS(memfd_create  "sys/mman.h" HAVE_MEMFD_CREATE)
+CHECK_SYMBOL_EXISTS(memmove       "string.h"   HAVE_MEMMOVE)
+CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h"   HAVE_SECURE_GETENV)
+CHECK_SYMBOL_EXISTS(strerror      "string.h"   HAVE_STRERROR)
+
+CHECK_C_SOURCE_COMPILES(
+  "#include <stdlib.h>
+   #include <limits.h>
+   int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[1], buf); return 0; }"
+  HAVE_REALPATH
+)
+
+set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
+CHECK_C_SOURCE_COMPILES(
+  "int main() { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
+  HAVE_ATTRIBUTE_UNINITIALIZED
+)
+set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
+
+# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
+# code was written by PH, trying to imitate the logic from the autotools
+# configuration.
+
+CHECK_C_SOURCE_COMPILES(
+  "#ifndef __CET__
+   #error CET is not enabled
+   #endif
+   int main() { return 0; }"
+  INTEL_CET_ENABLED
+)
+
+IF (INTEL_CET_ENABLED)
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
+ENDIF(INTEL_CET_ENABLED)
+
+

 # User-configurable options
 #
 # Note: CMakeSetup displays these in alphabetical order, regardless of
 # the order we use here.

-SET(BUILD_SHARED_LIBS OFF CACHE BOOL
-    "Build shared libraries instead of static ones.")
+SET(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.")
+
+OPTION(BUILD_STATIC_LIBS "Build static libraries." ON)

 OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)

@ -131,8 +198,12 @@ OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)

 OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)

+OPTION(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)
+
 OPTION(PCRE2_DEBUG "Include debugging code" OFF)

+OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF)
+
 SET(PCRE2_EBCDIC OFF CACHE BOOL
    "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")

@ -146,7 +217,7 @@ SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
    "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")

 SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
-    "Default limit on heap memory (kilobytes). See HEAP_LIMIT in config.h.in for details.")
+    "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.")

 SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
    "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
@ -169,15 +240,22 @@ SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
 SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
    "Enable support for Just-in-time compiling.")

-SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
-    "Enable SELinux compatible execmem allocator in JIT.")
+IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+    SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
+        "Enable SELinux compatible execmem allocator in JIT (experimental).")
+ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+    SET(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
+ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)

-SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
+SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL
    "Enable use of Just-in-time compiling in pcre2grep.")

-SET(PCRE2_SUPPORT_PCRE2GREP_CALLOUT ON CACHE BOOL
+SET(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL
    "Enable callout string support in pcre2grep.")

+SET(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL
+    "Enable callout string fork support in pcre2grep.")
+
 SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
    "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")

@ -233,9 +311,19 @@ ENDIF(PCRE2_SUPPORT_LIBZ)
 IF(EDITLINE_FOUND)
  OPTION (PCRE2_SUPPORT_LIBEDIT  "Enable support for linking pcre2test with libedit." OFF)
 ENDIF(EDITLINE_FOUND)
-IF(PCRE2_SUPPORT_LIBEDIT)
-  INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
-ENDIF(PCRE2_SUPPORT_LIBEDIT)
+IF(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ELSE(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    MESSAGE(FATAL_ERROR
+      " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n"
+      " or set Editline_ROOT to a full libedit installed tree, as needed\n"
+      " Might need to enable policy CMP0074 in CMakeLists.txt"
+    )
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ENDIF(EDITLINE_FOUND)

 # readline lib
 IF(READLINE_FOUND)
@ -247,9 +335,9 @@ ENDIF(PCRE2_SUPPORT_LIBREADLINE)

 # Prepare build configuration

-IF(NOT BUILD_SHARED_LIBS)
-        SET(PCRE2_STATIC 1)
-ENDIF(NOT BUILD_SHARED_LIBS)
+IF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS)
+        MESSAGE(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.")
+ENDIF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS)

 IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32)
        MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled")
@ -273,7 +361,12 @@ IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
 ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)

 IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
-        MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
+        IF(READLINE_FOUND)
+                MESSAGE(FATAL_ERROR
+                  " Only one of the readline compatible libraries can be enabled.\n"
+                  " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF"
+                )
+        ENDIF(READLINE_FOUND)
 ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)

 IF(PCRE2_SUPPORT_BSR_ANYCRLF)
@ -289,25 +382,50 @@ IF(PCRE2_SUPPORT_UNICODE)
 ENDIF(PCRE2_SUPPORT_UNICODE)

 IF(PCRE2_SUPPORT_JIT)
-        SET(SUPPORT_JIT 1)
+	SET(SUPPORT_JIT 1)
+	IF(UNIX)
+		FIND_PACKAGE(Threads REQUIRED)
+		IF(CMAKE_USE_PTHREADS_INIT)
+			SET(REQUIRE_PTHREAD 1)
+		ENDIF(CMAKE_USE_PTHREADS_INIT)
+	ENDIF(UNIX)
 ENDIF(PCRE2_SUPPORT_JIT)

 IF(PCRE2_SUPPORT_JIT_SEALLOC)
-        SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
+        SET(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
+	CHECK_SYMBOL_EXISTS(mkostemp stdlib.h REQUIRED)
+        UNSET(CMAKE_REQUIRED_DEFINITIONS)
+        IF(${REQUIRED})
+                IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+                        ADD_DEFINITIONS(-D_GNU_SOURCE)
+                        SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
+                ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+                        MESSAGE(FATAL_ERROR "Your configuration is not supported")
+                ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+        ELSE(${REQUIRED})
+                SET(PCRE2_SUPPORT_JIT_SEALLOC OFF)
+        ENDIF(${REQUIRED})
 ENDIF(PCRE2_SUPPORT_JIT_SEALLOC)

-IF(PCRE2_SUPPORT_PCRE2GREP_JIT)
+IF(PCRE2GREP_SUPPORT_JIT)
        SET(SUPPORT_PCRE2GREP_JIT 1)
-ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT)
+ENDIF(PCRE2GREP_SUPPORT_JIT)

-IF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT)
+IF(PCRE2GREP_SUPPORT_CALLOUT)
        SET(SUPPORT_PCRE2GREP_CALLOUT 1)
-ENDIF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT)
+        IF(PCRE2GREP_SUPPORT_CALLOUT_FORK)
+                SET(SUPPORT_PCRE2GREP_CALLOUT_FORK 1)
+        ENDIF(PCRE2GREP_SUPPORT_CALLOUT_FORK)
+ENDIF(PCRE2GREP_SUPPORT_CALLOUT)

 IF(PCRE2_SUPPORT_VALGRIND)
        SET(SUPPORT_VALGRIND 1)
 ENDIF(PCRE2_SUPPORT_VALGRIND)

+IF(PCRE2_DISABLE_PERCENT_ZT)
+        SET(DISABLE_PERCENT_ZT 1)
+ENDIF(PCRE2_DISABLE_PERCENT_ZT)
+
 # This next one used to reference ${READLINE_LIBRARY})
 # but I was advised to add the NCURSES test as well, along with
 # some modifications to cmake/FindReadline.cmake which should
@ -382,12 +500,13 @@ file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
  LIMIT_COUNT 50 # Read only the first 50 lines of the file
 )

-set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
+set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date"
+  "libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version")
 foreach(configure_line ${configure_lines})
    foreach(_substitution_variable ${SEARCHED_VARIABLES})
        string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
        if (NOT ${_substitution_variable_upper})
-            string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
+            string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
            if (CMAKE_MATCH_1)
                set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
            endif()
@ -395,21 +514,83 @@ foreach(configure_line ${configure_lines})
    endforeach()
 endforeach()

+macro(PARSE_LIB_VERSION VARIABLE_PREFIX)
+  string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST ${${VARIABLE_PREFIX}_VERSION})
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 ${VARIABLE_PREFIX}_VERSION_CURRENT)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE)
+
+  math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}")
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+  set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}}")
+  set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}")
+endmacro()
+
+PARSE_LIB_VERSION(LIBPCRE2_POSIX)
+PARSE_LIB_VERSION(LIBPCRE2_8)
+PARSE_LIB_VERSION(LIBPCRE2_16)
+PARSE_LIB_VERSION(LIBPCRE2_32)
+
 CONFIGURE_FILE(src/pcre2.h.in
               ${PROJECT_BINARY_DIR}/pcre2.h
               @ONLY)

-# What about pcre2-config and libpcre2.pc?
+# Make sure to not link debug libs
+# against release libs and vice versa
+IF(WIN32)
+  SET(CMAKE_DEBUG_POSTFIX "d")
+ENDIF(WIN32)
+
+# Generate pkg-config files
+
+SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
+SET(prefix ${CMAKE_INSTALL_PREFIX})
+
+SET(exec_prefix "\${prefix}")
+SET(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+SET(includedir "\${prefix}/include")
+IF(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
+  SET(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
+ENDIF()
+CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
+SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
+
+IF(PCRE2_BUILD_PCRE2_8)
+  CONFIGURE_FILE(libpcre2-8.pc.in libpcre2-8.pc @ONLY)
+  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc")
+  SET(enable_pcre2_8 "yes")
+ELSE()
+  SET(enable_pcre2_8 "no")
+ENDIF()
+
+IF(PCRE2_BUILD_PCRE2_16)
+  CONFIGURE_FILE(libpcre2-16.pc.in libpcre2-16.pc @ONLY)
+  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc")
+  SET(enable_pcre2_16 "yes")
+ELSE()
+  SET(enable_pcre2_16 "no")
+ENDIF()
+
+IF(PCRE2_BUILD_PCRE2_32)
+  CONFIGURE_FILE(libpcre2-32.pc.in libpcre2-32.pc @ONLY)
+  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc")
+  SET(enable_pcre2_32 "yes")
+ELSE()
+  SET(enable_pcre2_32 "no")
+ENDIF()
+
+CONFIGURE_FILE(pcre2-config.in pcre2-config @ONLY)

 # Character table generation

 OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
 IF(PCRE2_REBUILD_CHARTABLES)
-  ADD_EXECUTABLE(dftables src/dftables.c)
+  ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c)
  ADD_CUSTOM_COMMAND(
    COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
-    DEPENDS dftables
-    COMMAND dftables
+    DEPENDS pcre2_dftables
+    COMMAND pcre2_dftables
    ARGS        ${PROJECT_BINARY_DIR}/pcre2_chartables.c
    OUTPUT      ${PROJECT_BINARY_DIR}/pcre2_chartables.c
  )
@ -441,6 +622,7 @@ SET(PCRE2_SOURCES
  src/pcre2_newline.c
  src/pcre2_ord2utf.c
  src/pcre2_pattern_info.c
+  src/pcre2_script_run.c
  src/pcre2_serialize.c
  src/pcre2_string_utils.c
  src/pcre2_study.c
@ -455,39 +637,37 @@ SET(PCRE2_SOURCES
 SET(PCRE2POSIX_HEADERS src/pcre2posix.h)
 SET(PCRE2POSIX_SOURCES src/pcre2posix.c)

-IF(MINGW AND NOT PCRE2_STATIC)
-IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
-ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
-PRE-LINK
-COMMAND windres ARGS pcre2.rc pcre2.o
-WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-COMMENT Using pcre2 coff info in mingw build)
-SET(PCRE2_SOURCES
-  ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o
-)
-ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
-IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
-ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
-PRE-LINK
-COMMAND windres ARGS pcre2posix.rc pcre2posix.o
-WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-COMMENT Using pcre2posix coff info in mingw build)
-SET(PCRE2POSIX_SOURCES
-  ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o
-)
-ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+IF(MINGW AND BUILD_SHARED_LIBS)
+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
+    ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
+      PRE-LINK
+      COMMAND windres ARGS pcre2.rc pcre2.o
+      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+      COMMENT Using pcre2 coff info in mingw build)
+    SET(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o)
+  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)

-IF(MSVC AND NOT PCRE2_STATIC)
-IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
-SET(PCRE2_SOURCES
-  ${PCRE2_SOURCES} pcre2.rc)
-ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
-IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
-SET(PCRE2POSIX_SOURCES
-  ${PCRE2POSIX_SOURCES} pcre2posix.rc)
-ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
-ENDIF(MSVC AND NOT PCRE2_STATIC)
+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+    ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o
+      PRE-LINK
+      COMMAND windres ARGS pcre2posix.rc pcre2posix.o
+      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+      COMMENT Using pcre2posix coff info in mingw build)
+    SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o)
+  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+ENDIF(MINGW AND BUILD_SHARED_LIBS)
+
+IF(MSVC AND BUILD_SHARED_LIBS)
+  SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-posix.pdb ${dll_pdb_files})
+  SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb ${dll_pdb_debug_files})
+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
+    SET(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc)
+  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
+
+  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+    SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc)
+  ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc)
+ENDIF(MSVC AND BUILD_SHARED_LIBS)

 # Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681
 # This code was taken from the CMake wiki, not from WebM.
@ -510,71 +690,219 @@ IF(MSVC)
 ENDIF(MSVC)

 SET(CMAKE_INCLUDE_CURRENT_DIR 1)
-# needed to make sure to not link debug libs
-# against release libs and vice versa
-IF(WIN32)
-  SET(CMAKE_DEBUG_POSTFIX "d")
-ENDIF(WIN32)

 SET(targets)

 # 8-bit library

 IF(PCRE2_BUILD_PCRE2_8)
-ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-8
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
-SET(targets ${targets} pcre2-8)
-ADD_LIBRARY(pcre2-posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
-SET_PROPERTY(TARGET pcre2-posix
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
-SET(targets ${targets} pcre2-posix)
-TARGET_LINK_LIBRARIES(pcre2-posix pcre2-8)
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_8_VERSION}
+      SOVERSION ${LIBPCRE2_8_SOVERSION})
+    TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR})
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-8-static)
+    ADD_LIBRARY(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
+    SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_POSIX_VERSION}
+      SOVERSION ${LIBPCRE2_POSIX_SOVERSION})
+    TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static)
+    TARGET_COMPILE_DEFINITIONS(pcre2-posix-static PUBLIC PCRE2_STATIC)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET(targets ${targets} pcre2-posix-static)

-IF(MINGW AND NOT PCRE2_STATIC)
-  IF(NON_STANDARD_LIB_PREFIX)
-    SET_TARGET_PROPERTIES(pcre2-8 pcre2-posix PROPERTIES PREFIX "")
-  ENDIF(NON_STANDARD_LIB_PREFIX)
-  IF(NON_STANDARD_LIB_SUFFIX)
-    SET_TARGET_PROPERTIES(pcre2-8 pcre2-posix PROPERTIES SUFFIX "-0.dll")
-  ENDIF(NON_STANDARD_LIB_SUFFIX)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+    IF(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static)
+      SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static)
+    ELSE(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8)
+      SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix)
+    ENDIF(MSVC)
+    IF(PCRE2_STATIC_PIC)
+      SET_TARGET_PROPERTIES(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
+    ENDIF(PCRE2_STATIC_PIC)
+  ENDIF(BUILD_STATIC_LIBS)
+
+  IF(BUILD_SHARED_LIBS)
+    ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_8_VERSION}
+      SOVERSION ${LIBPCRE2_8_SOVERSION}
+      OUTPUT_NAME pcre2-8)
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-8-shared)
+    ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_POSIX_VERSION}
+      SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
+      OUTPUT_NAME pcre2-posix)
+    TARGET_LINK_LIBRARIES(pcre2-posix-shared pcre2-8-shared)
+    SET(targets ${targets} pcre2-posix-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-8.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-8d.pdb ${dll_pdb_debug_files})
+
+    IF(MINGW)
+      IF(NON_STANDARD_LIB_PREFIX)
+        SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "")
+      ENDIF(NON_STANDARD_LIB_PREFIX)
+      IF(NON_STANDARD_LIB_SUFFIX)
+        SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll")
+      ENDIF(NON_STANDARD_LIB_SUFFIX)
+    ENDIF(MINGW)
+  ENDIF(BUILD_SHARED_LIBS)
+
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-static)
+    ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-static)
+  ELSE(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-shared)
+    ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-shared)
+  ENDIF(BUILD_STATIC_LIBS)
 ENDIF(PCRE2_BUILD_PCRE2_8)

 # 16-bit library

 IF(PCRE2_BUILD_PCRE2_16)
-ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-16
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
-SET(targets ${targets} pcre2-16)
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_16_VERSION}
+      SOVERSION ${LIBPCRE2_16_SOVERSION})
+    TARGET_COMPILE_DEFINITIONS(pcre2-16-static PUBLIC PCRE2_STATIC)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-16-static)

-IF(MINGW AND NOT PCRE2_STATIC)
-  IF(NON_STANDARD_LIB_PREFIX)
-    SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES PREFIX "")
-  ENDIF(NON_STANDARD_LIB_PREFIX)
-  IF(NON_STANDARD_LIB_SUFFIX)
-    SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES SUFFIX "-0.dll")
-  ENDIF(NON_STANDARD_LIB_SUFFIX)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+    IF(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static)
+    ELSE(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16)
+    ENDIF(MSVC)
+    IF(PCRE2_STATIC_PIC)
+      SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
+    ENDIF(PCRE2_STATIC_PIC)
+  ENDIF(BUILD_STATIC_LIBS)
+
+  IF(BUILD_SHARED_LIBS)
+    ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_16_VERSION}
+      SOVERSION ${LIBPCRE2_16_SOVERSION}
+      OUTPUT_NAME pcre2-16)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-16-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-16.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-16d.pdb ${dll_pdb_debug_files})
+
+    IF(MINGW)
+      IF(NON_STANDARD_LIB_PREFIX)
+        SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES PREFIX "")
+      ENDIF(NON_STANDARD_LIB_PREFIX)
+      IF(NON_STANDARD_LIB_SUFFIX)
+        SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES SUFFIX "-0.dll")
+      ENDIF(NON_STANDARD_LIB_SUFFIX)
+    ENDIF(MINGW)
+  ENDIF(BUILD_SHARED_LIBS)
+
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-static)
+  ELSE(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-shared)
+  ENDIF(BUILD_STATIC_LIBS)
 ENDIF(PCRE2_BUILD_PCRE2_16)

 # 32-bit library

 IF(PCRE2_BUILD_PCRE2_32)
-ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-32
-  PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
-SET(targets ${targets} pcre2-32)
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_32_VERSION}
+      SOVERSION ${LIBPCRE2_32_SOVERSION})
+    TARGET_COMPILE_DEFINITIONS(pcre2-32-static PUBLIC PCRE2_STATIC)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-32-static)

-IF(MINGW AND NOT PCRE2_STATIC)
-  IF(NON_STANDARD_LIB_PREFIX)
-    SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES PREFIX "")
-  ENDIF(NON_STANDARD_LIB_PREFIX)
-  IF(NON_STANDARD_LIB_SUFFIX)
-    SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES SUFFIX "-0.dll")
-  ENDIF(NON_STANDARD_LIB_SUFFIX)
-ENDIF(MINGW AND NOT PCRE2_STATIC)
+    IF(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static)
+    ELSE(MSVC)
+      SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32)
+    ENDIF(MSVC)
+    IF(PCRE2_STATIC_PIC)
+      SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1)
+    ENDIF(PCRE2_STATIC_PIC)
+  ENDIF(BUILD_STATIC_LIBS)
+
+  IF(BUILD_SHARED_LIBS)
+    ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES
+      COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
+      MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+      MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+      VERSION ${LIBPCRE2_32_VERSION}
+      SOVERSION ${LIBPCRE2_32_SOVERSION}
+      OUTPUT_NAME pcre2-32)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    SET(targets ${targets} pcre2-32-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-32.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-32d.pdb ${dll_pdb_debug_files})
+
+    IF(MINGW)
+      IF(NON_STANDARD_LIB_PREFIX)
+        SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES PREFIX "")
+      ENDIF(NON_STANDARD_LIB_PREFIX)
+      IF(NON_STANDARD_LIB_SUFFIX)
+        SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES SUFFIX "-0.dll")
+      ENDIF(NON_STANDARD_LIB_SUFFIX)
+    ENDIF(MINGW)
+  ENDIF(BUILD_SHARED_LIBS)
+
+  IF(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-static)
+  ELSE(BUILD_STATIC_LIBS)
+    ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-shared)
+  ENDIF(BUILD_STATIC_LIBS)
 ENDIF(PCRE2_BUILD_PCRE2_32)

 # Executables
@ -594,6 +922,13 @@ IF(PCRE2_BUILD_TESTS)

  SET(PCRE2TEST_SOURCES src/pcre2test.c)

+  IF(MSVC)
+    # This is needed to avoid a stack overflow error in the standard tests. The
+    # flag should be indicated with a forward-slash instead of a hyphen, but
+    # then CMake treats it as a file path.
+    SET(PCRE2TEST_LINKER_FLAGS -STACK:2500000)
+  ENDIF(MSVC)
+
  ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES})
  SET(targets ${targets} pcre2test)
  IF(PCRE2_BUILD_PCRE2_8)
@ -605,7 +940,7 @@ IF(PCRE2_BUILD_TESTS)
  IF(PCRE2_BUILD_PCRE2_32)
    LIST(APPEND PCRE2TEST_LIBS pcre2-32)
  ENDIF(PCRE2_BUILD_PCRE2_32)
-  TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS})
+  TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS})

  IF(PCRE2_SUPPORT_JIT)
    ADD_EXECUTABLE(pcre2_jit_test src/pcre2_jit_test.c)
@ -623,14 +958,27 @@ IF(PCRE2_BUILD_TESTS)
    TARGET_LINK_LIBRARIES(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS})
  ENDIF(PCRE2_SUPPORT_JIT)

-  # exes in Debug location tested by the RunTest shell script
+  # exes in Debug location tested by the RunTest and RunGrepTest shell scripts
  # via "make test"

+  # The commented out code below provokes a warning about future removal
+  # of the facility, and requires policy CMP0026 to be set to "OLD". I have
+  # got fed-up with the warnings, but my plea for help on the mailing list
+  # produced no response. So, I've hacked. The new code below seems to work on
+  # Linux.
+
+#  IF(PCRE2_BUILD_PCRE2GREP)
+#    GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION)
+#  ENDIF(PCRE2_BUILD_PCRE2GREP)
+#
+#  GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION)
+
  IF(PCRE2_BUILD_PCRE2GREP)
-    GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION)
+    SET(PCRE2GREP_EXE $<TARGET_FILE:pcre2grep>)
  ENDIF(PCRE2_BUILD_PCRE2GREP)

-  GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION)
+  SET(PCRE2TEST_EXE $<TARGET_FILE:pcre2test>)
+

 # =================================================
  # Write out a CTest configuration file
@ -679,7 +1027,9 @@ if test \"$?\" != \"0\"; then exit 1; fi
 \@echo off
 setlocal
 SET srcdir=\"${winsrc}\"
-SET pcre2test=\"${winexe}\"
+# The next line was replaced by the following one after a user comment.
+# SET pcre2test=\"${winexe}\"
+SET pcre2test=\"${winbin}\\pcre2test.exe\"
 if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\"
 call %srcdir%\\RunTest.Bat
 if errorlevel 1 exit /b 1
@ -715,42 +1065,44 @@ SET(CMAKE_INSTALL_ALWAYS 1)

 INSTALL(TARGETS ${targets}
        RUNTIME DESTINATION bin
-        LIBRARY DESTINATION lib
-        ARCHIVE DESTINATION lib)
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
+  DESTINATION bin
+  # Set 0755 permissions
+  PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)

 INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)

+# CMake config files.
+set(PCRE2_CONFIG_IN  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in)
+set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake)
+configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY)
+set(PCRE2_CONFIG_VERSION_IN  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in)
+set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake)
+configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY)
+install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION cmake)
+
 FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
 FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
 FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)

-FOREACH(man ${man3})
-        GET_FILENAME_COMPONENT(man_tmp ${man} NAME)
-        SET(man3_new ${man3} ${man})
-ENDFOREACH(man ${man3})
-SET(man3 ${man3_new})
-
 INSTALL(FILES ${man1} DESTINATION man/man1)
 INSTALL(FILES ${man3} DESTINATION man/man3)
 INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html)

 IF(MSVC AND INSTALL_MSVC_PDB)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posix.pdb
-            DESTINATION bin
-            CONFIGURATIONS RelWithDebInfo)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posixd.pdb
-            DESTINATION bin
-            CONFIGURATIONS Debug)
+ INSTALL(FILES ${dll_pdb_files} DESTINATION bin CONFIGURATIONS RelWithDebInfo)
+ INSTALL(FILES ${dll_pdb_debug_files} DESTINATION bin CONFIGURATIONS Debug)
 ENDIF(MSVC AND INSTALL_MSVC_PDB)

 # Help, only for nice output
-IF(BUILD_SHARED_LIBS)
-  SET(BUILD_STATIC_LIBS OFF)
-ELSE(BUILD_SHARED_LIBS)
+IF(BUILD_STATIC_LIBS)
  SET(BUILD_STATIC_LIBS ON)
-ENDIF(BUILD_SHARED_LIBS)
+ELSE(BUILD_STATIC_LIBS)
+  SET(BUILD_STATIC_LIBS OFF)
+ENDIF(BUILD_STATIC_LIBS)

 IF(PCRE2_HEAP_MATCH_RECURSE)
  MESSAGE(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.")
@ -763,7 +1115,7 @@ IF(PCRE2_SHOW_REPORT)
  ENDIF(CMAKE_C_FLAGS)
  MESSAGE(STATUS "")
  MESSAGE(STATUS "")
-  MESSAGE(STATUS "PCRE2 configuration summary:")
+  MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:")
  MESSAGE(STATUS "")
  MESSAGE(STATUS "  Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
  MESSAGE(STATUS "  C compiler ...................... : ${CMAKE_C_COMPILER}")
@ -788,11 +1140,13 @@ IF(PCRE2_SHOW_REPORT)
  MESSAGE(STATUS "  Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
  MESSAGE(STATUS "  Build shared libs ............... : ${BUILD_SHARED_LIBS}")
  MESSAGE(STATUS "  Build static libs ............... : ${BUILD_STATIC_LIBS}")
+  MESSAGE(STATUS "     with PIC enabled ............. : ${PCRE2_STATIC_PIC}")
  MESSAGE(STATUS "  Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
-  MESSAGE(STATUS "  Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}")
-  MESSAGE(STATUS "  Enable callouts in pcre2grep .... : ${PCRE2_SUPPORT_PCRE2GREP_CALLOUT}")
+  MESSAGE(STATUS "  Enable JIT in pcre2grep ......... : ${PCRE2GREP_SUPPORT_JIT}")
+  MESSAGE(STATUS "  Enable callouts in pcre2grep .... : ${PCRE2GREP_SUPPORT_CALLOUT}")
+  MESSAGE(STATUS "  Enable callout fork in pcre2grep. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}")
  MESSAGE(STATUS "  Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}")
-  MESSAGE(STATUS "  Build tests (implies pcre2test  . : ${PCRE2_BUILD_TESTS}")
+  MESSAGE(STATUS "  Build tests (implies pcre2test .. : ${PCRE2_BUILD_TESTS}")
  MESSAGE(STATUS "               and pcre2grep)")
  IF(ZLIB_FOUND)
    MESSAGE(STATUS "  Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}")
@ -815,11 +1169,16 @@ IF(PCRE2_SHOW_REPORT)
    MESSAGE(STATUS "  Link pcre2test with libreadline . : Library not found" )
  ENDIF(READLINE_FOUND)
  MESSAGE(STATUS "  Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
+  IF(PCRE2_DISABLE_PERCENT_ZT)
+    MESSAGE(STATUS "  Use %zu and %td ..................: OFF" )
+  ELSE(PCRE2_DISABLE_PERCENT_ZT)
+    MESSAGE(STATUS "  Use %zu and %td ..................: AUTO" )
+  ENDIF(PCRE2_DISABLE_PERCENT_ZT)

-  IF(MINGW AND NOT PCRE2_STATIC)
+  IF(MINGW AND BUILD_SHARED_LIBS)
    MESSAGE(STATUS "  Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
    MESSAGE(STATUS "  Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
-  ENDIF(MINGW AND NOT PCRE2_STATIC)
+  ENDIF(MINGW AND BUILD_SHARED_LIBS)

  IF(MSVC)
    MESSAGE(STATUS "  Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
--- a/1107
+++ b/1107
--- a/25
+++ b/25
@ -16,6 +16,7 @@ while (scalar(@ARGV) > 0)
  
  while (<IN>)
    {  
+    $count = 0;
    $line++; 
    if (/^\s*$/)
      {
@ -50,14 +51,24 @@ while (scalar(@ARGV) > 0)
        $yield = 1;
        }
      }
-    else
+    elsif (/\\[^ef]|\\f[^IBP]/)
      {
-      if (/\\[^ef]|\\f[^IBP]/)
-        {
-        printf "Bad backslash in line $line of $file\n";  
-        $yield = 1; 
-        } 
-      }   
+      printf "Bad backslash in line $line of $file\n";
+      $yield = 1;
+      }
+    while (/\\f[BI]/g)
+      {
+      $count++;
+      }
+    while (/\\fP/g)
+      {
+      $count--;
+      }
+    if ($count != 0)
+      {
+      printf "Mismatching formatting in line $line of $file\n";
+      $yield = 1;
+      }
    }
     
  close(IN);   
--- a/121
+++ b/121
@ -8,8 +8,8 @@ library is referred to as PCRE1 below. For information about testing PCRE2, see
 the pcre2test documentation and the comment at the head of the RunTest file.

 PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix
-releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid
-confusion with PCRE1.
+releases carried on the 8.xx series, up to the final 8.45 release. PCRE2
+releases started at 10.00 to avoid confusion with PCRE1.


 Historical note 1
@ -38,8 +38,8 @@ Historical note 2
 By contrast, the code originally written by Henry Spencer (which was
 subsequently heavily modified for Perl) compiles the expression twice: once in
 a dummy mode in order to find out how much store will be needed, and then for
-real. (The Perl version probably doesn't do this any more; I'm talking about
-the original library.) The execution function operates by backtracking and
+real. (The Perl version may or may not still do this; I'm talking about the
+original library.) The execution function operates by backtracking and
 maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
 matches individual wild portions of the pattern. This is an "NFA algorithm" in
 Friedl's terminology.
@ -151,8 +151,8 @@ of code units in the item itself. The exception is the aforementioned large
 advance to check for such values. When auto-callouts are enabled, the generous
 assumption is made that there will be a callout for each pattern code unit
 (which of course is only actually true if all code units are literals) plus one
-at the end. There is a default parsed pattern vector on the system stack, but
-if this is not big enough, heap memory is used.
+at the end. A default parsed pattern vector is defined on the system stack, to
+minimize memory handling, but if this is not big enough, heap memory is used.

 As before, the actual compiling function is run twice, the first time to
 determine the amount of memory needed for the final compiled pattern. It
@ -187,7 +187,7 @@ META_CLASS_EMPTY      [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS
 META_CLASS_EMPTY_NOT  [^] negative empty class - ditto
 META_CLASS_END        ] end of non-empty class
 META_CLASS_NOT        [^ start non-empty negative class
-META_COMMIT           (*COMMIT)
+META_COMMIT           (*COMMIT) - no argument (see below for with argument)
 META_COND_ASSERT      (?(?assertion)
 META_DOLLAR           $ metacharacter
 META_DOT              . metacharacter
@ -195,23 +195,24 @@ META_END              End of pattern (this value is 0x80000000)
 META_FAIL             (*FAIL)
 META_KET              ) closing parenthesis
 META_LOOKAHEAD        (?= start of lookahead
+META_LOOKAHEAD_NA     (*napla: start of non-atomic lookahead
 META_LOOKAHEADNOT     (?! start of negative lookahead
 META_NOCAPTURE        (?: no capture parens
 META_PLUS             +
 META_PLUS_PLUS        ++
 META_PLUS_QUERY       +?
-META_PRUNE            (*PRUNE) - no argument
+META_PRUNE            (*PRUNE) - no argument (see below for with argument)
 META_QUERY            ?
 META_QUERY_PLUS       ?+
 META_QUERY_QUERY      ??
 META_RANGE_ESCAPED    hyphen in class range with at least one escape
 META_RANGE_LITERAL    hyphen in class range defined literally
-META_SKIP             (*SKIP) - no argument
-META_THEN             (*THEN) - no argument
+META_SKIP             (*SKIP) - no argument (see below for with argument)
+META_THEN             (*THEN) - no argument (see below for with argument)

 The two RANGE values occur only in character classes. They are positioned
 between two literals that define the start and end of the range. In an EBCDIC
-evironment it is necessary to know whether either of the range values was
+environment it is necessary to know whether either of the range values was
 specified as an escape. In an ASCII/Unicode environment the distinction is not
 relevant.

@ -228,17 +229,16 @@ If the data for META_ALT is non-zero, it is inside a lookbehind, and the data
 is the length of its branch, for which OP_REVERSE must be generated.

 META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as
-their data in the lower 16 bits of the element.
+their data in the lower 16 bits of the element. META_RECURSE is followed by an
+offset, for use in error messages.

 META_BACKREF is followed by an offset if the back reference group number is 10
-or more. The offsets of the first ocurrences of references to groups whose
+or more. The offsets of the first occurrences of references to groups whose
 numbers are less than 10 are put in cb->small_ref_offset[] (only the first
 occurrence is useful). On 64-bit systems this avoids using more than two parsed
 pattern elements for items such as \3. The offset is used when an error occurs
 because the reference is to a non-existent group.

-META_RECURSE is always followed by an offset, for use in error messages.
-
 META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
 element contains the 16-bit type and data property values, packed together.
 ESC_g and ESC_k are used only for named references - numerical ones are turned
@ -256,6 +256,7 @@ The following are followed by a length element, then a number of character code
 values (which should match with the length):

 META_MARK             (*MARK:xxxx)
+META_COMMIT_ARG       )*COMMIT:xxxx)
 META_PRUNE_ARG        (*PRUNE:xxx)
 META_SKIP_ARG         (*SKIP:xxxx)
 META_THEN_ARG         (*THEN:xxxx)
@ -285,12 +286,13 @@ The following are also followed just by an offset, but also the lower 16 bits
 of the main word contain the length of the first branch of the lookbehind
 group; this is used when generating OP_REVERSE for that branch.

-META_LOOKBEHIND       (?<=
-META_LOOKBEHINDNOT    (?<!
+META_LOOKBEHIND       (?<=      start of lookbehind
+META_LOOKBEHIND_NA    (*naplb:  start of non-atomic lookbehind
+META_LOOKBEHINDNOT    (?<!      start of negative lookbehind

-The following are followed by two elements, the minimum and maximum. Repeat
-values are limited to 65535 (MAX_REPEAT). A maximum value of "unlimited" is
-represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:
+The following are followed by two elements, the minimum and maximum. The
+maximum value is limited to 65535 (MAX_REPEAT). A maximum value of "unlimited"
+is represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:

 META_MINMAX           {n,m}  repeat
 META_MINMAX_PLUS      {n,m}+ repeat
@ -344,11 +346,11 @@ support is not available for this kind of matching.
 Changeable options
 ------------------

-The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
-others) may be changed in the middle of patterns by items such as (?i). Their
-processing is handled entirely at compile time by generating different opcodes
-for the different settings. The runtime functions do not need to keep track of
-an options state.
+The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and
+some others may be changed in the middle of patterns by items such as (?i).
+Their processing is handled entirely at compile time by generating different
+opcodes for the different settings. The runtime functions do not need to keep
+track of an option's state.

 PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
 are tracked and processed during the parsing pre-pass. The others are handled
@ -370,7 +372,7 @@ default value for LINK_SIZE is 2, except for the 32-bit library, where it can
 only be 4. The 8-bit library can be compiled to used 3-byte or 4-byte values,
 and the 16-bit library can be compiled to use 4-byte values, though this
 impairs performance. Specifing a LINK_SIZE larger than 2 for these libraries is
-necessary only when patterns whose compiled length is greater than 64K code
+necessary only when patterns whose compiled length is greater than 65535 code
 units are going to be processed. When a LINK_SIZE value uses more than one code
 unit, the most significant unit is first.

@ -382,7 +384,7 @@ that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
 Opcodes with no following data
 ------------------------------

-These items are all just one unit long
+These items are all just one unit long:

  OP_END                 end of pattern
  OP_ANY                 match any one character other than newline
@ -430,14 +432,22 @@ character). Another use is for [^] when empty classes are permitted
 (PCRE2_ALLOW_EMPTY_CLASS is set).


-Backtracking control verbs with optional data
---------------------------------------------
+Backtracking control verbs
+--------------------------

-(*THEN) without an argument generates the opcode OP_THEN and no following data.
-OP_MARK is followed by the mark name, preceded by a length in one code unit,
-and followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with
-arguments, the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used,
-with the name following in the same format as OP_MARK.
+Verbs with no arguments generate opcodes with no following data (as listed
+in the section above).
+
+(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
+length in one code unit, and followed by a binary zero. The name length is
+limited by the size of the code unit.
+
+(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and
+(*MARK:NAME)(*FAIL) respectively.
+
+For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes
+OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the
+name following in the same format as for OP_MARK.


 Matching literal characters
@ -457,8 +467,8 @@ Caseless matching (positive or negative) of characters that have more than two
 case-equivalent code points (which is possible only in UTF mode) is handled by
 compiling a Unicode property item (see below), with the pseudo-property
 PT_CLIST. The value of this property is an offset in a vector called
-"ucd_caseless_sets" which identifies the start of a short list of equivalent
-characters, terminated by the value NOTACHAR (0xffffffff).
+"ucd_caseless_sets" which identifies the start of a short list of case
+equivalent characters, terminated by the value NOTACHAR (0xffffffff).


 Repeating single characters
@ -535,8 +545,9 @@ Each is followed by two code units that encode the desired property as a type
 and a value. The types are a set of #defines of the form PT_xxx, and the values
 are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
 The value is relevant only for PT_GC (General Category), PT_PC (Particular
-Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
-identify a list of case-equivalent characters when there are three or more.
+Category), PT_SC (Script), PT_BIDICL (Bidi Class), PT_BOOL (Boolean property),
+and the pseudo-property PT_CLIST, which is used to identify a list of
+case-equivalent characters when there are three or more (see above).

 Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
 three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
@ -654,9 +665,9 @@ a count that immediately follows the offset.
 There are several opcodes that mark the end of a subpattern group. OP_KET is
 used for subpatterns that do not repeat indefinitely, OP_KETRMIN and
 OP_KETRMAX are used for indefinite repetitions, minimally or maximally
-respectively, and OP_KETRPOS for possessive repetitions (see below for more 
+respectively, and OP_KETRPOS for possessive repetitions (see below for more
 details). All four are followed by a LINK_SIZE value giving (as a positive
-number) the offset back to the matching bracket opcode.
+number) the offset back to the matching opening bracket opcode.

 If a subpattern is quantified such that it is permitted to match zero times, it
 is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
@ -706,13 +717,15 @@ Assertions
 ----------

 Forward assertions are also just like other subpatterns, but starting with one
-of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
-OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
-is OP_REVERSE, followed by a count of the number of characters to move back the
-pointer in the subject string. In ASCII or UTF-32 mode, the count is also the
-number of code units, but in UTF-8/16 mode each character may occupy more than
-one code unit. A separate count is present in each alternative of a lookbehind
-assertion, allowing them to have different (but fixed) lengths.
+of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or
+OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK,
+OP_ASSERTBACK_NA, and OP_ASSERTBACK_NOT, and the first opcode inside the
+assertion is OP_REVERSE, followed by a count of the number of characters to
+move back the pointer in the subject string. In ASCII or UTF-32 mode, the count
+is also the number of code units, but in UTF-8/16 mode each character may
+occupy more than one code unit. A separate count is present in each alternative
+of a lookbehind assertion, allowing each branch to have a different (but fixed)
+length.


 Conditional subpatterns
@ -745,11 +758,11 @@ tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE
 or OP_FALSE.

 If a condition is not a back reference, recursion test, DEFINE, or VERSION, it
-must start with a parenthesized assertion, whose opcode normally immediately
-follows OP_COND or OP_SCOND. However, if automatic callouts are enabled, a
-callout is inserted immediately before the assertion. It is also possible to
-insert a manual callout at this point. Only assertion conditions may have
-callouts preceding the condition.
+must start with a parenthesized atomic assertion, whose opcode normally
+immediately follows OP_COND or OP_SCOND. However, if automatic callouts are
+enabled, a callout is inserted immediately before the assertion. It is also
+possible to insert a manual callout at this point. Only assertion conditions
+may have callouts preceding the condition.

 A condition that is the negative assertion (?!) is optimized to OP_FAIL in all
 parts of the pattern, so this is another opcode that may appear as a condition.
@ -764,7 +777,7 @@ OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
 bracket from the start of the whole pattern. OP_RECURSE is also used for
 "subroutine" calls, even though they are not strictly a recursion. Up till
 release 10.30 recursions were treated as atomic groups, making them
-incompatible with Perl (but PCRE had then well before Perl did). From 10.30,
+incompatible with Perl (but PCRE had them well before Perl did). From 10.30,
 backtracking into recursions is supported.

 Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only
@ -814,4 +827,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
 opcode are the correct length, in order to catch updating errors.

 Philip Hazel
-21 April 2017
+April 2022
--- a/26
+++ b/26
@ -4,11 +4,11 @@ PCRE2 LICENCE
 PCRE2 is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.

-Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
-specified below, with one exemption for certain binary redistributions. The
-documentation for PCRE2, supplied in the "doc" directory, is distributed under
-the same terms as the software itself. The data in the testdata directory is
-not copyrighted and is in the public domain.
+Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
+licence, as specified below, with one exemption for certain binary
+redistributions. The documentation for PCRE2, supplied in the "doc" directory,
+is distributed under the same terms as the software itself. The data in the
+testdata directory is not copyrighted and is in the public domain.

 The basic library functions are written in C and are freestanding. Also
 included in the distribution is a just-in-time compiler that can be used to
@ -20,13 +20,13 @@ THE BASIC LIBRARY FUNCTIONS
 ---------------------------

 Written by:       Philip Hazel
-Email local part: ph10
-Email domain:     cam.ac.uk
+Email local part: Philip.Hazel
+Email domain:     gmail.com

-University of Cambridge Computing Service,
+Retired from University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2018 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved.


@ -35,9 +35,9 @@ PCRE2 JUST-IN-TIME COMPILATION SUPPORT

 Written by:       Zoltan Herczeg
 Email local part: hzmester
-Emain domain:     freemail.hu
+Email domain:     freemail.hu

-Copyright(c) 2010-2018 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.


@ -46,9 +46,9 @@ STACK-LESS JUST-IN-TIME COMPILER

 Written by:       Zoltan Herczeg
 Email local part: hzmester
-Emain domain:     freemail.hu
+Email domain:     freemail.hu

-Copyright(c) 2009-2018 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.


--- a/MODULE.bazel
+++ b/MODULE.bazel
@ -0,0 +1,8 @@
+module(
+    name = "pcre2",
+    version = "10.40",
+    compatibility_level = 1,
+)
+
+bazel_dep(name = "rules_cc", version = "0.0.1")
+bazel_dep(name = "bazel_skylib", version = "1.2.1")
--- a/Makefile.am
+++ b/Makefile.am
@ -46,6 +46,7 @@ dist_html_DATA = \
  doc/html/pcre2_general_context_free.html \
  doc/html/pcre2_get_error_message.html \
  doc/html/pcre2_get_mark.html \
+  doc/html/pcre2_get_match_data_size.html \
  doc/html/pcre2_get_ovector_count.html \
  doc/html/pcre2_get_ovector_pointer.html \
  doc/html/pcre2_get_startchar.html \
@ -56,6 +57,7 @@ dist_html_DATA = \
  doc/html/pcre2_jit_stack_create.html \
  doc/html/pcre2_jit_stack_free.html \
  doc/html/pcre2_maketables.html \
+  doc/html/pcre2_maketables_free.html \
  doc/html/pcre2_match.html \
  doc/html/pcre2_match_context_copy.html \
  doc/html/pcre2_match_context_create.html \
@ -85,6 +87,7 @@ dist_html_DATA = \
  doc/html/pcre2_set_parens_nest_limit.html \
  doc/html/pcre2_set_recursion_limit.html \
  doc/html/pcre2_set_recursion_memory_management.html \
+  doc/html/pcre2_set_substitute_callout.html \
  doc/html/pcre2_substitute.html \
  doc/html/pcre2_substring_copy_byname.html \
  doc/html/pcre2_substring_copy_bynumber.html \
@ -139,6 +142,7 @@ dist_man_MANS = \
  doc/pcre2_general_context_free.3 \
  doc/pcre2_get_error_message.3 \
  doc/pcre2_get_mark.3 \
+  doc/pcre2_get_match_data_size.3 \
  doc/pcre2_get_ovector_count.3 \
  doc/pcre2_get_ovector_pointer.3 \
  doc/pcre2_get_startchar.3 \
@ -149,6 +153,7 @@ dist_man_MANS = \
  doc/pcre2_jit_stack_create.3 \
  doc/pcre2_jit_stack_free.3 \
  doc/pcre2_maketables.3 \
+  doc/pcre2_maketables_free.3 \
  doc/pcre2_match.3 \
  doc/pcre2_match_context_copy.3 \
  doc/pcre2_match_context_create.3 \
@ -178,6 +183,7 @@ dist_man_MANS = \
  doc/pcre2_set_parens_nest_limit.3 \
  doc/pcre2_set_recursion_limit.3 \
  doc/pcre2_set_recursion_memory_management.3 \
+  doc/pcre2_set_substitute_callout.3 \
  doc/pcre2_substitute.3 \
  doc/pcre2_substring_copy_byname.3 \
  doc/pcre2_substring_copy_bynumber.3 \
@ -231,7 +237,7 @@ noinst_PROGRAMS =
 # and 'make maintainer-clean'.

 CLEANFILES =
-DISTCLEANFILES = src/config.h.in~ config.h
+DISTCLEANFILES = src/config.h.in~
 MAINTAINERCLEANFILES =

 # Additional files to bundle with the distribution, over and above what
@ -319,18 +325,18 @@ include_HEADERS = src/pcre2posix.h
 bin_SCRIPTS = pcre2-config

 ## ---------------------------------------------------------------
-## The dftables program is used to rebuild character tables before compiling
-## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
-## program. The default (when --enable-rebuild-chartables is not specified) is
-## to copy a distributed set of tables that are defined for ASCII code. In this
-## case, dftables is not needed.
+## The pcre2_dftables program is used to rebuild character tables before
+## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an
+## installed program. The default (when --enable-rebuild-chartables is not
+## specified) is to copy a distributed set of tables that are defined for ASCII
+## code. In this case, pcre2_dftables is not needed.

 if WITH_REBUILD_CHARTABLES
-noinst_PROGRAMS += dftables
-dftables_SOURCES = src/dftables.c
-src/pcre2_chartables.c: dftables$(EXEEXT)
+noinst_PROGRAMS += pcre2_dftables
+pcre2_dftables_SOURCES = src/pcre2_dftables.c
+src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
 	rm -f $@
-	./dftables$(EXEEXT) $@
+	./pcre2_dftables$(EXEEXT) $@
 else
 src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
 	rm -f $@
@ -356,12 +362,15 @@ COMMON_SOURCES = \
  src/pcre2_internal.h \
  src/pcre2_intmodedep.h \
  src/pcre2_jit_compile.c \
+  src/pcre2_jit_neon_inc.h \
+  src/pcre2_jit_simd_inc.h \
  src/pcre2_maketables.c \
  src/pcre2_match.c \
  src/pcre2_match_data.c \
  src/pcre2_newline.c \
  src/pcre2_ord2utf.c \
  src/pcre2_pattern_info.c \
+  src/pcre2_script_run.c \
  src/pcre2_serialize.c \
  src/pcre2_string_utils.c \
  src/pcre2_study.c \
@ -373,6 +382,10 @@ COMMON_SOURCES = \
  src/pcre2_valid_utf.c \
  src/pcre2_xclass.c

+# The pcre2_ucptables.c file is #included by pcre2_tables.c
+
+EXTRA_DIST += src/pcre2_ucptables.c
+
 if WITH_PCRE2_8
 lib_LTLIBRARIES += libpcre2-8.la
 libpcre2_8_la_SOURCES = \
@ -382,6 +395,7 @@ nodist_libpcre2_8_la_SOURCES = \
 libpcre2_8_la_CFLAGS = \
  -DPCRE2_CODE_UNIT_WIDTH=8 \
  $(VISIBILITY_CFLAGS) \
+  $(CET_CFLAGS) \
  $(AM_CFLAGS)
 libpcre2_8_la_LIBADD =
 endif # WITH_PCRE2_8
@ -395,6 +409,7 @@ nodist_libpcre2_16_la_SOURCES = \
 libpcre2_16_la_CFLAGS = \
  -DPCRE2_CODE_UNIT_WIDTH=16 \
  $(VISIBILITY_CFLAGS) \
+  $(CET_CFLAGS) \
  $(AM_CFLAGS)
 libpcre2_16_la_LIBADD =
 endif # WITH_PCRE2_16
@ -408,6 +423,7 @@ nodist_libpcre2_32_la_SOURCES = \
 libpcre2_32_la_CFLAGS = \
  -DPCRE2_CODE_UNIT_WIDTH=32 \
  $(VISIBILITY_CFLAGS) \
+  $(CET_CFLAGS) \
  $(AM_CFLAGS)
 libpcre2_32_la_LIBADD =
 endif # WITH_PCRE2_32
@ -436,15 +452,16 @@ EXTRA_DIST += \
  src/sljit/sljitNativePPC_32.c \
  src/sljit/sljitNativePPC_64.c \
  src/sljit/sljitNativePPC_common.c \
-  src/sljit/sljitNativeSPARC_32.c \
-  src/sljit/sljitNativeSPARC_common.c \
-  src/sljit/sljitNativeTILEGX-encoder.c \
-  src/sljit/sljitNativeTILEGX_64.c \
+  src/sljit/sljitNativeRISCV_32.c \
+  src/sljit/sljitNativeRISCV_64.c \
+  src/sljit/sljitNativeRISCV_common.c \
+  src/sljit/sljitNativeS390X.c \
  src/sljit/sljitNativeX86_32.c \
  src/sljit/sljitNativeX86_64.c \
  src/sljit/sljitNativeX86_common.c \
  src/sljit/sljitProtExecAllocator.c \
-  src/sljit/sljitUtils.c
+  src/sljit/sljitUtils.c \
+  src/sljit/sljitWXExecAllocator.c

 # Some of the JIT sources are also in separate files that are #included.

@ -528,6 +545,10 @@ noinst_PROGRAMS += pcre2fuzzcheck
 pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c
 pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
 pcre2fuzzcheck_LDADD = libpcre2-8.la
+if WITH_GCOV
+pcre2fuzzcheck_CFLAGS += $(GCOV_CFLAGS)
+pcre2fuzzcheck_LDADD += $(GCOV_LIBS)
+endif # WITH_GCOV
 endif # WITH FUZZ_SUPPORT
 endif # WITH_PCRE2_8

@ -618,8 +639,10 @@ EXTRA_DIST += \
  testdata/grepoutput \
  testdata/grepoutput8 \
  testdata/grepoutputC \
+  testdata/grepoutputCN \
  testdata/grepoutputN \
  testdata/greppatN4 \
+  testdata/testbtables \
  testdata/testinput1 \
  testdata/testinput2 \
  testdata/testinput3 \
@ -645,6 +668,7 @@ EXTRA_DIST += \
  testdata/testinput23 \
  testdata/testinput24 \
  testdata/testinput25 \
+  testdata/testinput26 \
  testdata/testinputEBC \
  testdata/testoutput1 \
  testdata/testoutput2 \
@ -657,7 +681,7 @@ EXTRA_DIST += \
  testdata/testoutput7 \
  testdata/testoutput8-16-2 \
  testdata/testoutput8-16-3 \
-  testdata/testoutput8-16-3 \
+  testdata/testoutput8-16-4 \
  testdata/testoutput8-32-2 \
  testdata/testoutput8-32-3 \
  testdata/testoutput8-32-4 \
@ -687,6 +711,7 @@ EXTRA_DIST += \
  testdata/testoutput23 \
  testdata/testoutput24 \
  testdata/testoutput25 \
+  testdata/testoutput26 \
  testdata/testoutputEBC \
  testdata/valgrind-jit.supp \
  testdata/wintestinput3 \
@ -841,9 +866,11 @@ endif # WITH_GCOV

 EXTRA_DIST += \
  cmake/COPYING-CMAKE-SCRIPTS \
+  cmake/FindEditline.cmake \
  cmake/FindPackageHandleStandardArgs.cmake \
  cmake/FindReadline.cmake \
-  cmake/FindEditline.cmake \
+  cmake/pcre2-config-version.cmake.in \
+  cmake/pcre2-config.cmake.in \
  CMakeLists.txt \
  config-cmake.h.in

--- a/Makefile.os4
+++ b/Makefile.os4
@ -0,0 +1,271 @@
+#
+# Project: pcre2
+#
+# Created on: 10-01-2022 22:01:46
+#
+# commands to use:
+# make -f Makefile.os4 libpcre2.a
+# make -f Makefile.os4 libpcre2-posix.a
+# make -f Makefile.os4 pcre2test
+# sh RunTest
+# make -f Makefile.os4 clean
+#
+
+###################################################################
+##
+##////  Objects
+##
+###################################################################
+
+libpcre2_OBJ := \
+	 src/pcre2_chartables.o src/pcre2_auto_possess.o src/pcre2_compile.o \
+	 src/pcre2_config.o src/pcre2_context.o src/pcre2_convert.o \
+	 src/pcre2_dfa_match.o src/pcre2_error.o src/pcre2_extuni.o \
+	 src/pcre2_find_bracket.o src/pcre2_jit_compile.o src/pcre2_maketables.o \
+	 src/pcre2_match.o src/pcre2_match_data.o src/pcre2_newline.o \
+	 src/pcre2_ord2utf.o src/pcre2_pattern_info.o src/pcre2_script_run.o \
+	 src/pcre2_serialize.o src/pcre2_string_utils.o src/pcre2_study.o \
+	 src/pcre2_substitute.o src/pcre2_substring.o src/pcre2_tables.o \
+	 src/pcre2_ucd.o src/pcre2_valid_utf.o src/pcre2_xclass.o \
+	
+
+
+pcre2posix_OBJ := \
+	 src/pcre2posix.o
+
+
+pcre2test_OBJ := \
+	 src/pcre2test.o
+
+
+pcre2grep_OBJ := \
+	 src/pcre2grep.o
+
+###################################################################
+##
+##////  Variables and Environment
+##
+###################################################################
+
+MCRT := -mcrt=newlib
+ifeq ($(USE_CLIB2), yes)
+MCRT := -mcrt=clib2
+endif
+
+CC := gcc:bin/gcc
+
+INCPATH := -I. -Isrc
+
+# for pcre2test
+CFLAGS := $(MCRT) $(INCPATH) -O2 -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8
+
+###################################################################
+##
+##////  General rules
+##
+###################################################################
+
+.PHONY: all all-before all-after clean clean-custom realclean
+
+all: all-before libpcre2.a libpcre2-posix.a all-after
+
+all-before:
+#	You can add rules here to execute before the project is built
+
+all-after:
+#	You can add rules here to execute after the project is built
+
+tests: pcre2test pcre2grep
+
+clean: clean-custom
+	@echo "Cleaning compiler objects..."
+	@rm -f  $(libpcre2_OBJ) $(pcre2posix_OBJ) $(pcre2test_OBJ)
+
+cleanall: clean
+	@echo "Cleaning compiler targets..."
+	@rm -f  libpcre.a libpcre-posix.a pcre2test pcre2grep
+
+###################################################################
+##
+##////  Targets
+##
+###################################################################
+
+libpcre2.a: $(libpcre2_OBJ)
+	ar -rcs libpcre2.a $(libpcre2_OBJ)
+	ranlib libpcre2.a
+
+libpcre2-posix.a: $(pcre2posix_OBJ)
+	ar -rcs libpcre2-posix.a $(pcre2posix_OBJ)
+	ranlib libpcre2-posix.a
+
+pcre2test: libpcre2.a libpcre2-posix.a $(pcre2test_OBJ)
+	@echo "Linking pcre2test"
+	@gcc:bin/gcc $(MCRT) -o pcre2test $(pcre2test_OBJ) -L. -lauto -lpcre2 -lpcre2-posix
+	@echo "Removing stale debug target: pcre2test"
+	@rm -f pcre2test.debug
+	
+pcre2grep: libpcre2.a $(pcre2grep_OBJ)
+	@echo "Linking pcre2grep"
+	@gcc:bin/gcc $(MCRT) -o pcre2grep $(pcre2grep_OBJ) -L . -lauto -lpcre2
+	@echo "Removing stale debug target: pcre2grep"
+	@rm -f pcre2grep.debug
+
+
+###################################################################
+##
+##////  Standard rules
+##
+###################################################################
+
+# A default rule to make all the objects listed below
+# because we are hiding compiler commands from the output
+
+.c.o:
+	@echo "Compiling $<"
+	@$(CC) -c $< -o $*.o $(CFLAGS)
+
+src/pcre2_chartables.o: src/pcre2_chartables.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_auto_possess.o: src/pcre2_auto_possess.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_compile.o: src/pcre2_compile.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h src/pcre2_intmodedep.h \
+	
+
+src/pcre2_config.o: src/pcre2_config.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_context.o: src/pcre2_context.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_convert.o: src/pcre2_convert.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_dfa_match.o: src/pcre2_dfa_match.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_error.o: src/pcre2_error.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_extuni.o: src/pcre2_extuni.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_find_bracket.o: src/pcre2_find_bracket.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_jit_compile.o: src/pcre2_jit_compile.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h src/pcre2_intmodedep.h \
+	 src/sljit/sljitLir.c src/sljit/sljitLir.h src/sljit/sljitConfig.h \
+	 src/sljit/sljitConfigInternal.h src/sljit/sljitUtils.c src/sljit/sljitProtExecAllocator.c \
+	 src/sljit/sljitWXExecAllocator.c src/sljit/sljitExecAllocator.c src/pcre2_jit_simd_inc.h \
+	 src/pcre2_jit_neon_inc.h src/pcre2_jit_match.c
+
+src/pcre2_maketables.o: src/pcre2_maketables.c
+
+src/pcre2_match.o: src/pcre2_match.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_match_data.o: src/pcre2_match_data.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_newline.o: src/pcre2_newline.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_ord2utf.o: src/pcre2_ord2utf.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_pattern_info.o: src/pcre2_pattern_info.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_script_run.o: src/pcre2_script_run.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_serialize.o: src/pcre2_serialize.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2test.o: src/pcre2test.c src/config.h src/pcre2.h \
+	 src/pcre2posix.h src/pcre2_internal.h src/pcre2_ucp.h \
+	 src/pcre2_intmodedep.h src/pcre2_tables.c src/pcre2_ucptables.c \
+	 src/pcre2_ucd.c src/pcre2_printint.c
+
+src/pcre2_string_utils.o: src/pcre2_string_utils.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_study.o: src/pcre2_study.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_substitute.o: src/pcre2_substitute.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_substring.o: src/pcre2_substring.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2posix.o: src/pcre2posix.c src/config.h src/pcre2.h \
+	
+
+src/pcre2_tables.o: src/pcre2_tables.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h src/pcre2_intmodedep.h \
+	
+
+src/pcre2_ucd.o: src/pcre2_ucd.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_valid_utf.o: src/pcre2_valid_utf.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+src/pcre2_xclass.o: src/pcre2_xclass.c src/config.h src/pcre2_internal.h \
+	 src/pcre2.h src/pcre2_ucp.h
+
+
+src/pcre2grep.o: src/pcre2grep.c src/config.h
+
+###################################################################
+##
+##////  Custom rules
+##
+###################################################################
+
+runtests: libpcre2.a libpcre2-posix.a tests
+	sh RunTest
+	sh RunGrepTest
+
+release:
+	@echo "Create release folders..."
+	@mkdir -p release/local/newlib/lib release/local/clib2/lib release/local/Documentation/pcre2 release/local/common/include
+	
+	@echo "Building newlib based libraries..."
+	@make -f Makefile.os4 all
+	@cp libpcre2.a release/local/newlib/lib/
+	@cp libpcre2-posix.a release/local/newlib/lib/
+	
+	@echo "Clean build and libraries files..."
+	@make -f Makefile.os4 cleanall
+	
+	@echo "Building clib2 based libraries..."
+	@make -f Makefile.os4 all USE_CLIB2=yes
+	@cp libpcre2.a release/local/clib2/lib/
+	@cp libpcre2-posix.a release/local/clib2/lib/
+
+	@echo "Copy the necessary files..."
+	@cp src/pcre2.h release/local/common/include/
+	@cp src/pcre2posix.h release/local/common/include/
+	@cp COPYING release/local/Documentation/pcre2/
+	@cp HACKING release/local/Documentation/pcre2/
+	@cp LICENCE release/local/Documentation/pcre2/
+	@cp README release/local/Documentation/pcre2/
+	@cp README-OS4.md release/local/Documentation/pcre2/
+	
+	@echo "Clean build and libraries files..."
+	@make -f Makefile.os4 cleanall
+	
+	@echo "Creating the lha release file..."
+	@rm -f pcre2.lha
+	@lha -aeqr3 a pcre2.lha release/
+	
+	@rm -rf release
+
+###################################################################
+
--- a/196
+++ b/196
@ -1,6 +1,198 @@
 News about PCRE2 releases
 -------------------------

+
+Version 10.40 15-April-2022
+---------------------------
+
+This is mostly a bug-fixing and code-tidying release. However, there are some
+extensions to Unicode property handling:
+
+* Added support for Bidi_Class and a number of binary Unicode properties,
+including Bidi_Control.
+
+* A number of changes to script matching for \p and \P:
+
+  (a) Script extensions for a character are now coded as a bitmap instead of
+      a list of script numbers, which should be faster and does not need a
+      loop.
+
+  (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
+      sc and scx).
+
+  (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
+      the same as \p{scx:scriptname} because this change happened in Perl at
+      release 5.26.
+
+  (d) The standard Unicode 4-letter abbreviations for script names are now
+      recognized.
+
+  (e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
+      hyphens, and underscores are ignored in property names, which are then
+      matched independent of case.
+
+As always, see ChangeLog for a list of all changes (also the Git log).
+
+
+Version 10.39 29-October-2021
+-----------------------------
+
+This release is happening soon after 10.38 because the bug fix is important.
+
+1. Fix incorrect detection of alternatives in first character search in JIT.
+
+2. Update to Unicode 14.0.0.
+
+3. Some code cleanups (see ChangeLog).
+
+
+Version 10.38 01-October-2021
+-----------------------------
+
+As well as some bug fixes and tidies (as always, see ChangeLog for details),
+the documentation is updated to list the new URLs, following the move of the
+source repository to GitHub and the mailing list to Google Groups.
+
+* The CMake build system can now build both static and shared libraries in one
+go.
+
+* Following Perl's lead, \K is now locked out in lookaround assertions by
+default, but an option is provided to re-enable the previous behaviour.
+
+
+Version 10.37 26-May-2021
+-------------------------
+
+A few more bug fixes and tidies. The only change of real note is the removal of
+the actual POSIX names regcomp etc. from the POSIX wrapper library because
+these have caused issues for some applications (see 10.33 #2 below).
+
+
+Version 10.36 04-December-2020
+------------------------------
+
+Again, mainly bug fixes and tidies. The only enhancements are the addition of
+GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the
+handling of substitution strings for both -O and callouts in pcre2grep, with
+the addition of $x{...} and $o{...} to allow for characters whose code points
+are greater than 255 in Unicode mode.
+
+NOTE: there is an outstanding issue with JIT support for MacOS on arm64
+hardware. For details, please see Bugzilla issue #2618.
+
+
+Version 10.35 15-April-2020
+---------------------------
+
+Bugfixes, tidies, and a few new enhancements.
+
+1. Capturing groups that contain recursive backreferences to themselves are no
+longer automatically atomic, because the restriction is no longer necessary
+as a result of the 10.30 restructuring.
+
+2. Several new options for pcre2_substitute().
+
+3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode
+character properties are used for upper/lower case computations on characters
+whose code points are greater than 127.
+
+4. The character tables (for low-valued characters) can now more easily be
+saved and restored in binary.
+
+5. Updated to Unicode 13.0.0.
+
+
+Version 10.34 21-November-2019
+------------------------------
+
+Another release with a few enhancements as well as bugfixes and tidies. The
+main new features are:
+
+1. There is now some support for matching in invalid UTF strings.
+
+2. Non-atomic positive lookarounds are implemented in the pcre2_match()
+interpreter, but not in JIT.
+
+3. Added two new functions: pcre2_get_match_data_size() and
+pcre2_maketables_free().
+
+4. Upgraded to Unicode 12.1.0.
+
+
+Version 10.33 16-April-2019
+---------------------------
+
+Yet more bugfixes, tidies, and a few enhancements, summarized here (see
+ChangeLog for the full list):
+
+1. Callouts from pcre2_substitute() are now available.
+
+2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
+functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
+names are defined as macros. This should help avoid linking with the wrong
+library in some environments, while still exporting the POSIX names for
+pre-existing programs that use them.
+
+3. Some new options:
+
+   (a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n.
+
+   (b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...}
+       construct.
+
+   (c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be
+       made, instead of just remembering a pointer.
+
+4. Some new Perl features:
+
+   (a) Perl 5.28's experimental alphabetic names for atomic groups and
+       lookaround assertions, for example, (*pla:...) and (*atomic:...).
+
+   (b) The new Perl "script run" features (*script_run:...) and
+       (*atomic_script_run:...) aka (*sr:...) and (*asr:...).
+
+   (c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in
+       capture group names.
+
+5. --disable-percent-zt disables the use of %zu and %td in formatting strings
+in pcre2test. They were already automatically disabled for VC and older C
+compilers.
+
+6. Some changes related to callouts in pcre2grep:
+
+   (a) Support for running an external program under VMS has been added, in
+       addition to Windows and fork() support.
+
+   (b) --disable-pcre2grep-callout-fork restricts the callout support in
+       to the inbuilt echo facility.
+
+
+Version 10.32 10-September-2018
+-------------------------------
+
+This is another mainly bugfix and tidying release with a few minor
+enhancements. These are the main ones:
+
+1. pcre2grep now supports the inclusion of binary zeros in patterns that are
+read from files via the -f option.
+
+2. ./configure now supports --enable-jit=auto, which automatically enables JIT
+if the hardware supports it.
+
+3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for
+local workspace and local ovectors. Instead, an initial block of stack is
+reserved, but if this is insufficient, heap memory is used. The heap limit
+parameter now applies to pcre2_dfa_match().
+
+4. Updated to Unicode version 11.0.0.
+
+5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
+
+6. Added support for \N{U+dddd}, but only in Unicode mode.
+
+7. Added support for (?^) to unset all imnsx options.
+
+
 Version 10.31 12-February-2018
 ------------------------------

@ -31,7 +223,7 @@ remembering backtracking positions. This makes --disable-stack-for-recursion a
 NOOP. The new implementation allows backtracking into recursive group calls in
 patterns, making it more compatible with Perl, and also fixes some other
 previously hard-to-do issues. For patterns that have a lot of backtracking, the
-heap is now used, and there is explicit limit on the amount, settable by
+heap is now used, and there is an explicit limit on the amount, settable by
 pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained,
 but is renamed as "depth limit" (though the old names remain for
 compatibility).
@ -53,7 +245,7 @@ also supported.

 5. Additional compile options in the compile context are now available, and the
 first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and
-PCRE2_EXTRA_BAD_ESCAPE_IS LITERAL.
+PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.

 6. The newline type PCRE2_NEWLINE_NUL is now available.

--- a/74
+++ b/74
@ -10,6 +10,7 @@ This document contains the following sections:
  Calling conventions in Windows environments
  Comments about Win32 builds
  Building PCRE2 on Windows with CMake
+  Building PCRE2 on Windows with Visual Studio
  Testing with RunTest.bat
  Building PCRE2 on native z/OS and z/VM

@ -39,7 +40,11 @@ GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY

 The following are generic instructions for building the PCRE2 C library "by
 hand". If you are going to use CMake, this section does not apply to you; you
-can skip ahead to the CMake section.
+can skip ahead to the CMake section. Note that the settings concerned with
+8-bit, 16-bit, and 32-bit code units relate to the type of data string that
+PCRE2 processes. They are NOT referring to the underlying operating system bit
+width. You do not have to do anything special to compile in a 64-bit
+environment, for example.

 (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
     macro settings that it contains to whatever is appropriate for your
@ -47,7 +52,7 @@ can skip ahead to the CMake section.
     macro to specify what character(s) you want to be interpreted as line
     terminators by default.

-     When you compile any of the PCRE2 modules, you must specify
+     When you subsequently compile any of the PCRE2 modules, you must specify
     -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
     sources.

@ -61,6 +66,11 @@ can skip ahead to the CMake section.
     new release, you are strongly advised to review src/config.h.generic
     before re-using what you had previously.

+     Note also that the src/config.h.generic file is created from a config.h
+     that was generated by Autotools, which automatically includes settings of
+     a number of macros that are not actually used by PCRE2 (for example,
+     HAVE_MEMORY_H).
+
 (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.

 (3) EITHER:
@ -68,23 +78,23 @@ can skip ahead to the CMake section.
       src/pcre2_chartables.c.

     OR:
-       Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
-       if you have set up src/config.h), and then run it with the single
-       argument "src/pcre2_chartables.c". This generates a set of standard
-       character tables and writes them to that file. The tables are generated
-       using the default C locale for your system. If you want to use a locale
-       that is specified by LC_xxx environment variables, add the -L option to
-       the dftables command. You must use this method if you are building on a
-       system that uses EBCDIC code.
+       Compile src/pcre2_dftables.c as a stand-alone program (using
+       -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
+       the single argument "src/pcre2_chartables.c". This generates a set of
+       standard character tables and writes them to that file. The tables are
+       generated using the default C locale for your system. If you want to use
+       a locale that is specified by LC_xxx environment variables, add the -L
+       option to the pcre2_dftables command. You must use this method if you
+       are building on a system that uses EBCDIC code.

     The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
     specify alternative tables at run time.

- (4) For an 8-bit library, compile the following source files from the src
-     directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
-     set -DHAVE_CONFIG_H if you have set up src/config.h with your
-     configuration, or else use other -D settings to change the configuration
-     as required.
+ (4) For a library that supports 8-bit code units in the character strings that
+     it processes, compile the following source files from the src directory,
+     setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set
+     -DHAVE_CONFIG_H if you have set up src/config.h with your configuration,
+     or else use other -D settings to change the configuration as required.

       pcre2_auto_possess.c
       pcre2_chartables.c
@ -103,6 +113,7 @@ can skip ahead to the CMake section.
       pcre2_newline.c
       pcre2_ord2utf.c
       pcre2_pattern_info.c
+       pcre2_script_run.c
       pcre2_serialize.c
       pcre2_string_utils.c
       pcre2_study.c
@ -110,6 +121,7 @@ can skip ahead to the CMake section.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -126,7 +138,7 @@ can skip ahead to the CMake section.
     src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
     these yourself.

-     Not also that the pcre2_fuzzsupport.c file contains special code that is
+     Note also that the pcre2_fuzzsupport.c file contains special code that is
     useful to those who want to run fuzzing tests on the PCRE2 library. Unless
     you are doing that, you can ignore it.

@ -135,9 +147,9 @@ can skip ahead to the CMake section.
     If your system has static and shared libraries, you may have to do this
     once for each type.

- (6) If you want to build a 16-bit library or 32-bit library (as well as, or
-     instead of the 8-bit library) just supply 16 or 32 as the value of
-     -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
+ (6) If you want to build a library that supports 16-bit or 32-bit code units,
+     (as well as, or instead of the 8-bit library) just supply 16 or 32 as the
+     value of -DPCRE2_CODE_UNIT_WIDTH when you are compiling.

 (7) If you want to build the POSIX wrapper functions (which apply only to the
     8-bit library), ensure that you have the src/pcre2posix.h file and then
@ -185,7 +197,7 @@ can skip ahead to the CMake section.

 STACK SIZE IN WINDOWS ENVIRONMENTS

-Prior to release 10.30 the default system stack size of 1Mb in some Windows
+Prior to release 10.30 the default system stack size of 1MiB in some Windows
 environments caused issues with some tests. This should no longer be the case
 for 10.30 and later releases.

@ -295,7 +307,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
    source dir. For example, C:\pcre2\pcre2-xx\build.

-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
    to start Cmake from the Windows Start menu, as this can lead to errors.

@ -330,6 +342,18 @@ cache can be deleted by selecting "File > Delete Cache".
    available for review in Testing\Temporary under your build dir.


+BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
+
+The code currently cannot be compiled without an inttypes.h header, which is
+available only with Visual Studio 2013 or newer. However, this portable and
+permissively-licensed implementation of the stdint.h header could be used as an
+alternative:
+
+  http://www.azillionmonkeys.com/qed/pstdint.h
+
+Just rename it and drop it into the top level of the build tree.
+
+
 TESTING WITH RUNTEST.BAT

 If configured with CMake, building the test project ("make test" or building
@ -350,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (wherein which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
@ -382,6 +406,6 @@ Everything in that location, source and executable, is in EBCDIC and native
 z/OS file formats. The port provides an API for LE languages such as COBOL and
 for the z/OS and z/VM versions of the Rexx languages.

-===============================
-Last Updated: 13 September 2017
-===============================
+===========================
+Last Updated: 28 April 2021
+===========================
--- a/2
+++ b/2
@ -190,7 +190,7 @@ files="\
  libpcre2-16.pc.in \
  libpcre2-32.pc.in \
  libpcre2-posix.pc.in \
-  src/dftables.c \
+  src/pcre2_dftables.c \
  src/pcre2.h.in \
  src/pcre2_auto_possess.c \
  src/pcre2_compile.c \
--- a/223
+++ b/223
@ -1,19 +1,23 @@
 README file for PCRE2 (Perl-compatible regular expression library)
 ------------------------------------------------------------------

-PCRE2 is a re-working of the original PCRE library to provide an entirely new
-API. The latest release of PCRE2 is always available in three alternative
-formats from:
+PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
+API. Since its initial release in 2015, there has been further development of
+the code and it now differs from PCRE1 in more than just the API. There are new
+features, and the internals have been improved. The original PCRE1 library is
+now obsolete and no longer maintained. The latest release of PCRE2 is available
+in .tar.gz, tar.bz2, or .zip form from this GitHub repository:

-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
+https://github.com/PCRE2Project/pcre2/releases

-There is a mailing list for discussion about the development of PCRE (both the
-original and new APIs) at pcre-dev@exim.org. You can access the archives and
-subscribe or manage your subscription here:
+There is a mailing list for discussion about the development of PCRE2 at
+pcre2-dev@googlegroups.com. You can subscribe by sending an email to
+pcre2-dev+subscribe@googlegroups.com.

-   https://lists.exim.org/mailman/listinfo/pcre-dev
+You can access the archives and also subscribe or manage your subscription
+here:
+
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -39,13 +43,13 @@ The PCRE2 APIs
 PCRE2 is written in C, and it has its own API. There are three sets of
 functions, one for the 8-bit library, which processes strings of bytes, one for
 the 16-bit library, which processes strings of 16-bit values, and one for the
-32-bit library, which processes strings of 32-bit values. There are no C++
-wrappers.
+32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
+are no C++ wrappers.

 The distribution does contain a set of C wrapper functions for the 8-bit
 library that are based on the POSIX regular expression API (see the pcre2posix
-man page). These can be found in a library called libpcre2-posix. Note that
-this just provides a POSIX calling interface to PCRE2; the regular expressions
+man page). These are built into a library called libpcre2-posix. Note that this
+just provides a POSIX calling interface to PCRE2; the regular expressions
 themselves still follow Perl syntax and semantics. The POSIX API is restricted,
 and does not give full access to all of PCRE2's facilities.

@ -53,20 +57,8 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
 official POSIX name is regex.h, but I did not want to risk possible problems
 with existing files of that name by distributing it that way. To use PCRE2 with
 an existing program that uses the POSIX API, pcre2posix.h will have to be
-renamed or pointed at by a link.
-
-If you are using the POSIX interface to PCRE2 and there is already a POSIX
-regex library installed on your system, as well as worrying about the regex.h
-header file (as mentioned above), you must also take care when linking programs
-to ensure that they link with PCRE2's libpcre2-posix library. Otherwise they
-may pick up the POSIX functions of the same name from the other library.
-
-One way of avoiding this confusion is to compile PCRE2 with the addition of
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
-compiler flags (CFLAGS if you are using "configure" -- see below). This has the
-effect of renaming the functions so that the names no longer clash. Of course,
-you have to do the same thing for your applications, or write them using the
-new names.
+renamed or pointed at by a link (or the program modified, of course). See the
+pcre2posix documentation for more details.


 Documentation for PCRE2
@ -122,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.

-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.

+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@ -171,10 +169,14 @@ library. They are also documented in the pcre2build man page.
  give large performance improvements on certain platforms, add --enable-jit to
  the "configure" command. This support is available only for certain hardware
  architectures. If you try to enable it on an unsupported architecture, there
-  will be a compile time error. If you are running under SELinux you may also
-  want to add --enable-jit-sealloc, which enables the use of an execmem
-  allocator in JIT that is compatible with SELinux. This has no effect if JIT
-  is not enabled.
+  will be a compile time error. If in doubt, use --enable-jit=auto, which
+  enables JIT only if the current hardware is supported.
+
+. If you are enabling JIT under SELinux environment you may also want to add
+  --enable-jit-sealloc, which enables the use of an executable memory allocator
+  that is compatible with SELinux. Warning: this allocator is experimental!
+  It does not support fork() operation and may crash when no disk space is
+  available. This option has no effect if JIT is disabled.

 . If you do not want to make use of the default support for UTF-8 Unicode
  character strings in the 8-bit library, UTF-16 Unicode character strings in
@ -192,10 +194,10 @@ library. They are also documented in the pcre2build man page.

  As well as supporting UTF strings, Unicode support includes support for the
  \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).

 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
  of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@ -239,9 +241,11 @@ library. They are also documented in the pcre2build man page.
  discussion in the pcre2api man page (search for pcre2_set_match_limit).

 . There is a separate counter that limits the depth of nested backtracking
-  during a matching process, which indirectly limits the amount of heap memory
-  that is used. This also has a default of ten million, which is essentially
-  "unlimited". You can change the default by setting, for example,
+  (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
+  matching process, which indirectly limits the amount of heap memory that is
+  used, and in the case of pcre2_dfa_match() the amount of stack as well. This
+  counter also has a default of ten million, which is essentially "unlimited".
+  You can change the default by setting, for example,

  --with-match-limit-depth=5000

@ -249,16 +253,17 @@ library. They are also documented in the pcre2build man page.
  pcre2_set_depth_limit).

 . You can also set an explicit limit on the amount of heap memory used by
-  the pcre2_match() interpreter:
+  the pcre2_match() and pcre2_dfa_match() interpreters:

  --with-heap-limit=500

-  The units are kilobytes. This limit does not apply when the JIT optimization
-  (which has its own memory control features) is used. There is more discussion
-  on the pcre2api man page (search for pcre2_set_heap_limit).
+  The units are kibibytes (units of 1024 bytes). This limit does not apply when
+  the JIT optimization (which has its own memory control features) is used.
+  There is more discussion on the pcre2api man page (search for
+  pcre2_set_heap_limit).

 . In the 8-bit library, the default maximum compiled pattern size is around
-  64K bytes. You can increase this by adding --with-link-size=3 to the
+  64 kibibytes. You can increase this by adding --with-link-size=3 to the
  "configure" command. PCRE2 then uses three bytes instead of two for offsets
  to different parts of the compiled pattern. In the 16-bit library,
  --with-link-size=3 is the same as --with-link-size=4, which (in both
@ -272,9 +277,9 @@ library. They are also documented in the pcre2build man page.

  --enable-rebuild-chartables

-  a program called dftables is compiled and run in the default C locale when
-  you obey "make". It builds a source file called pcre2_chartables.c. If you do
-  not specify this option, pcre2_chartables.c is created as a copy of
+  a program called pcre2_dftables is compiled and run in the default C locale
+  when you obey "make". It builds a source file called pcre2_chartables.c. If
+  you do not specify this option, pcre2_chartables.c is created as a copy of
  pcre2_chartables.c.dist. See "Character tables" below for further
  information.

@ -300,8 +305,8 @@ library. They are also documented in the pcre2build man page.
  unaddressable. This allows it to detect invalid memory accesses, and is
  mostly useful for debugging PCRE2 itself.

-. In environments where the gcc compiler is used and lcov version 1.6 or above
-  is installed, if you specify
+. In environments where the gcc compiler is used and lcov is installed, if you
+  specify

  --enable-coverage

@ -315,10 +320,14 @@ library. They are also documented in the pcre2build man page.
 . When JIT support is enabled, pcre2grep automatically makes use of it, unless
  you add --disable-pcre2grep-jit to the "configure" command.

-. On non-Windows sytems there is support for calling external scripts during
-  matching in the pcre2grep command via PCRE2's callout facility with string
-  arguments. This support can be disabled by adding --disable-pcre2grep-callout
-  to the "configure" command.
+. There is support for calling external programs during matching in the
+  pcre2grep command, using PCRE2's callout facility with string arguments. This
+  support can be disabled by adding --disable-pcre2grep-callout to the
+  "configure" command. There are two kinds of callout: one that generates
+  output from inbuilt code, and another that calls an external program. The
+  latter has special support for Windows and VMS; otherwise it assumes the
+  existence of the fork() function. This facility can be disabled by adding
+  --disable-pcre2grep-callout-fork to the "configure" command.

 . The pcre2grep program currently supports only 8-bit data files, and so
  requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
@ -366,11 +375,21 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.

+. The C99 standard defines formatting modifiers z and t for size_t and
+  ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
+  environments other than Microsoft Visual Studio versions earlier than 2013
+  when __STDC_VERSION__ is defined and has a value greater than or equal to
+  199901L (indicating C99). However, there is at least one environment that
+  claims to be C99 but does not support these modifiers. If
+  --disable-percent-zt is specified, no use is made of the z or t modifiers.
+  Instead of %td or %zu, %lu is used, with a cast for size_t values.
+
 . There is a special option called --enable-fuzz-support for use by people who
  want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
  library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
@ -382,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -399,7 +418,7 @@ The "configure" script builds the following files for the basic C library:
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                         that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@ -538,11 +557,11 @@ Cross-compiling using autotools

 You can specify CC and CFLAGS in the normal way to the "configure" command, in
 order to cross-compile PCRE2 for some other host. However, you should NOT
-specify --enable-rebuild-chartables, because if you do, the dftables.c source
-file is compiled and run on the local host, in order to generate the inbuilt
-character tables (the pcre2_chartables.c file). This will probably not work,
-because dftables.c needs to be compiled with the local compiler, not the cross
-compiler.
+specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
+source file is compiled and run on the local host, in order to generate the
+inbuilt character tables (the pcre2_chartables.c file). This will probably not
+work, because pcre2_dftables.c needs to be compiled with the local compiler,
+not the cross compiler.

 When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
 created by making a copy of pcre2_chartables.c.dist, which is a default set of
@ -550,9 +569,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
 not be a problem.

 If you need to modify the character tables when cross-compiling, you should
-move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
-and run it on the local host to make a new version of pcre2_chartables.c.dist.
-Then when you cross-compile PCRE2 this new version of the tables will be used.
+move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
+hand and run it on the local host to make a new version of
+pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
+at build time" for more details.


 Making new tarballs
@ -589,13 +609,13 @@ is available. RunTest outputs a comment when it skips a test.

 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.

 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.

-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:

@ -676,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
@ -711,8 +731,8 @@ compile context.
 The source file called pcre2_chartables.c contains the default set of tables.
 By default, this is created as a copy of pcre2_chartables.c.dist, which
 contains tables for ASCII coding. However, if --enable-rebuild-chartables is
-specified for ./configure, a different version of pcre2_chartables.c is built
-by the program dftables (compiled from dftables.c), which uses the ANSI C
+specified for ./configure, a new version of pcre2_chartables.c is built by the
+program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
 character handling functions such as isalnum(), isalpha(), isupper(),
 islower(), etc. to build the table sources. This means that the default C
 locale that is set for your system will control the contents of these default
@ -722,32 +742,40 @@ file does not get automatically re-generated. The best way to do this is to
 move pcre2_chartables.c.dist out of the way and replace it with your customized
 tables.

-When the dftables program is run as a result of --enable-rebuild-chartables,
-it uses the default C locale that is set on your system. It does not pay
-attention to the LC_xxx environment variables. In other words, it uses the
-system's default locale rather than whatever the compiling user happens to have
-set. If you really do want to build a source set of character tables in a
-locale that is specified by the LC_xxx variables, you can run the dftables
-program by hand with the -L option. For example:
+When the pcre2_dftables program is run as a result of specifying
+--enable-rebuild-chartables, it uses the default C locale that is set on your
+system. It does not pay attention to the LC_xxx environment variables. In other
+words, it uses the system's default locale rather than whatever the compiling
+user happens to have set. If you really do want to build a source set of
+character tables in a locale that is specified by the LC_xxx variables, you can
+run the pcre2_dftables program by hand with the -L option. For example:

-  ./dftables -L pcre2_chartables.c.special
+  ./pcre2_dftables -L pcre2_chartables.c.special

-The first two 256-byte tables provide lower casing and case flipping functions,
-respectively. The next table consists of three 32-byte bit maps which identify
-digits, "word" characters, and white space, respectively. These are used when
-building 32-byte bit maps that represent character classes for code points less
-than 256. The final 256-byte table has bits indicating various character types,
-as follows:
+The second argument names the file where the source code for the tables is
+written. The first two 256-byte tables provide lower casing and case flipping
+functions, respectively. The next table consists of a number of 32-byte bit
+maps which identify certain character classes such as digits, "word"
+characters, white space, etc. These are used when building 32-byte bit maps
+that represent character classes for code points less than 256. The final
+256-byte table has bits indicating various character types, as follows:

    1   white space character
    2   letter
-    4   decimal digit
-    8   hexadecimal digit
+    4   lower case letter
+    8   decimal digit
   16   alphanumeric or '_'
-  128   regular expression metacharacter or binary zero

-You should not alter the set of characters that contain the 128 bit, as that
-will cause PCRE2 to malfunction.
+You can also specify -b (with or without -L) when running pcre2_dftables. This
+causes the tables to be written in binary instead of as source code. A set of
+binary tables can be loaded into memory by an application and passed to
+pcre2_compile() in the same way as tables created dynamically by calling
+pcre2_maketables(). The tables are just a string of bytes, independent of
+hardware characteristics such as endianness. This means they can be bundled
+with an application that runs in different environments, to ensure consistent
+behaviour.
+
+See also the pcre2build section "Creating character tables at build time".


 File manifest
@ -758,7 +786,7 @@ The distribution should contain the files listed below.
 (A) Source files for the PCRE2 library functions and their headers are found in
    the src directory:

-  src/dftables.c           auxiliary program for building pcre2_chartables.c
+  src/pcre2_dftables.c     auxiliary program for building pcre2_chartables.c
                           when --enable-rebuild-chartables is specified

  src/pcre2_chartables.c.dist  a default set of character tables that assume
@ -784,6 +812,7 @@ The distribution should contain the files listed below.
  src/pcre2_newline.c      )
  src/pcre2_ord2utf.c      )
  src/pcre2_pattern_info.c )
+  src/pcre2_script_run.c   )
  src/pcre2_serialize.c    )
  src/pcre2_string_utils.c )
  src/pcre2_study.c        )
@ -881,6 +910,6 @@ The distribution should contain the files listed below.
                          )   environments

 Philip Hazel
-Email local part: ph10
-Email domain: cam.ac.uk
-Last updated: 12 September 2017
+Email local part: Philip.Hazel
+Email domain: gmail.com
+Last updated: 15 April 2022
--- a/README-OS4.md
+++ b/README-OS4.md
@ -0,0 +1,39 @@
+PCRE2 (Perl-compatible regular expression library)
+---------------------------------------------------------------------------
+
+This is a port of PCRE2 10.40 by Philip Hazel for AmigaOS 4, as found at the
+GitHub repository https://github.com/PCRE2Project/pcre2
+
+More information about PCRE can be found at its official website
+at https://www.pcre.org and at the documentation that comes with this
+package.
+
+In the archive both newlib and clib2 libraries are included. It has been
+tested with various applications, but in case you find issues please 
+contact me.
+
+To install it into your AmigaOS 4 SDK installation, just extract all the 
+files in the SDK: path.
+
+Compile
+--------------------------
+The source and the changes I did can be found at my personale repository
+https://git.walkero.gr/walkero/pcre2
+
+You can compile it using the Makefile.os4 file, and produce the libraries
+yourself.
+
+* with newlib run:
+  ```bash
+  make -f Makefile.os4 all
+  ```
+* with clib2 run:
+  ```bash
+  make -f Makefile.os4 all USE_CLIB2=yes
+  ```
+
+Changelog
+--------------------------
+v10.40r1 - 2022-07-31
+* First release
+
--- a/README.md
+++ b/README.md
@ -0,0 +1,56 @@
+# PCRE2 - Perl-Compatible Regular Expressions
+
+The PCRE2 library is a set of C functions that implement regular expression
+pattern matching using the same syntax and semantics as Perl 5. PCRE2 has its
+own native API, as well as a set of wrapper functions that correspond to the
+POSIX regular expression API. The PCRE2 library is free, even for building 
+proprietary software. It comes in three forms, for processing 8-bit, 16-bit,
+or 32-bit code units, in either literal or UTF encoding.
+
+PCRE2 was first released in 2015 to replace the API in the original PCRE 
+library, which is now obsolete and no longer maintained. As well as a more
+flexible API, the code of PCRE2 has been much improved since the fork.
+ 
+## Download
+
+As well as downloading from the 
+[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2 
+or the older, unmaintained PCRE1 library from an 
+[*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge.
+
+You can check out the PCRE2 source code via Git or Subversion:
+
+    git clone https://github.com/PCRE2Project/pcre2.git
+    svn co    https://github.com/PCRE2Project/pcre2.git
+
+## Contributed Ports
+
+If you just need the command-line PCRE2 tools on Windows, precompiled binary
+versions are available at this 
+[Rexegg page](http://www.rexegg.com/pcregrep-pcretest.html).
+
+A PCRE2 port for z/OS, a mainframe operating system which uses EBCDIC as its
+default character encoding, can be found at 
+[http://www.cbttape.org](http://www.cbttape.org/) (File 939).
+
+## Documentation
+
+You can read the PCRE2 documentation 
+[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html).
+
+Comparisons to Perl's regular expression semantics can be found in the
+community authored Wikipedia entry for PCRE.
+
+There is a curated summary of changes for each PCRE release, copies of
+documentation from older releases, and other useful information from the third
+party authored 
+[RexEgg PCRE Documentation and Change Log page](http://www.rexegg.com/pcre-documentation.html).
+
+## Contact
+
+To report a problem with the PCRE2 library, or to make a feature request, please
+use the PCRE2 GitHub issues tracker. There is a mailing list for discussion of
+ PCRE2 issues and development at pcre2-dev@googlegroups.com, which is where any
+announcements will be made. You can browse the 
+[list archives](https://groups.google.com/g/pcre2-dev).
+
--- a/154
+++ b/154
@ -4,6 +4,12 @@
 # itself. What we are checking here is the file handling and options that are
 # supported by pcre2grep. This script must be run in the build directory.

+# CODING CONVENTIONS:
+# * Put printf arguments in single, not double quotes to avoid unwanted
+#     escaping.
+# * Use \0 for binary zero in printf, not \x0, for the benefit of older
+#     versions (and use octal for other special values).
+
 # Set the C locale, so that sort(1) behaves predictably.

 LC_ALL=C
@ -62,6 +68,22 @@ diff -b  /dev/null /dev/null 2>/dev/null && cf="diff -b"
 diff -u  /dev/null /dev/null 2>/dev/null && cf="diff -u"
 diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"

+# Some tests involve NUL characters. It seems impossible to handle them easily
+# in many operating systems. An earlier version of this script used sed to
+# translate NUL into the string ZERO, but this didn't work on Solaris (aka
+# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
+# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
+# even when using GNU sed. A user suggested using tr instead, which
+# necessitates translating to a single character. However, on (some versions
+# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
+# /usr/xpg4/bin/tr is available, it can do so, so test for that.
+
+if [ -x /usr/xpg4/bin/tr ] ; then
+  tr=/usr/xpg4/bin/tr
+else
+  tr=tr
+fi
+
 # If this test is being run from "make check", $srcdir will be set. If not, set
 # it to the current or parent directory, whichever one contains the test data.
 # Subsequently, we run most of the pcre2grep tests in the source directory so
@ -164,11 +186,11 @@ echo "---------------------------- Test 14 -----------------------------" >>test
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep 'abc^*' ./testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
@ -290,7 +312,7 @@ echo "---------------------------- Test 45 ------------------------------" >>tes
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
@ -552,7 +574,7 @@ echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
 echo "a" >testtemp1grep
 echo "aaaaa" >>testtemp1grep
-(cd $srcdir; $valgrind $vjs $pcre2grep  --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep  --line-offsets --allow-lookaround-bsk '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 108 ------------------------------" >>testtrygrep
@ -600,7 +622,7 @@ echo "---------------------------- Test 118 -----------------------------" >>tes
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 119 -----------------------------" >>testtrygrep
-printf "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
+printf '123\n456\n789\n---abc\ndef\nxyz\n---\n' >testNinputgrep
 $valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep

@ -631,16 +653,63 @@ echo "RC=$?" >>testtrygrep
 echo "RC=$?" >>testtrygrep

 echo "---------------------------- Test 125 -----------------------------" >>testtrygrep
-printf "abcd\n" >testNinputgrep
-$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep
+printf 'abcd\n' >testNinputgrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K.)' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep
-$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=.\K)' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep
-$valgrind $vjs $pcre2grep --colour=always '(?<=\K[ac])' testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K[ac])' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep
-$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep

+echo "---------------------------- Test 126 -----------------------------" >>testtrygrep
+printf 'Next line pattern has binary zero\nABC\0XYZ\n' >testtemp1grep
+printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep
+$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 127 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 128 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 129 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m 2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 130 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -o -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 131 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -oc -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; head -1 <&3; exec 3<&-) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; exec 3<&-) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 134 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep

 # Now compare the results.

@ -662,7 +731,21 @@ if [ $utf8 -ne 0 ] ; then
  echo "RC=$?" >>testtrygrep

  echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
-  (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any --allow-lookaround-bsk '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
+  echo "RC=$?" >>testtrygrep
+
+  echo "---------------------------- Test U4 ------------------------------" >>testtrygrep
+  printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' $builddir/testtemp1grep) >>testtrygrep 2>&1
+  echo "RC=$?" >>testtrygrep
+
+  echo "---------------------------- Test U5 ------------------------------" >>testtrygrep
+  printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep
+  echo "RC=$?" >>testtrygrep
+
+  echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
  echo "RC=$?" >>testtrygrep

  $cf $srcdir/testdata/grepoutput8 testtrygrep
@ -681,55 +764,54 @@ fi
 # starts with a hyphen. These tests are run in the build directory.

 echo "Testing pcre2grep newline settings"
-printf "abc\rdef\r\nghi\njkl" >testNinputgrep
+printf 'abc\rdef\r\nghi\njkl' >testNinputgrep

-printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
+printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep
 $valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep
 pattern=`printf 'def\rjkl'`
 $valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep

-printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N4 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep

-printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-# It seems inpossible to handle NUL characters easily in Solaris (aka SunOS).
-# The version of sed explicitly doesn't like them. For the moment, we just
-# don't run this test under SunOS. Fudge the output so that the comparison
-# works. A similar problem has also been reported for MacOS (Darwin).
-
-printf "%c--------------------------- Test N7 ------------------------------\r\n" - >>testtrygrep
-uname=`uname`
-if [ "$uname" != "SunOS" -a "$uname" != "Darwin" ] ; then
-  printf "abc\0def" >testNinputgrep
-  $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
-  echo "" >>testtrygrep
-else
-  echo '1:abcZERO2:def' >>testtrygrep
-fi
+printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
+printf 'abc\0def' >testNinputgrep
+$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
+echo "" >>testtrygrep

 $cf $srcdir/testdata/grepoutputN testtrygrep
 if [ $? != 0 ] ; then exit 1; fi

-# If pcre2grep supports script callouts, run some tests on them.
+# If pcre2grep supports script callouts, run some tests on them. It is possible
+# to restrict these callouts to the non-fork case, either for security, or for
+# environments that do not support fork(). This is handled by comparing to a
+# different output.

-if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scripts in patterns are supported'; then
+if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then
  echo "Testing pcre2grep script callouts"
  $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
  $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
-  # The above has no newline, which 'diff -ub' ignores, so add one.
-  $cf $srcdir/testdata/grepoutputC testtrygrep
+  $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep
+
+  if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
+    $cf $srcdir/testdata/grepoutputCN testtrygrep
+  else
+    $cf $srcdir/testdata/grepoutputC testtrygrep
+  fi
+
  if [ $? != 0 ] ; then exit 1; fi
 else
  echo "Script callouts are not supported"
--- a/RunGrepTest.bat
+++ b/RunGrepTest.bat
@ -653,14 +653,19 @@ if ERRORLEVEL 1 exit /b 1

 :: If pcre2grep supports script callouts, run some tests on them.

-%pcre2grep% --help | %pcre2grep% -q "Callout scripts in patterns are supported"
+%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported"
 if %ERRORLEVEL% equ 0 (
  echo Testing pcre2grep script callouts
  %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
  %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
  %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep
  %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep
-  %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
+  %pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported"
+  if %ERRORLEVEL% equ 0 (
+    %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout%
+  ) else (
+    %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
+  )
  if ERRORLEVEL 1 exit /b 1
 ) else (
  echo Script callouts are not supported
--- a/80
+++ b/80
@ -17,8 +17,16 @@
 # individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
 # end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
 # runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
-# except test 10. Whatever order the arguments are in, the tests are always run
-# in numerical order.
+# except test 10. Whatever order the arguments are in, these tests are always
+# run in numerical order.
+#
+# If no specific tests are selected (which is the case when this script is run
+# via 'make check') the default is to run all the numbered tests.
+#
+# There may also be named (as well as numbered) tests for special purposes. At
+# present there is just one, called "heap". This test's output contains the
+# sizes of heap frames and frame vectors, which depend on the environment. It
+# is therefore not run unless explicitly requested.
 #
 # Inappropriate tests are automatically skipped (with a comment to say so). For
 # example, if JIT support is not compiled, test 16 is skipped, whereas if JIT
@ -80,7 +88,9 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)"
 title23="Test 23: \C disabled test"
 title24="Test 24: Non-UTF pattern conversion tests"
 title25="Test 25: UTF pattern conversion tests"
-maxtest=25
+title26="Test 26: Auto-generated unicode property tests"
+maxtest=26
+titleheap="Test 'heap': Environment-specific heap tests"

 if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title0
@ -109,6 +119,12 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title23
  echo $title24
  echo $title25
+  echo $title26
+  echo ""
+  echo $titleheap
+  echo ""
+  echo "Numbered tests are automatically run if nothing selected."
+  echo "Named tests must be explicitly selected."
  exit 0
 fi

@ -238,6 +254,8 @@ do22=no
 do23=no
 do24=no
 do25=no
+do26=no
+doheap=no

 while [ $# -gt 0 ] ; do
  case $1 in
@ -267,6 +285,8 @@ while [ $# -gt 0 ] ; do
   23) do23=yes;;
   24) do24=yes;;
   25) do25=yes;;
+   26) do26=yes;;
+ heap) doheap=yes;;
   -8) arg8=yes;;
  -16) arg16=yes;;
  -32) arg32=yes;;
@ -319,8 +339,9 @@ fi
 # If it is possible to set the system stack size and -bigstack was given,
 # set up a large stack.

-$sim ./pcre2test -S 1 /dev/null /dev/null
-if [ $? -eq 0 -a "$bigstack" != "" ] ; then
+$sim ./pcre2test -S 64 /dev/null /dev/null
+support_setstack=$?
+if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then
  setstack="-S 64"
 else
  setstack=""
@ -407,8 +428,8 @@ if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
  fi
 fi

-# If no specific tests were requested, select all. Those that are not
-# relevant will be automatically skipped.
+# If no specific tests were requested, select all the numbered tests. Those
+# that are not relevant will be automatically skipped.

 if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do4  = no -a $do5  = no -a $do6  = no -a $do7  = no -a \
@ -416,7 +437,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
     $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
     $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
-     $do24 = no -a $do25 = no \
+     $do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \
   ]; then
  do0=yes
  do1=yes
@ -444,6 +465,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
  do23=yes
  do24=yes
  do25=yes
+  do26=yes
 fi

 # Handle any explicit skips at this stage, so that an argument list may consist
@ -479,7 +501,9 @@ for bmode in "$test8" "$test16" "$test32"; do
    echo '' >testtry
    checkspecial '-C'
    checkspecial '--help'
-    checkspecial '-S 1 -t 10 testSinput'
+    if [ $support_setstack -eq 0 ] ; then
+      checkspecial '-S 1 -t 10 testSinput'
+    fi
    echo "  OK"
  fi

@ -493,15 +517,20 @@ for bmode in "$test8" "$test16" "$test32"; do
    done
  fi

-  # PCRE2 tests that are not Perl-compatible: API, errors, internals
+  # PCRE2 tests that are not Perl-compatible: API, errors, internals. We copy
+  # the testbtables file to the current directory for use by this test.

  if [ $do2 = yes ] ; then
    echo $title2 "(excluding UTF-$bits)"
+    cp $testdata/testbtables .
    for opt in "" $jitopt; do
      $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
-      if [ $? = 0 ] ; then
-        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,200 >>testtry
+      saverc=$?
+      if [ $saverc = 0 ] ; then
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,200 >>testtry
        checkresult $? 2 "$opt"
+      else
+        checkresult $saverc 2 "$opt"
      fi
    done
  fi
@ -843,7 +872,7 @@ for bmode in "$test8" "$test16" "$test32"; do
    checkresult $? 24 ""
  fi

-  # UTF pattern converson tests
+  # UTF pattern conversion tests

  if [ "$do25" = yes ] ; then
    echo $title25
@ -855,10 +884,33 @@ for bmode in "$test8" "$test16" "$test32"; do
    fi
  fi

+  # Auto-generated unicode property tests
+
+  if [ $do26 = yes ] ; then
+    echo $title26
+    if [ $utf -eq 0 ] ; then
+      echo "  Skipped because UTF-$bits support is not available"
+    else
+      for opt in "" $jitopt; do
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
+        checkresult $? 26 "$opt"
+      done
+    fi
+  fi
+
+  # Manually selected heap tests - output may vary in different environments,
+  # which is why that are not automatically run.
+
+  if [ $doheap = yes ] ; then
+    echo $titleheap
+    $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry
+    checkresult $? heap-$bits ""
+  fi
+
 # End of loop for 8/16/32-bit tests
 done

 # Clean up local working files
-rm -f testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry
+rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry

 # End
--- a/RunTest.bat
+++ b/RunTest.bat
@ -26,6 +26,7 @@
@rem Updated for new test 14 (moving others up a number), August 2015.
@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
@rem PH added missing "set type" for test 22, April 2016.
+@rem PH added copy command for new testbtables file, November 2020


 setlocal enabledelayedexpansion
@ -134,9 +135,9 @@ if "%all%" == "yes" (
  set do7=yes
  set do8=yes
  set do9=yes
-  set do10=yes
+  set do10=no
  set do11=yes
-  set do12=yes
+  set do12=no
  set do13=yes
  set do14=yes
  set do15=yes
@ -263,7 +264,7 @@ if errorlevel 1 (
  set failed="yes"
  goto :eof
 ) else if [%1]==[2] (
-  %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -63,-62,-2,-1,0,100,188,189,190,191 >>%2%bits%\%testoutput%
+  %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,200 >>%2%bits%\%testoutput%
 )

 set type=
@ -305,6 +306,7 @@ if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit
 goto :eof

 :do2
+  copy /y %srcdir%\testdata\testbtables testbtables
 
  call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
  if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit
 goto :eof
--- a/WORKSPACE.bazel
+++ b/WORKSPACE.bazel
@ -0,0 +1 @@
+# See MODULE.bazel
--- a/cmake/FindEditline.cmake
+++ b/cmake/FindEditline.cmake
@ -1,17 +1,16 @@
 # Modified from FindReadline.cmake (PH Feb 2012)

-if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
  set(EDITLINE_FOUND TRUE)
-else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
-  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h
-    /usr/include/editline
-    /usr/include/edit/readline  
-    /usr/include/readline
+else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
+  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES
+    editline
+    edit/readline
  )
  
  FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
  include(FindPackageHandleStandardArgs)
-  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY )
+  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
-endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
--- a/cmake/pcre2-config-version.cmake.in
+++ b/cmake/pcre2-config-version.cmake.in
@ -0,0 +1,15 @@
+set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@)
+set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@)
+set(PACKAGE_VERSION_PATCH 0)
+set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0)
+
+# Check whether the requested PACKAGE_FIND_VERSION is compatible
+if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR
+   PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR)
+  set(PACKAGE_VERSION_COMPATIBLE FALSE)
+else()
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
+    set(PACKAGE_VERSION_EXACT TRUE)
+  endif()
+endif()
--- a/cmake/pcre2-config.cmake.in
+++ b/cmake/pcre2-config.cmake.in
@ -0,0 +1,145 @@
+# pcre2-config.cmake
+# ----------------
+#
+# Finds the PCRE2 library, specify the starting search path in PCRE2_ROOT.
+#
+# Static vs. shared
+# -----------------
+# To make use of the static library instead of the shared one, one needs
+# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package.
+# Example:
+#   set(PCRE2_USE_STATIC_LIBS ON)
+#   find_package(PCRE2 CONFIG COMPONENTS 8BIT)
+#
+# This will define the following variables:
+#
+#   PCRE2_FOUND   - True if the system has the PCRE2 library.
+#   PCRE2_VERSION - The version of the PCRE2 library which was found.
+#
+# and the following imported targets:
+#
+#   PCRE2::8BIT  - The 8 bit PCRE2 library.
+#   PCRE2::16BIT - The 16 bit PCRE2 library.
+#   PCRE2::32BIT - The 32 bit PCRE2 library.
+#   PCRE2::POSIX - The POSIX PCRE2 library.
+
+set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@)
+set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@)
+set(PCRE2_8BIT_NAME pcre2-8)
+set(PCRE2_16BIT_NAME pcre2-16)
+set(PCRE2_32BIT_NAME pcre2-32)
+set(PCRE2_POSIX_NAME pcre2-posix)
+find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory")
+if (PCRE2_USE_STATIC_LIBS)
+  if (MSVC)
+    set(PCRE2_8BIT_NAME pcre2-8-static)
+    set(PCRE2_16BIT_NAME pcre2-16-static)
+    set(PCRE2_32BIT_NAME pcre2-32-static)
+    set(PCRE2_POSIX_NAME pcre2-posix-static)
+  endif ()
+
+  set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
+  set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
+else ()
+  set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
+  if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX)
+    set(PCRE2_PREFIX "")
+  endif ()
+
+  set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
+  if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX)
+    set(PCRE2_SUFFIX "-0.dll")
+  endif ()
+endif ()
+find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library")
+find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library")
+find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library")
+find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library")
+unset(PCRE2_NON_STANDARD_LIB_PREFIX)
+unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
+unset(PCRE2_8BIT_NAME)
+unset(PCRE2_16BIT_NAME)
+unset(PCRE2_32BIT_NAME)
+unset(PCRE2_POSIX_NAME)
+
+# Set version
+if (PCRE2_INCLUDE_DIR)
+  set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0")
+endif ()
+
+# Which components have been found.
+if (PCRE2_8BIT_LIBRARY)
+  set(PCRE2_8BIT_FOUND TRUE)
+endif ()
+if (PCRE2_16BIT_LIBRARY)
+  set(PCRE2_16BIT_FOUND TRUE)
+endif ()
+if (PCRE2_32BIT_LIBRARY)
+  set(PCRE2_32BIT_FOUND TRUE)
+endif ()
+if (PCRE2_POSIX_LIBRARY)
+  set(PCRE2_POSIX_FOUND TRUE)
+endif ()
+
+# Check if at least one component has been specified.
+list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS)
+if (PCRE2_NCOMPONENTS LESS 1)
+  message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.")
+endif ()
+unset(PCRE2_NCOMPONENTS)
+
+# When POSIX component has been specified make sure that also 8BIT component is specified.
+set(PCRE2_8BIT_COMPONENT FALSE)
+set(PCRE2_POSIX_COMPONENT FALSE)
+foreach(component ${PCRE2_FIND_COMPONENTS})
+  if (component STREQUAL "8BIT")
+    set(PCRE2_8BIT_COMPONENT TRUE)
+  elseif (component STREQUAL "POSIX")
+    set(PCRE2_POSIX_COMPONENT TRUE)
+  endif ()
+endforeach()
+
+if (PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT)
+  message(FATAL_ERROR "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component.")
+endif()
+unset(PCRE2_8BIT_COMPONENT)
+unset(PCRE2_POSIX_COMPONENT)
+
+include(FindPackageHandleStandardArgs)
+set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}")
+find_package_handle_standard_args(PCRE2
+  FOUND_VAR PCRE2_FOUND
+  REQUIRED_VARS PCRE2_INCLUDE_DIR
+  HANDLE_COMPONENTS
+  VERSION_VAR PCRE2_VERSION
+  CONFIG_MODE
+)
+
+set(PCRE2_LIBRARIES)
+if (PCRE2_FOUND)
+  foreach(component ${PCRE2_FIND_COMPONENTS})
+    if (PCRE2_USE_STATIC_LIBS)
+      add_library(PCRE2::${component} STATIC IMPORTED)
+      target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC)
+    else ()
+      add_library(PCRE2::${component} SHARED IMPORTED)
+    endif ()
+    set_target_properties(PCRE2::${component} PROPERTIES
+      IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
+    )
+    if (component STREQUAL "POSIX")
+      set_target_properties(PCRE2::${component} PROPERTIES
+        INTERFACE_LINK_LIBRARIES "PCRE2::8BIT"
+        LINK_LIBRARIES "PCRE2::8BIT"
+      )
+    endif ()
+
+    set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY})
+    mark_as_advanced(PCRE2_${component}_LIBRARY)
+  endforeach()
+endif ()
+
+mark_as_advanced(
+  PCRE2_INCLUDE_DIR
+)
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@ -1,8 +1,7 @@
 /* config.h for CMake builds */

+#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
 #cmakedefine HAVE_DIRENT_H 1
-#cmakedefine HAVE_INTTYPES_H 1    
-#cmakedefine HAVE_STDINT_H 1                                                   
 #cmakedefine HAVE_STRERROR 1
 #cmakedefine HAVE_SYS_STAT_H 1
 #cmakedefine HAVE_SYS_TYPES_H 1
@ -10,14 +9,16 @@
 #cmakedefine HAVE_WINDOWS_H 1

 #cmakedefine HAVE_BCOPY 1
+#cmakedefine HAVE_MEMFD_CREATE 1
 #cmakedefine HAVE_MEMMOVE 1
-
-#cmakedefine PCRE2_STATIC 1
+#cmakedefine HAVE_SECURE_GETENV 1
+#cmakedefine HAVE_STRERROR 1

 #cmakedefine SUPPORT_PCRE2_8 1
 #cmakedefine SUPPORT_PCRE2_16 1
 #cmakedefine SUPPORT_PCRE2_32 1
 #cmakedefine PCRE2_DEBUG 1
+#cmakedefine DISABLE_PERCENT_ZT 1

 #cmakedefine SUPPORT_LIBBZ2 1
 #cmakedefine SUPPORT_LIBEDIT 1
@ -27,6 +28,8 @@
 #cmakedefine SUPPORT_JIT 1
 #cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
 #cmakedefine SUPPORT_PCRE2GREP_JIT 1
+#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
+#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
 #cmakedefine SUPPORT_UNICODE 1
 #cmakedefine SUPPORT_VALGRIND 1

--- a/configure.ac
+++ b/configure.ac
@ -9,21 +9,21 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
 dnl be defined as -RC2, for example. For real releases, it should be empty.

 m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [31])
+m4_define(pcre2_minor, [41])
 m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2018-02-12])
+m4_define(pcre2_date, [2022-xx-xx])
+
+# Libtool shared library interface versions (current:revision:age)
+m4_define(libpcre2_8_version,     [11:0:11])
+m4_define(libpcre2_16_version,    [11:0:11])
+m4_define(libpcre2_32_version,    [11:0:11])
+m4_define(libpcre2_posix_version, [3:2:0])

 # NOTE: The CMakeLists.txt file searches for the above variables in the first
 # 50 lines of this file. Please update that if the variables above are moved.

-# Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre2_8_version,     [7:0:7])
-m4_define(libpcre2_16_version,    [7:0:7])
-m4_define(libpcre2_32_version,    [7:0:7])
-m4_define(libpcre2_posix_version, [2:0:0])
-
-AC_PREREQ(2.57)
-AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
+AC_PREREQ([2.60])
+AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
 AC_CONFIG_SRCDIR([src/pcre2.h.in])
 AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
@ -64,14 +64,31 @@ m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
 AC_TYPE_INT64_T

 AC_PROG_INSTALL
-AC_LIBTOOL_WIN32_DLL
-LT_INIT
+LT_INIT([win32-dll])
 AC_PROG_LN_S

 # Check for GCC visibility feature

 PCRE2_VISIBILITY

+# Check for Clang __attribute__((uninitialized)) feature
+
+AC_MSG_CHECKING([for __attribute__((uninitialized))])
+AC_LANG_PUSH([C])
+tmp_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+                   [[char buf[128] __attribute__((uninitialized));(void)buf]])],
+                   [pcre2_cc_cv_attribute_uninitialized=yes],
+                   [pcre2_cc_cv_attribute_uninitialized=no])
+AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
+if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
+  AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
+             supports __attribute__((uninitialized))])
+fi
+CFLAGS=$tmp_CFLAGS
+AC_LANG_POP([C])
+
 # Versioning

 PCRE2_MAJOR="pcre2_major"
@ -131,7 +148,7 @@ AC_ARG_ENABLE(pcre2-32,
              , enable_pcre2_32=unset)
 AC_SUBST(enable_pcre2_32)

-# Handle --dnable-debug (disabled by default)
+# Handle --enable-debug (disabled by default)
 AC_ARG_ENABLE(debug,
              AS_HELP_STRING([--enable-debug],
                             [enable debugging code]),
@ -143,11 +160,33 @@ AC_ARG_ENABLE(jit,
                             [enable Just-In-Time compiling support]),
              , enable_jit=no)

-# Handle --enable-jit-sealloc (disabled by default)
-AC_ARG_ENABLE(jit-sealloc,
-              AS_HELP_STRING([--enable-jit-sealloc],
-                             [enable SELinux compatible execmem allocator in JIT]),
-              , enable_jit_sealloc=no)
+# This code enables JIT if the hardware supports it.
+if test "$enable_jit" = "auto"; then
+  AC_LANG(C)
+  SAVE_CPPFLAGS=$CPPFLAGS
+  CPPFLAGS=-I$srcdir
+  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+  #define SLJIT_CONFIG_AUTO 1
+  #include "src/sljit/sljitConfigInternal.h"
+  #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+  #error unsupported
+  #endif]])], enable_jit=yes, enable_jit=no)
+  CPPFLAGS=$SAVE_CPPFLAGS
+  echo checking for JIT support on this hardware... $enable_jit
+fi
+
+# Handle --enable-jit-sealloc (disabled by default and only experimental)
+case $host_os in
+  linux* | netbsd*)
+    AC_ARG_ENABLE(jit-sealloc,
+      AS_HELP_STRING([--enable-jit-sealloc],
+        [enable SELinux compatible execmem allocator in JIT (experimental)]),
+        ,enable_jit_sealloc=no)
+    ;;
+  *)
+    enable_jit_sealloc=unsupported
+    ;;
+esac

 # Handle --disable-pcre2grep-jit (enabled by default)
 AC_ARG_ENABLE(pcre2grep-jit,
@ -161,6 +200,12 @@ AC_ARG_ENABLE(pcre2grep-callout,
                             [disable callout script support in pcre2grep]),
              , enable_pcre2grep_callout=yes)

+# Handle --disable-pcre2grep-callout-fork (enabled by default)
+AC_ARG_ENABLE(pcre2grep-callout-fork,
+              AS_HELP_STRING([--disable-pcre2grep-callout-fork],
+                             [disable callout script fork support in pcre2grep]),
+              , enable_pcre2grep_callout_fork=yes)
+
 # Handle --enable-rebuild-chartables
 AC_ARG_ENABLE(rebuild-chartables,
              AS_HELP_STRING([--enable-rebuild-chartables],
@ -276,7 +321,7 @@ AC_ARG_WITH(parens-nest-limit,
 # Handle --with-heap-limit
 AC_ARG_WITH(heap-limit,
            AS_HELP_STRING([--with-heap-limit=N],
-                           [default limit on heap memory (kilobytes, default=20000000)]),
+                           [default limit on heap memory (kibibytes, default=20000000)]),
            , with_heap_limit=20000000)

 # Handle --with-match-limit=N
@ -331,6 +376,12 @@ AC_ARG_ENABLE(stack-for-recursion,,
 #                              [don't use stack recursion when matching]),
 #               , enable_stack_for_recursion=yes)

+# Handle --disable-percent_zt (set as "auto" by default)
+AC_ARG_ENABLE(percent-zt,
+              AS_HELP_STRING([--disable-percent-zt],
+                             [disable the use of z and t formatting modifiers]),
+              , enable_percent_zt=auto)
+
 # Set the default value for pcre2-8
 if test "x$enable_pcre2_8" = "xunset"
 then
@ -372,7 +423,7 @@ case "$enable_newline" in
  anycrlf) ac_pcre2_newline_value=5 ;;
  nul)     ac_pcre2_newline_value=6 ;;
  *)
-  AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option])
+  AC_MSG_ERROR([invalid argument "$enable_newline" to --enable-newline option])
  ;;
 esac

@ -401,7 +452,7 @@ fi
 case "$with_link_size" in
  2|3|4) ;;
  *)
-  AC_MSG_ERROR([invalid argument \"$with_link_size\" to --with-link-size option])
+  AC_MSG_ERROR([invalid argument "$with_link_size" to --with-link-size option])
  ;;
 esac

@ -423,10 +474,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
 but if you do, default values will be taken from config.h for non-boolean
 macros that are not defined on the command line.

-Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
-(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
-macros are listed as a commented #undef in config.h.generic. Macros such as
-MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
+Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
+defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
+such macros are listed as a commented #undef in config.h.generic. Macros such
+as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
 surrounded by #ifndef/#endif lines so that the value can be overridden by -D.

 PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
@ -434,7 +485,6 @@ HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
 sure both macros are undefined; an emulation function will then be used. */])

 # Checks for header files.
-AC_HEADER_STDC
 AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
 AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
 AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
@ -462,7 +512,20 @@ AC_TYPE_SIZE_T

 # Checks for library functions.

-AC_CHECK_FUNCS(bcopy memmove strerror mkostemp secure_getenv)
+AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
+AC_MSG_CHECKING([for realpath])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <stdlib.h>
+#include <limits.h>
+]],[[
+char buffer[PATH_MAX];
+realpath(".", buffer);
+]])],
+[AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_REALPATH], 1,
+  [Define to 1 if you have the `realpath' function.])
+],
+AC_MSG_RESULT([no]))

 # Check for the availability of libz (aka zlib)

@ -534,14 +597,14 @@ if test "$enable_pcre2test_libreadline" = "yes"; then
 fi
 fi

-
 # Check for the availability of libedit. Different distributions put its
 # headers in different places. Try to cover the most common ones.

 if test "$enable_pcre2test_libedit" = "yes"; then
-  AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
-    [AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
-      [AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
+  AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [
+    HAVE_LIBEDIT_HEADER=1
+    break
+  ])
  AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
 fi

@ -575,6 +638,14 @@ if test "$enable_debug" = "yes"; then
    Define to any value to include debugging code.])
 fi

+if test "$enable_percent_zt" = "no"; then
+  AC_DEFINE([DISABLE_PERCENT_ZT], [], [
+    Define to any value to disable the use of the z and t modifiers in
+    formatting settings such as %zu or %td (this is rarely needed).])
+else
+  enable_percent_zt=auto
+fi
+
 # Unless running under Windows, JIT support requires pthreads.

 if test "$enable_jit" = "yes"; then
@ -604,13 +675,21 @@ if test "$enable_pcre2grep_jit" = "yes"; then
 fi

 if test "$enable_pcre2grep_callout" = "yes"; then
-  if test "$HAVE_WINDOWS_H" != "1"; then
-    if test "$HAVE_SYS_WAIT_H" != "1"; then
-      AC_MSG_ERROR([Callout script support needs sys/wait.h.])
+  if test "$enable_pcre2grep_callout_fork" = "yes"; then
+    if test "$HAVE_WINDOWS_H" != "1"; then
+      if test "$HAVE_SYS_WAIT_H" != "1"; then
+        AC_MSG_ERROR([Callout script support needs sys/wait.h.])
+      fi
    fi
+    AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
+      Define to any value to enable fork support in pcre2grep callout scripts.
+      This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
+      defined.])
  fi
  AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
    Define to any value to enable callout script support in pcre2grep.])
+else
+  enable_pcre2grep_callout_fork="no"
 fi

 if test "$enable_unicode" = "yes"; then
@ -694,8 +773,8 @@ fi
 AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
  The value of LINK_SIZE determines the number of bytes used to store
  links as offsets within the compiled regex. The default is 2, which
-  allows for compiled patterns up to 64K long. This covers the vast
-  majority of cases. However, PCRE2 can also be compiled to use 3 or 4
+  allows for compiled patterns up to 65535 code units long. This covers the
+  vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4
  bytes instead. This allows for longer patterns in extreme cases.])

 AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
@ -706,10 +785,11 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
 AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
  The value of MATCH_LIMIT determines the default number of times the
  pcre2_match() function can record a backtrack position during a single
-  matching attempt. There is a runtime interface for setting a different limit.
-  The limit exists in order to catch runaway regular expressions that take for
-  ever to determine that they do not match. The default is set very large so
-  that it does not accidentally catch legitimate cases.])
+  matching attempt. The value is also used to limit a loop counter in
+  pcre2_dfa_match(). There is a runtime interface for setting a different
+  limit. The limit exists in order to catch runaway regular expressions that
+  take for ever to determine that they do not match. The default is set very
+  large so that it does not accidentally catch legitimate cases.])

 # --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth

@ -733,11 +813,15 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
  the maximum amount of heap memory that is used. The value of
  MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it must
  be less than the value of MATCH_LIMIT. The default is to use the same value
-  as MATCH_LIMIT. There is a runtime method for setting a different limit.])
+  as MATCH_LIMIT. There is a runtime method for setting a different limit. In
+  the case of pcre2_dfa_match(), this limit controls the depth of the internal
+  nested function calls that are used for pattern recursions, lookarounds, and
+  atomic groups.])

 AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
-  This limits the amount of memory that pcre2_match() may use while matching
-  a pattern. The value is in kilobytes.])
+  This limits the amount of memory that may be used while matching
+  a pattern. It applies to both pcre2_match() and pcre2_dfa_match(). It does
+  not apply to JIT matching. The value is in kibibytes (units of 1024 bytes).])

 AC_DEFINE([MAX_NAME_SIZE], [32], [
  This limit is parameterized just in case anybody ever wants to
@ -817,7 +901,7 @@ AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)

 # When we run 'make distcheck', use these arguments. Turning off compiler
 # optimization makes it run faster.
-DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit --enable-utf"
+DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit"
 AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)

 # Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
@ -856,10 +940,9 @@ if test "$enable_pcre2test_libedit" = "yes"; then
    echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
    exit 1
  fi
-  if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
-          "$HAVE_READLINE_READLINE_H" != "1"; then
-    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
-    echo "** nor readline/readline.h was found."
+  if test -z "$HAVE_LIBEDIT_HEADER"; then
+    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h,"
+    echo "** edit/readline/readline.h nor a compatible header was found."
    exit 1
  fi
  if test -z "$LIBEDIT"; then
@ -933,7 +1016,27 @@ fi # enable_coverage

 AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])

+AC_MSG_CHECKING([whether Intel CET is enabled])
+AC_LANG_PUSH([C])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+                   [[#ifndef __CET__
+# error CET is not enabled
+#endif]])],
+                   [pcre2_cc_cv_intel_cet_enabled=yes],
+                   [pcre2_cc_cv_intel_cet_enabled=no])
+AC_MSG_RESULT([$pcre2_cc_cv_intel_cet_enabled])
+if test "$pcre2_cc_cv_intel_cet_enabled" = yes; then
+  CET_CFLAGS="-mshstk"
+  AC_SUBST([CET_CFLAGS])
+fi
+AC_LANG_POP([C])
+
+# LIB_POSTFIX is used by CMakeLists.txt for Windows debug builds.
+# Pass empty LIB_POSTFIX to *.pc files and pcre2-config here.
+AC_SUBST(LIB_POSTFIX)
+
 # Produce these files, in addition to config.h.
+
 AC_CONFIG_FILES(
 	Makefile
 	libpcre2-8.pc
@ -1000,13 +1103,14 @@ $PACKAGE-$VERSION configuration summary:
    Rebuild char tables ................ : ${enable_rebuild_chartables}
    Internal link size ................. : ${with_link_size}
    Nested parentheses limit ........... : ${with_parens_nest_limit}
-    Heap limit ......................... : ${with_heap_limit} kilobytes
+    Heap limit ......................... : ${with_heap_limit} kibibytes
    Match limit ........................ : ${with_match_limit}
    Match depth limit .................. : ${with_match_limit_depth}
    Build shared libs .................. : ${enable_shared}
    Build static libs .................. : ${enable_static}
    Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
    Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
+    Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
    Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
    Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
    Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
@ -1016,6 +1120,7 @@ $PACKAGE-$VERSION configuration summary:
    Valgrind support ................... : ${enable_valgrind}
    Code coverage ...................... : ${enable_coverage}
    Fuzzer support ..................... : ${enable_fuzz_support}
+    Use %zu and %td .................... : ${enable_percent_zt}

 EOF

--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@ -10,6 +10,7 @@ This document contains the following sections:
  Calling conventions in Windows environments
  Comments about Win32 builds
  Building PCRE2 on Windows with CMake
+  Building PCRE2 on Windows with Visual Studio
  Testing with RunTest.bat
  Building PCRE2 on native z/OS and z/VM

@ -39,7 +40,11 @@ GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY

 The following are generic instructions for building the PCRE2 C library "by
 hand". If you are going to use CMake, this section does not apply to you; you
-can skip ahead to the CMake section.
+can skip ahead to the CMake section. Note that the settings concerned with
+8-bit, 16-bit, and 32-bit code units relate to the type of data string that
+PCRE2 processes. They are NOT referring to the underlying operating system bit
+width. You do not have to do anything special to compile in a 64-bit
+environment, for example.

 (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
     macro settings that it contains to whatever is appropriate for your
@ -47,7 +52,7 @@ can skip ahead to the CMake section.
     macro to specify what character(s) you want to be interpreted as line
     terminators by default.

-     When you compile any of the PCRE2 modules, you must specify
+     When you subsequently compile any of the PCRE2 modules, you must specify
     -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
     sources.

@ -61,6 +66,11 @@ can skip ahead to the CMake section.
     new release, you are strongly advised to review src/config.h.generic
     before re-using what you had previously.

+     Note also that the src/config.h.generic file is created from a config.h
+     that was generated by Autotools, which automatically includes settings of
+     a number of macros that are not actually used by PCRE2 (for example,
+     HAVE_MEMORY_H).
+
 (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.

 (3) EITHER:
@ -68,23 +78,23 @@ can skip ahead to the CMake section.
       src/pcre2_chartables.c.

     OR:
-       Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
-       if you have set up src/config.h), and then run it with the single
-       argument "src/pcre2_chartables.c". This generates a set of standard
-       character tables and writes them to that file. The tables are generated
-       using the default C locale for your system. If you want to use a locale
-       that is specified by LC_xxx environment variables, add the -L option to
-       the dftables command. You must use this method if you are building on a
-       system that uses EBCDIC code.
+       Compile src/pcre2_dftables.c as a stand-alone program (using
+       -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
+       the single argument "src/pcre2_chartables.c". This generates a set of
+       standard character tables and writes them to that file. The tables are
+       generated using the default C locale for your system. If you want to use
+       a locale that is specified by LC_xxx environment variables, add the -L
+       option to the pcre2_dftables command. You must use this method if you
+       are building on a system that uses EBCDIC code.

     The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
     specify alternative tables at run time.

- (4) For an 8-bit library, compile the following source files from the src
-     directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also
-     set -DHAVE_CONFIG_H if you have set up src/config.h with your
-     configuration, or else use other -D settings to change the configuration
-     as required.
+ (4) For a library that supports 8-bit code units in the character strings that
+     it processes, compile the following source files from the src directory,
+     setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set
+     -DHAVE_CONFIG_H if you have set up src/config.h with your configuration,
+     or else use other -D settings to change the configuration as required.

       pcre2_auto_possess.c
       pcre2_chartables.c
@ -103,6 +113,7 @@ can skip ahead to the CMake section.
       pcre2_newline.c
       pcre2_ord2utf.c
       pcre2_pattern_info.c
+       pcre2_script_run.c
       pcre2_serialize.c
       pcre2_string_utils.c
       pcre2_study.c
@ -110,6 +121,7 @@ can skip ahead to the CMake section.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -126,7 +138,7 @@ can skip ahead to the CMake section.
     src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
     these yourself.

-     Not also that the pcre2_fuzzsupport.c file contains special code that is
+     Note also that the pcre2_fuzzsupport.c file contains special code that is
     useful to those who want to run fuzzing tests on the PCRE2 library. Unless
     you are doing that, you can ignore it.

@ -135,9 +147,9 @@ can skip ahead to the CMake section.
     If your system has static and shared libraries, you may have to do this
     once for each type.

- (6) If you want to build a 16-bit library or 32-bit library (as well as, or
-     instead of the 8-bit library) just supply 16 or 32 as the value of
-     -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
+ (6) If you want to build a library that supports 16-bit or 32-bit code units,
+     (as well as, or instead of the 8-bit library) just supply 16 or 32 as the
+     value of -DPCRE2_CODE_UNIT_WIDTH when you are compiling.

 (7) If you want to build the POSIX wrapper functions (which apply only to the
     8-bit library), ensure that you have the src/pcre2posix.h file and then
@ -185,7 +197,7 @@ can skip ahead to the CMake section.

 STACK SIZE IN WINDOWS ENVIRONMENTS

-Prior to release 10.30 the default system stack size of 1Mb in some Windows
+Prior to release 10.30 the default system stack size of 1MiB in some Windows
 environments caused issues with some tests. This should no longer be the case
 for 10.30 and later releases.

@ -295,7 +307,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
    source dir. For example, C:\pcre2\pcre2-xx\build.

-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
    to start Cmake from the Windows Start menu, as this can lead to errors.

@ -330,6 +342,18 @@ cache can be deleted by selecting "File > Delete Cache".
    available for review in Testing\Temporary under your build dir.


+BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
+
+The code currently cannot be compiled without an inttypes.h header, which is
+available only with Visual Studio 2013 or newer. However, this portable and
+permissively-licensed implementation of the stdint.h header could be used as an
+alternative:
+
+  http://www.azillionmonkeys.com/qed/pstdint.h
+
+Just rename it and drop it into the top level of the build tree.
+
+
 TESTING WITH RUNTEST.BAT

 If configured with CMake, building the test project ("make test" or building
@ -350,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (wherein which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
@ -382,6 +406,6 @@ Everything in that location, source and executable, is in EBCDIC and native
 z/OS file formats. The port provides an API for LE languages such as COBOL and
 for the z/OS and z/VM versions of the Rexx languages.

-===============================
-Last Updated: 13 September 2017
-===============================
+===========================
+Last Updated: 28 April 2021
+===========================
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -1,19 +1,23 @@
 README file for PCRE2 (Perl-compatible regular expression library)
 ------------------------------------------------------------------

-PCRE2 is a re-working of the original PCRE library to provide an entirely new
-API. The latest release of PCRE2 is always available in three alternative
-formats from:
+PCRE2 is a re-working of the original PCRE1 library to provide an entirely new
+API. Since its initial release in 2015, there has been further development of
+the code and it now differs from PCRE1 in more than just the API. There are new
+features, and the internals have been improved. The original PCRE1 library is
+now obsolete and no longer maintained. The latest release of PCRE2 is available
+in .tar.gz, tar.bz2, or .zip form from this GitHub repository:

-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
+https://github.com/PCRE2Project/pcre2/releases

-There is a mailing list for discussion about the development of PCRE (both the
-original and new APIs) at pcre-dev@exim.org. You can access the archives and
-subscribe or manage your subscription here:
+There is a mailing list for discussion about the development of PCRE2 at
+pcre2-dev@googlegroups.com. You can subscribe by sending an email to
+pcre2-dev+subscribe@googlegroups.com.

-   https://lists.exim.org/mailman/listinfo/pcre-dev
+You can access the archives and also subscribe or manage your subscription
+here:
+
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -39,13 +43,13 @@ The PCRE2 APIs
 PCRE2 is written in C, and it has its own API. There are three sets of
 functions, one for the 8-bit library, which processes strings of bytes, one for
 the 16-bit library, which processes strings of 16-bit values, and one for the
-32-bit library, which processes strings of 32-bit values. There are no C++
-wrappers.
+32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there
+are no C++ wrappers.

 The distribution does contain a set of C wrapper functions for the 8-bit
 library that are based on the POSIX regular expression API (see the pcre2posix
-man page). These can be found in a library called libpcre2-posix. Note that
-this just provides a POSIX calling interface to PCRE2; the regular expressions
+man page). These are built into a library called libpcre2-posix. Note that this
+just provides a POSIX calling interface to PCRE2; the regular expressions
 themselves still follow Perl syntax and semantics. The POSIX API is restricted,
 and does not give full access to all of PCRE2's facilities.

@ -53,20 +57,8 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
 official POSIX name is regex.h, but I did not want to risk possible problems
 with existing files of that name by distributing it that way. To use PCRE2 with
 an existing program that uses the POSIX API, pcre2posix.h will have to be
-renamed or pointed at by a link.
-
-If you are using the POSIX interface to PCRE2 and there is already a POSIX
-regex library installed on your system, as well as worrying about the regex.h
-header file (as mentioned above), you must also take care when linking programs
-to ensure that they link with PCRE2's libpcre2-posix library. Otherwise they
-may pick up the POSIX functions of the same name from the other library.
-
-One way of avoiding this confusion is to compile PCRE2 with the addition of
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
-compiler flags (CFLAGS if you are using "configure" -- see below). This has the
-effect of renaming the functions so that the names no longer clash. Of course,
-you have to do the same thing for your applications, or write them using the
-new names.
+renamed or pointed at by a link (or the program modified, of course). See the
+pcre2posix documentation for more details.


 Documentation for PCRE2
@ -122,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.

-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.

+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@ -171,10 +169,14 @@ library. They are also documented in the pcre2build man page.
  give large performance improvements on certain platforms, add --enable-jit to
  the "configure" command. This support is available only for certain hardware
  architectures. If you try to enable it on an unsupported architecture, there
-  will be a compile time error. If you are running under SELinux you may also
-  want to add --enable-jit-sealloc, which enables the use of an execmem
-  allocator in JIT that is compatible with SELinux. This has no effect if JIT
-  is not enabled.
+  will be a compile time error. If in doubt, use --enable-jit=auto, which
+  enables JIT only if the current hardware is supported.
+
+. If you are enabling JIT under SELinux environment you may also want to add
+  --enable-jit-sealloc, which enables the use of an executable memory allocator
+  that is compatible with SELinux. Warning: this allocator is experimental!
+  It does not support fork() operation and may crash when no disk space is
+  available. This option has no effect if JIT is disabled.

 . If you do not want to make use of the default support for UTF-8 Unicode
  character strings in the 8-bit library, UTF-16 Unicode character strings in
@ -192,10 +194,10 @@ library. They are also documented in the pcre2build man page.

  As well as supporting UTF strings, Unicode support includes support for the
  \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).

 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
  of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@ -239,9 +241,11 @@ library. They are also documented in the pcre2build man page.
  discussion in the pcre2api man page (search for pcre2_set_match_limit).

 . There is a separate counter that limits the depth of nested backtracking
-  during a matching process, which indirectly limits the amount of heap memory
-  that is used. This also has a default of ten million, which is essentially
-  "unlimited". You can change the default by setting, for example,
+  (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
+  matching process, which indirectly limits the amount of heap memory that is
+  used, and in the case of pcre2_dfa_match() the amount of stack as well. This
+  counter also has a default of ten million, which is essentially "unlimited".
+  You can change the default by setting, for example,

  --with-match-limit-depth=5000

@ -249,16 +253,17 @@ library. They are also documented in the pcre2build man page.
  pcre2_set_depth_limit).

 . You can also set an explicit limit on the amount of heap memory used by
-  the pcre2_match() interpreter:
+  the pcre2_match() and pcre2_dfa_match() interpreters:

  --with-heap-limit=500

-  The units are kilobytes. This limit does not apply when the JIT optimization
-  (which has its own memory control features) is used. There is more discussion
-  on the pcre2api man page (search for pcre2_set_heap_limit).
+  The units are kibibytes (units of 1024 bytes). This limit does not apply when
+  the JIT optimization (which has its own memory control features) is used.
+  There is more discussion on the pcre2api man page (search for
+  pcre2_set_heap_limit).

 . In the 8-bit library, the default maximum compiled pattern size is around
-  64K bytes. You can increase this by adding --with-link-size=3 to the
+  64 kibibytes. You can increase this by adding --with-link-size=3 to the
  "configure" command. PCRE2 then uses three bytes instead of two for offsets
  to different parts of the compiled pattern. In the 16-bit library,
  --with-link-size=3 is the same as --with-link-size=4, which (in both
@ -272,9 +277,9 @@ library. They are also documented in the pcre2build man page.

  --enable-rebuild-chartables

-  a program called dftables is compiled and run in the default C locale when
-  you obey "make". It builds a source file called pcre2_chartables.c. If you do
-  not specify this option, pcre2_chartables.c is created as a copy of
+  a program called pcre2_dftables is compiled and run in the default C locale
+  when you obey "make". It builds a source file called pcre2_chartables.c. If
+  you do not specify this option, pcre2_chartables.c is created as a copy of
  pcre2_chartables.c.dist. See "Character tables" below for further
  information.

@ -300,8 +305,8 @@ library. They are also documented in the pcre2build man page.
  unaddressable. This allows it to detect invalid memory accesses, and is
  mostly useful for debugging PCRE2 itself.

-. In environments where the gcc compiler is used and lcov version 1.6 or above
-  is installed, if you specify
+. In environments where the gcc compiler is used and lcov is installed, if you
+  specify

  --enable-coverage

@ -315,10 +320,14 @@ library. They are also documented in the pcre2build man page.
 . When JIT support is enabled, pcre2grep automatically makes use of it, unless
  you add --disable-pcre2grep-jit to the "configure" command.

-. On non-Windows sytems there is support for calling external scripts during
-  matching in the pcre2grep command via PCRE2's callout facility with string
-  arguments. This support can be disabled by adding --disable-pcre2grep-callout
-  to the "configure" command.
+. There is support for calling external programs during matching in the
+  pcre2grep command, using PCRE2's callout facility with string arguments. This
+  support can be disabled by adding --disable-pcre2grep-callout to the
+  "configure" command. There are two kinds of callout: one that generates
+  output from inbuilt code, and another that calls an external program. The
+  latter has special support for Windows and VMS; otherwise it assumes the
+  existence of the fork() function. This facility can be disabled by adding
+  --disable-pcre2grep-callout-fork to the "configure" command.

 . The pcre2grep program currently supports only 8-bit data files, and so
  requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
@ -366,11 +375,21 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.

+. The C99 standard defines formatting modifiers z and t for size_t and
+  ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
+  environments other than Microsoft Visual Studio versions earlier than 2013
+  when __STDC_VERSION__ is defined and has a value greater than or equal to
+  199901L (indicating C99). However, there is at least one environment that
+  claims to be C99 but does not support these modifiers. If
+  --disable-percent-zt is specified, no use is made of the z or t modifiers.
+  Instead of %td or %zu, %lu is used, with a cast for size_t values.
+
 . There is a special option called --enable-fuzz-support for use by people who
  want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
  library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
@ -382,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -399,7 +418,7 @@ The "configure" script builds the following files for the basic C library:
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                         that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@ -538,11 +557,11 @@ Cross-compiling using autotools

 You can specify CC and CFLAGS in the normal way to the "configure" command, in
 order to cross-compile PCRE2 for some other host. However, you should NOT
-specify --enable-rebuild-chartables, because if you do, the dftables.c source
-file is compiled and run on the local host, in order to generate the inbuilt
-character tables (the pcre2_chartables.c file). This will probably not work,
-because dftables.c needs to be compiled with the local compiler, not the cross
-compiler.
+specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
+source file is compiled and run on the local host, in order to generate the
+inbuilt character tables (the pcre2_chartables.c file). This will probably not
+work, because pcre2_dftables.c needs to be compiled with the local compiler,
+not the cross compiler.

 When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
 created by making a copy of pcre2_chartables.c.dist, which is a default set of
@ -550,9 +569,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
 not be a problem.

 If you need to modify the character tables when cross-compiling, you should
-move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
-and run it on the local host to make a new version of pcre2_chartables.c.dist.
-Then when you cross-compile PCRE2 this new version of the tables will be used.
+move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
+hand and run it on the local host to make a new version of
+pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
+at build time" for more details.


 Making new tarballs
@ -589,13 +609,13 @@ is available. RunTest outputs a comment when it skips a test.

 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.

 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.

-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:

@ -676,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
@ -711,8 +731,8 @@ compile context.
 The source file called pcre2_chartables.c contains the default set of tables.
 By default, this is created as a copy of pcre2_chartables.c.dist, which
 contains tables for ASCII coding. However, if --enable-rebuild-chartables is
-specified for ./configure, a different version of pcre2_chartables.c is built
-by the program dftables (compiled from dftables.c), which uses the ANSI C
+specified for ./configure, a new version of pcre2_chartables.c is built by the
+program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
 character handling functions such as isalnum(), isalpha(), isupper(),
 islower(), etc. to build the table sources. This means that the default C
 locale that is set for your system will control the contents of these default
@ -722,32 +742,40 @@ file does not get automatically re-generated. The best way to do this is to
 move pcre2_chartables.c.dist out of the way and replace it with your customized
 tables.

-When the dftables program is run as a result of --enable-rebuild-chartables,
-it uses the default C locale that is set on your system. It does not pay
-attention to the LC_xxx environment variables. In other words, it uses the
-system's default locale rather than whatever the compiling user happens to have
-set. If you really do want to build a source set of character tables in a
-locale that is specified by the LC_xxx variables, you can run the dftables
-program by hand with the -L option. For example:
+When the pcre2_dftables program is run as a result of specifying
+--enable-rebuild-chartables, it uses the default C locale that is set on your
+system. It does not pay attention to the LC_xxx environment variables. In other
+words, it uses the system's default locale rather than whatever the compiling
+user happens to have set. If you really do want to build a source set of
+character tables in a locale that is specified by the LC_xxx variables, you can
+run the pcre2_dftables program by hand with the -L option. For example:

-  ./dftables -L pcre2_chartables.c.special
+  ./pcre2_dftables -L pcre2_chartables.c.special

-The first two 256-byte tables provide lower casing and case flipping functions,
-respectively. The next table consists of three 32-byte bit maps which identify
-digits, "word" characters, and white space, respectively. These are used when
-building 32-byte bit maps that represent character classes for code points less
-than 256. The final 256-byte table has bits indicating various character types,
-as follows:
+The second argument names the file where the source code for the tables is
+written. The first two 256-byte tables provide lower casing and case flipping
+functions, respectively. The next table consists of a number of 32-byte bit
+maps which identify certain character classes such as digits, "word"
+characters, white space, etc. These are used when building 32-byte bit maps
+that represent character classes for code points less than 256. The final
+256-byte table has bits indicating various character types, as follows:

    1   white space character
    2   letter
-    4   decimal digit
-    8   hexadecimal digit
+    4   lower case letter
+    8   decimal digit
   16   alphanumeric or '_'
-  128   regular expression metacharacter or binary zero

-You should not alter the set of characters that contain the 128 bit, as that
-will cause PCRE2 to malfunction.
+You can also specify -b (with or without -L) when running pcre2_dftables. This
+causes the tables to be written in binary instead of as source code. A set of
+binary tables can be loaded into memory by an application and passed to
+pcre2_compile() in the same way as tables created dynamically by calling
+pcre2_maketables(). The tables are just a string of bytes, independent of
+hardware characteristics such as endianness. This means they can be bundled
+with an application that runs in different environments, to ensure consistent
+behaviour.
+
+See also the pcre2build section "Creating character tables at build time".


 File manifest
@ -758,7 +786,7 @@ The distribution should contain the files listed below.
 (A) Source files for the PCRE2 library functions and their headers are found in
    the src directory:

-  src/dftables.c           auxiliary program for building pcre2_chartables.c
+  src/pcre2_dftables.c     auxiliary program for building pcre2_chartables.c
                           when --enable-rebuild-chartables is specified

  src/pcre2_chartables.c.dist  a default set of character tables that assume
@ -784,6 +812,7 @@ The distribution should contain the files listed below.
  src/pcre2_newline.c      )
  src/pcre2_ord2utf.c      )
  src/pcre2_pattern_info.c )
+  src/pcre2_script_run.c   )
  src/pcre2_serialize.c    )
  src/pcre2_string_utils.c )
  src/pcre2_study.c        )
@ -881,6 +910,6 @@ The distribution should contain the files listed below.
                          )   environments

 Philip Hazel
-Email local part: ph10
-Email domain: cam.ac.uk
-Last updated: 12 September 2017
+Email local part: Philip.Hazel
+Email domain: gmail.com
+Last updated: 15 April 2022
--- a/doc/html/index.html
+++ b/doc/html/index.html
@ -141,11 +141,14 @@ in the library.
    <td>&nbsp;&nbsp;Free a general context</td></tr>

 <tr><td><a href="pcre2_get_error_message.html">pcre2_get_error_message</a></td>
-    <td>&nbsp;&nbsp;Free study data</td></tr>
+    <td>&nbsp;&nbsp;Get textual error message for error number</td></tr>

 <tr><td><a href="pcre2_get_mark.html">pcre2_get_mark</a></td>
    <td>&nbsp;&nbsp;Get a (*MARK) name</td></tr>

+<tr><td><a href="pcre2_get_match_data_size.html">pcre2_get_match_data_size</a></td>
+    <td>&nbsp;&nbsp;Get the size of a match data block</td></tr>
+
 <tr><td><a href="pcre2_get_ovector_count.html">pcre2_get_ovector_count</a></td>
    <td>&nbsp;&nbsp;Get the ovector count</td></tr>

@ -176,6 +179,9 @@ in the library.
 <tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>

+<tr><td><a href="pcre2_maketables_free.html">pcre2_maketables_free</a></td>
+    <td>&nbsp;&nbsp;Free character tables</td></tr>
+
 <tr><td><a href="pcre2_match.html">pcre2_match</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (Perl compatible)</td></tr>
--- a/doc/html/pcre2.html
+++ b/doc/html/pcre2.html
@ -23,16 +23,30 @@ please consult the man page, in case the conversion went wrong.
 <P>
 PCRE2 is the name used for a revised API for the PCRE library, which is a set
 of functions, written in C, that implement regular expression pattern matching
-using the same syntax and semantics as Perl, with just a few differences. Some
-features that appeared in Python and the original PCRE before they appeared in
-Perl are also available using the Python syntax. There is also some support for
-one or two .NET and Oniguruma syntax items, and there are options for
-requesting some minor changes that give better ECMAScript (aka JavaScript)
-compatibility.
+using the same syntax and semantics as Perl, with just a few differences. After
+nearly two decades, the limitations of the original API were making development
+increasingly difficult. The new API is more extensible, and it was simplified
+by abolishing the separate "study" optimizing function; in PCRE2, patterns are
+automatically optimized where possible. Since forking from PCRE1, the code has
+been extensively refactored and new features introduced. The old library is now
+obsolete and is no longer maintained.
+</P>
+<P>
+As well as Perl-style regular expression patterns, some features that appeared
+in Python and the original PCRE before they appeared in Perl are available
+using the Python syntax. There is also some support for one or two .NET and
+Oniguruma syntax items, and there are options for requesting some minor changes
+that give better ECMAScript (aka JavaScript) compatibility.
+</P>
+<P>
+The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit,
+or 32-bit code units, which means that up to three separate libraries may be
+installed, one for each code unit size. The size of code unit is not related to
+the bit size of the underlying hardware. In a 64-bit environment that also
+supports 32-bit applications, versions of PCRE2 that are compiled in both
+64-bit and 32-bit modes may be needed.
 </P>
 <P>
-The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
-code units, which means that up to three separate libraries may be installed.
 The original work to extend PCRE to 16-bit and 32-bit code units was done by
 Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
 can be interpreted either as one character per code unit, or as UTF-encoded
@ -155,8 +169,9 @@ listing), and the short pages for individual functions, are concatenated in
  pcre2-config       show PCRE2 installation configuration information
  pcre2api           details of PCRE2's native C API
  pcre2build         building PCRE2
-  pcre2callout       details of the callout feature
+  pcre2callout       details of the pattern callout feature
  pcre2compat        discussion of Perl compatibility
+  pcre2convert       details of pattern conversion functions
  pcre2demo          a demonstration C program that uses PCRE2
  pcre2grep          description of the <b>pcre2grep</b> command (8-bit only)
  pcre2jit           discussion of just-in-time optimization support
@ -167,6 +182,7 @@ listing), and the short pages for individual functions, are concatenated in
  pcre2perform       discussion of performance issues
  pcre2posix         the POSIX-compatible C API for the 8-bit library
  pcre2sample        discussion of the pcre2demo program
+  pcre2serialize     details of pattern serialization
  pcre2syntax        quick syntax reference
  pcre2test          description of the <b>pcre2test</b> command
  pcre2unicode       discussion of Unicode and UTF support
@ -178,20 +194,20 @@ function, listing its arguments and results.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <P>
 Putting an actual email address here is a spam magnet. If you want to email me,
-use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+use my two names separated by a dot at gmail.com.
 </P>
 <br><a name="SEC5" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 01 April 2017
+Last updated: 27 August 2021
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2_code_free.html
+++ b/doc/html/pcre2_code_free.html
@ -25,7 +25,8 @@ SYNOPSIS
 DESCRIPTION
 </b><br>
 <P>
-This function frees the memory used for a compiled pattern, including any
+If <i>code</i> is NULL, this function does nothing. Otherwise, <i>code</i> must
+point to a compiled pattern. This function frees its memory, including any
 memory used by the JIT compiler. If the compiled pattern was created by a call
 to <b>pcre2_code_copy_with_tables()</b>, the memory for the character tables is
 also freed.
--- a/doc/html/pcre2_compile.html
+++ b/doc/html/pcre2_compile.html
@ -65,7 +65,8 @@ The option bits are:
  PCRE2_EXTENDED           Ignore white space and # comments
  PCRE2_FIRSTLINE          Force matching to be before newline
  PCRE2_LITERAL            Pattern characters are all literal
-  PCRE2_MATCH_UNSET_BACKREF  Match unset back references
+  PCRE2_MATCH_INVALID_UTF  Enable support for matching invalid UTF
+  PCRE2_MATCH_UNSET_BACKREF  Match unset backreferences
  PCRE2_MULTILINE          ^ and $ match newlines within data
  PCRE2_NEVER_BACKSLASH_C  Lock out the use of \C in patterns
  PCRE2_NEVER_UCP          Lock out PCRE2_UCP, e.g. via (*UCP)
@ -86,8 +87,23 @@ PCRE2 must be built with Unicode support (the default) in order to use
 PCRE2_UTF, PCRE2_UCP and related options.
 </P>
 <P>
-The yield of the function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+Additional options may be set in the compile context via the
+<a href="pcre2_set_compile_extra_options.html"><b>pcre2_set_compile_extra_options</b></a>
+function.
+</P>
+<P>
+If either of <i>errorcode</i> or <i>erroroffset</i> is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the <i>errorcode</i> argument to the the
+<b>pcre2_get_error_message()</b> function. The offset (in code units) where the
+error was encountered is returned via the <i>erroroffset</i> argument.
+</P>
+<P>
+If there is no error, the value passed via <i>errorcode</i> returns the message
+"no error" if passed to <b>pcre2_get_error_message()</b>, and the value passed
+via <i>erroroffset</i> is zero.
 </P>
 <P>
 There is a complete description of the PCRE2 native API, with more detail on
--- a/doc/html/pcre2_compile_context_free.html
+++ b/doc/html/pcre2_compile_context_free.html
@ -27,7 +27,8 @@ DESCRIPTION
 <P>
 This function frees the memory occupied by a compile context, using the memory
 freeing function from the general context with which it was created, or
-<b>free()</b> if that was not set.
+<b>free()</b> if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_convert_context_free.html
+++ b/doc/html/pcre2_convert_context_free.html
@ -28,7 +28,8 @@ DESCRIPTION
 This function is part of an experimental set of pattern conversion functions.
 It frees the memory occupied by a convert context, using the memory
 freeing function from the general context with which it was created, or
-<b>free()</b> if that was not set.
+<b>free()</b> if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
 </P>
 <P>
 The pattern conversion functions are described in the
--- a/doc/html/pcre2_converted_pattern_free.html
+++ b/doc/html/pcre2_converted_pattern_free.html
@ -28,7 +28,8 @@ DESCRIPTION
 This function is part of an experimental set of pattern conversion functions.
 It frees the memory occupied by a converted pattern that was obtained by
 calling <b>pcre2_pattern_convert()</b> with arguments that caused it to place
-the converted pattern into newly obtained heap memory.
+the converted pattern into newly obtained heap memory. If the argument is NULL,
+the function returns immediately without doing anything.
 </P>
 <P>
 The pattern conversion functions are described in the
--- a/doc/html/pcre2_dfa_match.html
+++ b/doc/html/pcre2_dfa_match.html
@ -45,12 +45,20 @@ just once (except when processing lookaround assertions). This function is
  <i>workspace</i>    Points to a vector of ints used as working space
  <i>wscount</i>      Number of elements in the vector
 </pre>
-For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
-up a callout function or specify the match and/or the recursion depth limits.
-The <i>length</i> and <i>startoffset</i> values are code units, not characters.
-The options are:
+The size of output vector needed to contain all the results depends on the
+number of simultaneous matches, not on the number of parentheses in the
+pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the match
+data block is therefore not advisable when using this function.
+</P>
+<P>
+A match context is needed only if you want to set up a callout function or
+specify the heap limit or the match or the recursion depth limits. The
+<i>length</i> and <i>startoffset</i> values are code units, not characters. The
+options are:
 <pre>
  PCRE2_ANCHORED          Match only at the first position
+  PCRE2_COPY_MATCHED_SUBJECT
+                          On success, make a private subject copy
  PCRE2_ENDANCHORED       Pattern can match only at end of subject
  PCRE2_NOTBOL            Subject is not the beginning of a line
  PCRE2_NOTEOL            Subject is not the end of a line
--- a/doc/html/pcre2_general_context_free.html
+++ b/doc/html/pcre2_general_context_free.html
@ -26,7 +26,8 @@ DESCRIPTION
 </b><br>
 <P>
 This function frees the memory occupied by a general context, using the memory
-freeing function within the context, if set.
+freeing function within the context, if set.  If the argument is NULL, the
+function returns immediately without doing anything.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_get_match_data_size.html
+++ b/doc/html/pcre2_get_match_data_size.html
@ -0,0 +1,39 @@
+<html>
+<head>
+<title>pcre2_get_match_data_size specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_get_match_data_size man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function returns the size, in bytes, of the match data block that is its
+argument.
+</P>
+<P>
+There is a complete description of the PCRE2 native API in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page and a description of the POSIX API in the
+<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
--- a/doc/html/pcre2_jit_compile.html
+++ b/doc/html/pcre2_jit_compile.html
@ -41,9 +41,16 @@ bits:
  PCRE2_JIT_PARTIAL_SOFT  compile code for soft partial matching
  PCRE2_JIT_PARTIAL_HARD  compile code for hard partial matching
 </pre>
+There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
+superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF. The old
+option is deprecated and may be removed in the future.
+</P>
+<P>
 The yield of the function is 0 for success, or a negative error code otherwise.
 In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
-if an unknown bit is set in <i>options</i>.
+if an unknown bit is set in <i>options</i>. The function can also return
+PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the
+compiler, even if it was because of a system security restriction.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_jit_free_unused_memory.html
+++ b/doc/html/pcre2_jit_free_unused_memory.html
@ -29,7 +29,7 @@ This function frees unused JIT executable memory. The argument is a general
 context, for custom memory management, or NULL for standard memory management.
 JIT memory allocation retains some memory in order to improve future JIT
 compilation speed. In low memory conditions,
-\fBpcre2_jit_free_unused_memory()\fB can be used to cause this memory to be
+<b>pcre2_jit_free_unused_memory()</b> can be used to cause this memory to be
 freed.
 </P>
 <P>
--- a/doc/html/pcre2_jit_match.html
+++ b/doc/html/pcre2_jit_match.html
@ -33,7 +33,9 @@ processed by the JIT compiler against a given subject string, using a matching
 algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
 it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
 Its arguments are exactly the same as for
-<a href="pcre2_match.html"><b>pcre2_match()</b>.</a>
+<a href="pcre2_match.html"><b>pcre2_match()</b>,</a>
+except that the subject string must be specified with a length;
+PCRE2_ZERO_TERMINATED is not supported.
 </P>
 <P>
 The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
--- a/doc/html/pcre2_jit_stack_assign.html
+++ b/doc/html/pcre2_jit_stack_assign.html
@ -38,7 +38,11 @@ passed to a matching function. The arguments of this function are:
 </PRE>
 </P>
 <P>
-If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32K
+If <i>mcontext</i> is NULL, the function returns immediately, without doing
+anything.
+</P>
+<P>
+If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32KiB
 block on the machine stack is used.
 </P>
 <P>
@ -49,8 +53,9 @@ If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
 <P>
 If <i>callback</i> not NULL, it is called with <i>callback_data</i> as an
 argument at the start of matching, in order to set up a JIT stack. If the
-result is NULL, the internal 32K stack is used; otherwise the return value must
-be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
+result is NULL, the internal 32KiB stack is used; otherwise the return value
+must be a valid JIT stack, the result of calling
+<b>pcre2_jit_stack_create()</b>.
 </P>
 <P>
 You may safely use the same JIT stack for multiple patterns, as long as they
--- a/doc/html/pcre2_jit_stack_create.html
+++ b/doc/html/pcre2_jit_stack_create.html
@ -33,8 +33,9 @@ context, for memory allocation functions, or NULL for standard memory
 allocation. The result can be passed to the JIT run-time code by calling
 <b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
 which can then be processed by <b>pcre2_match()</b> or <b>pcre2_jit_match()</b>.
-A maximum stack size of 512K to 1M should be more than enough for any pattern.
-For more details, see the
+A maximum stack size of 512KiB to 1MiB should be more than enough for any
+pattern. If the stack couldn't be allocated or the values passed were not
+reasonable, NULL will be returned. For more details, see the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 page.
 </P>
--- a/doc/html/pcre2_jit_stack_free.html
+++ b/doc/html/pcre2_jit_stack_free.html
@ -26,8 +26,9 @@ DESCRIPTION
 </b><br>
 <P>
 This function is used to free a JIT stack that was created by
-<b>pcre2_jit_stack_create()</b> when it is no longer needed. For more details,
-see the
+<b>pcre2_jit_stack_create()</b> when it is no longer needed. If the argument is
+NULL, the function returns immediately without doing anything. For more
+details, see the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 page.
 </P>
--- a/doc/html/pcre2_maketables.html
+++ b/doc/html/pcre2_maketables.html
@ -19,7 +19,7 @@ SYNOPSIS
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
-<b>const unsigned char *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
+<b>const uint8_t *pcre2_maketables(pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <br><b>
 DESCRIPTION
--- a/doc/html/pcre2_maketables_free.html
+++ b/doc/html/pcre2_maketables_free.html
@ -0,0 +1,44 @@
+<html>
+<head>
+<title>pcre2_maketables_free specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_maketables_free man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>void pcre2_maketables_free(pcre2_general_context *<i>gcontext</i>,</b>
+<b>  const uint8_t *<i>tables</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function discards a set of character tables that were created by a call
+to
+<a href="pcre2_maketables.html"><b>pcre2_maketables()</b>.</a>
+</P>
+<P>
+The <i>gcontext</i> parameter should match what was used in that call to
+account for any custom allocators that might be in use; if it is NULL
+the system <b>free()</b> is used.
+</P>
+<P>
+There is a complete description of the PCRE2 native API in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
--- a/doc/html/pcre2_match.html
+++ b/doc/html/pcre2_match.html
@ -55,11 +55,13 @@ A match context is needed only if you want to:
  Change the backtracking depth limit
  Set custom memory management specifically for the match
 </pre>
-The <i>length</i> and <i>startoffset</i> values are code
-units, not characters. The length may be given as PCRE2_ZERO_TERMINATE for a
-subject that is terminated by a binary zero code unit. The options are:
+The <i>length</i> and <i>startoffset</i> values are code units, not characters.
+The length may be given as PCRE2_ZERO_TERMINATED for a subject that is
+terminated by a binary zero code unit. The options are:
 <pre>
  PCRE2_ANCHORED          Match only at the first position
+  PCRE2_COPY_MATCHED_SUBJECT
+                          On success, make a private subject copy
  PCRE2_ENDANCHORED       Pattern can match only at end of subject
  PCRE2_NOTBOL            Subject string is not the beginning of a line
  PCRE2_NOTEOL            Subject string is not the end of a line
--- a/doc/html/pcre2_match_context_free.html
+++ b/doc/html/pcre2_match_context_free.html
@ -27,7 +27,8 @@ DESCRIPTION
 <P>
 This function frees the memory occupied by a match context, using the memory
 freeing function from the general context with which it was created, or
-<b>free()</b> if that was not set.
+<b>free()</b> if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_match_data_create.html
+++ b/doc/html/pcre2_match_data_create.html
@ -30,8 +30,9 @@ This function creates a new match data block, which is used for holding the
 result of a match. The first argument specifies the number of pairs of offsets
 that are required. These form the "output vector" (ovector) within the match
 data block, and are used to identify the matched string and any captured
-substrings. There is always one pair of offsets; if <b>ovecsize</b> is zero, it
-is treated as one.
+substrings when matching with <b>pcre2_match()</b>, or a number of different
+matches at the same point when used with <b>pcre2_dfa_match()</b>. There is
+always one pair of offsets; if <b>ovecsize</b> is zero, it is treated as one.
 </P>
 <P>
 The second argument points to a general context, for custom memory management,
--- a/doc/html/pcre2_match_data_create_from_pattern.html
+++ b/doc/html/pcre2_match_data_create_from_pattern.html
@ -26,12 +26,15 @@ SYNOPSIS
 DESCRIPTION
 </b><br>
 <P>
-This function creates a new match data block, which is used for holding the
-result of a match. The first argument points to a compiled pattern. The number
-of capturing parentheses within the pattern is used to compute the number of
-pairs of offsets that are required in the match data block. These form the
-"output vector" (ovector) within the match data block, and are used to identify
-the matched string and any captured substrings.
+This function creates a new match data block for holding the result of a match.
+The first argument points to a compiled pattern. The number of capturing
+parentheses within the pattern is used to compute the number of pairs of
+offsets that are required in the match data block. These form the "output
+vector" (ovector) within the match data block, and are used to identify the
+matched string and any captured substrings when matching with
+<b>pcre2_match()</b>. If you are using <b>pcre2_dfa_match()</b>, which uses the
+outut vector in a different way, you should use <b>pcre2_match_data_create()</b>
+instead of this function.
 </P>
 <P>
 The second argument points to a general context, for custom memory management,
--- a/doc/html/pcre2_match_data_free.html
+++ b/doc/html/pcre2_match_data_free.html
@ -25,9 +25,15 @@ SYNOPSIS
 DESCRIPTION
 </b><br>
 <P>
-This function frees the memory occupied by a match data block, using the memory
-freeing function from the general context or compiled pattern with which it was
-created, or <b>free()</b> if that was not set.
+If <i>match_data</i> is NULL, this function does nothing. Otherwise,
+<i>match_data</i> must point to a match data block, which this function frees,
+using the memory freeing function from the general context or compiled pattern
+with which it was created, or <b>free()</b> if that was not set.
+</P>
+<P>
+If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
+match data block, the copy of the subject that was remembered with the block is
+also freed.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_pattern_info.html
+++ b/doc/html/pcre2_pattern_info.html
@ -19,7 +19,8 @@ SYNOPSIS
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
-<b>int pcre2_pattern_info(const pcre2 *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
+<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>,</b>
+<b>   void *<i>where</i>);</b>
 </P>
 <br><b>
 DESCRIPTION
@ -36,7 +37,7 @@ request are as follows:
 <pre>
  PCRE2_INFO_ALLOPTIONS      Final options after compiling
  PCRE2_INFO_ARGOPTIONS      Options passed to <b>pcre2_compile()</b>
-  PCRE2_INFO_BACKREFMAX      Number of highest back reference
+  PCRE2_INFO_BACKREFMAX      Number of highest backreference
  PCRE2_INFO_BSR             What \R matches:
                               PCRE2_BSR_UNICODE: Unicode line endings
                               PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
--- a/doc/html/pcre2_serialize_decode.html
+++ b/doc/html/pcre2_serialize_decode.html
@ -28,7 +28,10 @@ DESCRIPTION
 </b><br>
 <P>
 This function decodes a serialized set of compiled patterns back into a list of
-individual patterns. Its arguments are:
+individual patterns. This is possible only on a host that is running the same
+version of PCRE2, with the same code unit width, and the host must also have
+the same endianness, pointer width and PCRE2_SIZE type. The arguments for
+<b>pcre2_serialize_decode()</b> are:
 <pre>
  <i>codes</i>            pointer to a vector in which to build the list
  <i>number_of_codes</i>  number of slots in the vector
@ -45,7 +48,7 @@ the following negative error codes:
  PCRE2_ERROR_BADDATA   <i>number_of_codes</i> is zero or less
  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in <i>bytes</i>
  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
-  PCRE2_ERROR_MEMORY    memory allocation failed
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
  PCRE2_ERROR_NULL      <i>codes</i> or <i>bytes</i> is NULL
 </pre>
 PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled
@ -54,8 +57,8 @@ on a system with different endianness.
 <P>
 There is a complete description of the PCRE2 native API in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
 page.
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2_serialize_encode.html
+++ b/doc/html/pcre2_serialize_encode.html
@ -28,7 +28,12 @@ DESCRIPTION
 </b><br>
 <P>
 This function encodes a list of compiled patterns into a byte stream that can
-be saved on disc or elsewhere. Its arguments are:
+be saved on disc or elsewhere. Note that this is not an abstract format like
+Java or .NET. Conversion of the byte stream back into usable compiled patterns
+can only happen on a host that is running the same version of PCRE2, with the
+same code unit width, and the host must also have the same endianness, pointer
+width and PCRE2_SIZE type. The arguments for <b>pcre2_serialize_encode()</b>
+are:
 <pre>
  <i>codes</i>             pointer to a vector containing the list
  <i>number_of_codes</i>   number of slots in the vector
@ -53,8 +58,8 @@ that a slot in the vector does not point to a compiled pattern.
 <P>
 There is a complete description of the PCRE2 native API in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
 page.
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2_serialize_free.html
+++ b/doc/html/pcre2_serialize_free.html
@ -27,13 +27,14 @@ DESCRIPTION
 <P>
 This function frees the memory that was obtained by
 <b>pcre2_serialize_encode()</b> to hold a serialized byte stream. The argument
-must point to such a byte stream.
+must point to such a byte stream or be NULL, in which case the function returns
+without doing anything.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
 page.
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2_serialize_get_number_of_codes.html
+++ b/doc/html/pcre2_serialize_get_number_of_codes.html
@ -41,8 +41,8 @@ on a system with different endianness.
 <P>
 There is a complete description of the PCRE2 native API in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
 page.
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2_set_character_tables.html
+++ b/doc/html/pcre2_set_character_tables.html
@ -20,16 +20,19 @@ SYNOPSIS
 </P>
 <P>
 <b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
-<b>  const unsigned char *<i>tables</i>);</b>
+<b>  const uint8_t *<i>tables</i>);</b>
 </P>
 <br><b>
 DESCRIPTION
 </b><br>
 <P>
 This function sets a pointer to custom character tables within a compile
-context. The second argument must be the result of a call to
-<b>pcre2_maketables()</b> or NULL to request the default tables. The result is
-always zero.
+context. The second argument must point to a set of PCRE2 character tables or
+be NULL to request the default tables. The result is always zero. Character
+tables can be created by calling <b>pcre2_maketables()</b> or by running the
+<b>pcre2_dftables</b> maintenance command in binary mode (see the
+<a href="pcre2build.html"><b>pcre2build</b></a>
+documentation).
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_set_compile_extra_options.html
+++ b/doc/html/pcre2_set_compile_extra_options.html
@ -20,7 +20,7 @@ SYNOPSIS
 </P>
 <P>
 <b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
-<b>  PCRE2_SIZE <i>extra_options</i>);</b>
+<b>  uint32_t <i>extra_options</i>);</b>
 </P>
 <br><b>
 DESCRIPTION
@ -30,8 +30,11 @@ This function sets additional option bits for <b>pcre2_compile()</b> that are
 housed in a compile context. It completely replaces all the bits. The extra
 options are:
 <pre>
-  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{df800} to \x{dfff} in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \u, \U, and \x handling
  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as a literal following character
+  PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \r as \n
  PCRE2_EXTRA_MATCH_LINE               Pattern matches whole lines
  PCRE2_EXTRA_MATCH_WORD               Pattern matches "words"
 </pre>
--- a/doc/html/pcre2_set_glob_separator.html
+++ b/doc/html/pcre2_set_glob_separator.html
@ -28,7 +28,7 @@ DESCRIPTION
 <P>
 This function is part of an experimental set of pattern conversion functions.
 It sets the component separator character that is used when converting globs.
-The second argument must one of the characters forward slash, backslash, or
+The second argument must be one of the characters forward slash, backslash, or
 dot. The default is backslash when running under Windows, otherwise forward
 slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
 the second argument is invalid.
--- a/doc/html/pcre2_set_substitute_callout.html
+++ b/doc/html/pcre2_set_substitute_callout.html
@ -0,0 +1,43 @@
+<html>
+<head>
+<title>pcre2_set_substitute_callout specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_set_substitute_callout man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
+<b>  int (*<i>callout_function</i>)(pcre2_substitute_callout_block *),</b>
+<b>  void *<i>callout_data</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function sets the substitute callout fields in a match context (the first
+argument). The second argument specifies a callout function, and the third
+argument is an opaque data item that is passed to it. The result of this
+function is always zero.
+</P>
+<P>
+There is a complete description of the PCRE2 native API in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page and a description of the POSIX API in the
+<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
--- a/doc/html/pcre2_substitute.html
+++ b/doc/html/pcre2_substitute.html
@ -48,8 +48,8 @@ Its arguments are:
  <i>outlengthptr</i>  Points to the length of the output buffer
 </pre>
 A match data block is needed only if you want to inspect the data from the
-match that is returned in that block. A match context is needed only if you
-want to:
+final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is
+set. A match context is needed only if you want to:
 <pre>
  Set up a callout function
  Set a matching offset limit
@ -57,29 +57,46 @@ want to:
  Change the backtracking depth limit
  Set custom memory management in the match context
 </pre>
-The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code
-units, not characters, as is the contents of the variable pointed at by
-<i>outlengthptr</i>, which is updated to the actual length of the new string.
+The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code units,
+not characters, as is the contents of the variable pointed at by
+<i>outlengthptr</i>. This variable must contain the length of the output buffer
+when the function is called. If the function is successful, the value is
+changed to the length of the new string, excluding the trailing zero that is
+automatically added.
+</P>
+<P>
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 <pre>
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
+  PCRE2_NOTBOL                       Subject is not the beginning of a line
+  PCRE2_NOTEOL                       Subject is not the end of a line
+  PCRE2_NOTEMPTY                     An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in the subject or replacement
+                                      (only relevant if PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
+  PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 </pre>
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
+PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
+</P>
+<P>
+If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-NULL; its
+contents must be the result of a call to <b>pcre2_match()</b> using the same
+pattern and subject.
+</P>
+<P>
 The function returns the number of substitutions, which may be zero if there
-were no matches. The result can be greater than one only when
+are no matches. The result may be greater than one only when
 PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
 is returned.
 </P>
--- a/doc/html/pcre2_substring_free.html
+++ b/doc/html/pcre2_substring_free.html
@ -28,7 +28,7 @@ DESCRIPTION
 This is a convenience function for freeing the memory obtained by a previous
 call to <b>pcre2_substring_get_byname()</b> or
 <b>pcre2_substring_get_bynumber()</b>. Its only argument is a pointer to the
-string.
+string. If the argument is NULL, the function does nothing.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_substring_list_free.html
+++ b/doc/html/pcre2_substring_list_free.html
@ -27,7 +27,8 @@ DESCRIPTION
 <P>
 This is a convenience function for freeing the store obtained by a previous
 call to <b>pcre2substring_list_get()</b>. Its only argument is a pointer to
-the list of string pointers.
+the list of string pointers. If the argument is NULL, the function returns
+immediately, without doing anything.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2_substring_nametable_scan.html
+++ b/doc/html/pcre2_substring_nametable_scan.html
@ -27,8 +27,8 @@ DESCRIPTION
 </b><br>
 <P>
 This convenience function finds, for a compiled pattern, the first and last
-entries for a given name in the table that translates capturing parenthesis
-names into numbers.
+entries for a given name in the table that translates capture group names into
+numbers.
 <pre>
  <i>code</i>    Compiled regular expression
  <i>name</i>    Name whose entries required
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@ -33,11 +33,12 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC18" href="#SEC18">INCLUDING DEBUGGING CODE</a>
 <li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
 <li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
-<li><a name="TOC21" href="#SEC21">SUPPORT FOR FUZZERS</a>
-<li><a name="TOC22" href="#SEC22">OBSOLETE OPTION</a>
-<li><a name="TOC23" href="#SEC23">SEE ALSO</a>
-<li><a name="TOC24" href="#SEC24">AUTHOR</a>
-<li><a name="TOC25" href="#SEC25">REVISION</a>
+<li><a name="TOC21" href="#SEC21">DISABLING THE Z AND T FORMATTING MODIFIERS</a>
+<li><a name="TOC22" href="#SEC22">SUPPORT FOR FUZZERS</a>
+<li><a name="TOC23" href="#SEC23">OBSOLETE OPTION</a>
+<li><a name="TOC24" href="#SEC24">SEE ALSO</a>
+<li><a name="TOC25" href="#SEC25">AUTHOR</a>
+<li><a name="TOC26" href="#SEC26">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
 <P>
@ -82,7 +83,8 @@ The following sections include descriptions of "on/off" options whose names
 begin with --enable or --disable. Because of the way that <b>configure</b>
 works, --enable and --disable always come in pairs, so the complementary option
 always exists as well, but as it specifies the default, it is not described.
-Options that specify values have names that start with --with.
+Options that specify values have names that start with --with. At the end of a
+<b>configure</b> run, a summary of the configuration is output.
 </P>
 <br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
 <P>
@ -126,7 +128,7 @@ To build it without Unicode support, add
  --disable-unicode
 </pre>
 to the <b>configure</b> command. This setting applies to all three libraries. It
-is not possible to build one library with Unicode support, and another without,
+is not possible to build one library with Unicode support and another without
 in the same configuration.
 </P>
 <P>
@ -140,8 +142,9 @@ locked this out by setting PCRE2_NEVER_UTF.
 UTF support allows the libraries to process character code points up to
 0x10ffff in the strings that they handle. Unicode support also gives access to
 the Unicode properties of characters, using pattern escapes such as \P, \p,
-and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are
-supported. Details are given in the
+and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i>,
+script names, and some bi-directional properties are supported. Details are
+given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation.
 </P>
@ -170,8 +173,15 @@ Just-in-time (JIT) compiler support is included in the build by specifying
  --enable-jit
 </pre>
 This support is available only for certain hardware architectures. If this
-option is set for an unsupported architecture, a building error occurs. If you
-are running under SELinux you may also want to add
+option is set for an unsupported architecture, a building error occurs.
+If in doubt, use
+<pre>
+  --enable-jit=auto
+</pre>
+which enables JIT only if the current hardware is supported. You can check
+if JIT is enabled in the configuration summary that is output at the end of a
+<b>configure</b> run. If you are enabling JIT under SELinux you may also want to
+add
 <pre>
  --enable-jit-sealloc
 </pre>
@ -179,11 +189,11 @@ which enables the use of an execmem allocator in JIT that is compatible with
 SELinux. This has no effect if JIT is not enabled. See the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 documentation for a discussion of JIT usage. When JIT support is enabled,
-pcre2grep automatically makes use of it, unless you add
+<b>pcre2grep</b> automatically makes use of it, unless you add
 <pre>
  --disable-pcre2grep-jit
 </pre>
-to the "configure" command.
+to the <b>configure</b> command.
 </P>
 <br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
 <P>
@ -219,7 +229,7 @@ separator, U+2028), and PS (paragraph separator, U+2029). The final option is
 <pre>
  --enable-newline-is-nul
 </pre>
-which causes NUL (binary zero) is set as the default line-ending character.
+which causes NUL (binary zero) to be set as the default line-ending character.
 </P>
 <P>
 Whatever default line ending convention is selected when PCRE2 is built can be
@ -244,10 +254,10 @@ Within a compiled pattern, offset values are used to point from one part to
 another (for example, from an opening parenthesis to an alternation
 metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
 are used for these offsets, leading to a maximum size for a compiled pattern of
-around 64K code units. This is sufficient to handle all but the most gigantic
-patterns. Nevertheless, some people do want to process truly enormous patterns,
-so it is possible to compile PCRE2 to use three-byte or four-byte offsets by
-adding a setting such as
+around 64 thousand code units. This is sufficient to handle all but the most
+gigantic patterns. Nevertheless, some people do want to process truly enormous
+patterns, so it is possible to compile PCRE2 to use three-byte or four-byte
+offsets by adding a setting such as
 <pre>
  --with-link-size=3
 </pre>
@ -274,11 +284,10 @@ to the <b>configure</b> command. This setting also applies to the
 counting is done differently).
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20K vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kilobytes. The limit can be changed at run time, as
+The <b>pcre2_match()</b> function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
 described in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation. The default limit (in effect unlimited) is 20 million. You can
@ -286,10 +295,11 @@ change this by a setting such as
 <pre>
  --with-heap-limit=500
 </pre>
-which limits the amount of heap to 500 kilobytes. This limit applies only to
-interpretive matching in pcre2_match(). It does not apply when JIT (which has
-its own memory arrangements) is used, nor does it apply to
-<b>pcre2_dfa_match()</b>.
+which limits the amount of heap to 500 KiB. This limit applies only to
+interpretive matching in <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, which
+may also use the heap for internal workspace when processing complicated
+patterns. This limit does not apply when JIT (which has its own memory
+arrangements) is used.
 </P>
 <P>
 You can also explicitly limit the depth of nested backtracking in the
@ -297,7 +307,7 @@ You can also explicitly limit the depth of nested backtracking in the
 for --with-match-limit. You can set a lower default limit by adding, for
 example,
 <pre>
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 </pre>
 to the <b>configure</b> command. This value can be overridden at run time. This
 depth limit indirectly limits the amount of heap memory that is used, but
@ -311,7 +321,7 @@ As well as applying to <b>pcre2_match()</b>, the depth limit also controls
 the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
 used for lookaround assertions, atomic groups, and recursion within patterns.
 The limit does not apply to JIT matching.
-</P>
+<a name="createtables"></a></P>
 <br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
 <P>
 PCRE2 uses fixed tables for processing characters whose code points are less
@ -322,12 +332,34 @@ only. If you add
  --enable-rebuild-chartables
 </pre>
 to the <b>configure</b> command, the distributed tables are no longer used.
-Instead, a program called <b>dftables</b> is compiled and run. This outputs the
-source for new set of tables, created in the default locale of your C run-time
-system. This method of replacing the tables does not work if you are cross
-compiling, because <b>dftables</b> is run on the local host. If you need to
-create alternative tables when cross compiling, you will have to do so "by
-hand".
+Instead, a program called <b>pcre2_dftables</b> is compiled and run. This
+outputs the source for new set of tables, created in the default locale of your
+C run-time system. This method of replacing the tables does not work if you are
+cross compiling, because <b>pcre2_dftables</b> needs to be run on the local
+host and therefore not compiled with the cross compiler.
+</P>
+<P>
+If you need to create alternative tables when cross compiling, you will have to
+do so "by hand". There may also be other reasons for creating tables manually.
+To cause <b>pcre2_dftables</b> to be built on the local host, run a normal
+compiling command, and then run the program with the output file as its
+argument, for example:
+<pre>
+  cc src/pcre2_dftables.c -o pcre2_dftables
+  ./pcre2_dftables src/pcre2_chartables.c
+</pre>
+This builds the tables in the default locale of the local host. If you want to
+specify a locale, you must use the -L option:
+<pre>
+  LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+</pre>
+You can also specify -b (with or without -L). This causes the tables to be
+written in binary instead of as source code. A set of binary tables can be
+loaded into memory by an application and passed to <b>pcre2_compile()</b> in the
+same way as tables created by calling <b>pcre2_maketables()</b>. The tables are
+just a string of bytes, independent of hardware characteristics such as
+endianness. This means they can be bundled with an application that runs in
+different environments, to ensure consistent behaviour.
 </P>
 <br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
 <P>
@ -366,12 +398,15 @@ environment.
 </P>
 <br><a name="SEC14" href="#TOC1">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a><br>
 <P>
-By default, on non-Windows systems, <b>pcre2grep</b> supports the use of
-callouts with string arguments within the patterns it is matching, in order to
-run external scripts. For details, see the
+By default <b>pcre2grep</b> supports the use of callouts with string arguments
+within the patterns it is matching. There are two kinds: one that generates
+output using local code, and another that calls an external program or script.
+If --disable-pcre2grep-callout-fork is added to the <b>configure</b> command,
+only the first kind of callout is supported; if --disable-pcre2grep-callout is
+used, all callouts are completely ignored. For more details of <b>pcre2grep</b>
+callouts, see the
 <a href="pcre2grep.html"><b>pcre2grep</b></a>
-documentation. This support can be disabled by adding
--disable-pcre2grep-callout to the <b>configure</b> command.
+documentation.
 </P>
 <br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
 <P>
@ -390,18 +425,18 @@ they are not.
 <P>
 <b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
 scanning, in order to be able to output "before" and "after" lines when it
-finds a match. The starting size of the buffer is controlled by a parameter
-whose default value is 20K. The buffer itself is three times this size, but
-because of the way it is used for holding "before" lines, the longest line that
-is guaranteed to be processable is the parameter size. If a longer line is
-encountered, <b>pcre2grep</b> automatically expands the buffer, up to a
-specified maximum size, whose default is 1M or the starting size, whichever is
-the larger. You can change the default parameter values by adding, for example,
+finds a match. The default starting size of the buffer is 20KiB. The buffer
+itself is three times this size, but because of the way it is used for holding
+"before" lines, the longest line that is guaranteed to be processable is the
+notional buffer size. If a longer line is encountered, <b>pcre2grep</b>
+automatically expands the buffer, up to a specified maximum size, whose default
+is 1MiB or the starting size, whichever is the larger. You can change the
+default parameter values by adding, for example,
 <pre>
  --with-pcre2grep-bufsize=51200
  --with-pcre2grep-max-bufsize=2097152
 </pre>
-to the <b>configure</b> command. The caller of \fPpcre2grep\fP can override
+to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
 these values by using --buffer-size and --max-buffer-size on the command line.
 </P>
 <br><a name="SEC17" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
@ -514,7 +549,22 @@ This cleans all coverage data including the generated coverage report. For more
 information about code coverage, see the <b>gcov</b> and <b>lcov</b>
 documentation.
 </P>
-<br><a name="SEC21" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
+<br><a name="SEC21" href="#TOC1">DISABLING THE Z AND T FORMATTING MODIFIERS</a><br>
+<P>
+The C99 standard defines formatting modifiers z and t for size_t and
+ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
+environments other than old versions of Microsoft Visual Studio when
+__STDC_VERSION__ is defined and has a value greater than or equal to 199901L
+(indicating support for C99).
+However, there is at least one environment that claims to be C99 but does not
+support these modifiers. If
+<pre>
+  --disable-percent-zt
+</pre>
+is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
+a suitable format is used depending in the size of long for the platform.
+</P>
+<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
 <P>
 There is a special option for use by people who want to run fuzzing tests on
 PCRE2:
@ -533,12 +583,12 @@ generated from the string.
 Setting --enable-fuzz-support also causes a binary called <b>pcre2fuzzcheck</b>
 to be created. This is normally run under valgrind or used when PCRE2 is
 compiled with address sanitizing enabled. It calls the fuzzing function and
-outputs information about it is doing. The input strings are specified by
+outputs information about what it is doing. The input strings are specified by
 arguments: if an argument starts with "=" the rest of it is a literal input
 string. Otherwise, it is assumed to be a file name, and the contents of the
 file are the test string.
 </P>
-<br><a name="SEC22" href="#TOC1">OBSOLETE OPTION</a><br>
+<br><a name="SEC23" href="#TOC1">OBSOLETE OPTION</a><br>
 <P>
 In versions of PCRE2 prior to 10.30, there were two ways of handling
 backtracking in the <b>pcre2_match()</b> function. The default was to use the
@ -550,24 +600,24 @@ was set, memory on the heap was used. From release 10.30 onwards this has
 changed (the stack is no longer used) and this option now does nothing except
 give a warning.
 </P>
-<br><a name="SEC23" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2api</b>(3), <b>pcre2-config</b>(3).
 </P>
-<br><a name="SEC24" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC25" href="#TOC1">REVISION</a><br>
+<br><a name="SEC26" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 18 July 2017
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2callout.html
+++ b/doc/html/pcre2callout.html
@ -44,6 +44,14 @@ a match context (see <b>pcre2_set_callout()</b> in the
 documentation).
 </P>
 <P>
+When using the <b>pcre2_substitute()</b> function, an additional callout feature
+is available. This does a callout after each change to the subject string and
+is described in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation; the rest of this document is concerned with callouts during
+pattern matching.
+</P>
+<P>
 Within a regular expression, (?C&#60;arg&#62;) indicates a point at which the external
 function is to be called. Different callout points can be identified by putting
 a number less than 256 after the letter C. The default value is zero.
@ -143,7 +151,7 @@ branch, automatic anchoring occurs if all branches are anchorable.
 </P>
 <P>
 This optimization is disabled, however, if .* is in an atomic group or if there
-is a back reference to the capturing group in which it appears. It is also
+is a backreference to the capture group in which it appears. It is also
 disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of
 callouts does not affect it.
 </P>
@ -310,10 +318,12 @@ PCRE2_UNSET.
 </P>
 <P>
 For DFA matching, the <i>offset_vector</i> field points to the ovector that was
-passed to the matching function in the match data block, but it holds no useful
-information at callout time because <b>pcre2_dfa_match()</b> does not support
-substring capturing. The value of <i>capture_top</i> is always 1 and the value
-of <i>capture_last</i> is always 0 for DFA matching.
+passed to the matching function in the match data block for callouts at the top
+level, but to an internal ovector during the processing of pattern recursions,
+lookarounds, and atomic groups. However, these ovectors hold no useful
+information because <b>pcre2_dfa_match()</b> does not support substring
+capturing. The value of <i>capture_top</i> is always 1 and the value of
+<i>capture_last</i> is always 0 for DFA matching.
 </P>
 <P>
 The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
@ -344,8 +354,8 @@ callout before an assertion such as (?=ab) the length is 3. For an an
 alternation bar or a closing parenthesis, the length is one, unless a closing
 parenthesis is followed by a quantifier, in which case its length is included.
 (This changed in release 10.23. In earlier releases, before an opening
-parenthesis the length was that of the entire subpattern, and before an
-alternation bar or a closing parenthesis the length was zero.)
+parenthesis the length was that of the entire group, and before an alternation
+bar or a closing parenthesis the length was zero.)
 </P>
 <P>
 The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
@ -461,9 +471,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC8" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 22 December 2017
+Last updated: 03 February 2019
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2019 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2compat.html
+++ b/doc/html/pcre2compat.html
@ -16,120 +16,135 @@ please consult the man page, in case the conversion went wrong.
 DIFFERENCES BETWEEN PCRE2 AND PERL
 </b><br>
 <P>
-This document describes the differences in the ways that PCRE2 and Perl handle
-regular expressions. The differences described here are with respect to Perl
-versions 5.26, but as both Perl and PCRE2 are continually changing, the
-information may sometimes be out of date.
+This document describes some of the differences in the ways that PCRE2 and Perl
+handle regular expressions. The differences described here are with respect to
+Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the
+information may at times be out of date.
 </P>
 <P>
-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the
+behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the
+next character unless it is the start of a newline sequence. This means that,
+if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF
+(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using
+EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline
+indicator.
+</P>
+<P>
+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
 have are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 page.
 </P>
 <P>
-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
 they do not mean what you might think. For example, (?!a){3} does not assert
 that the next three characters are not "a". It just asserts that the next
-character is not "a" three times (in principle: PCRE2 optimizes this to run the
+character is not "a" three times (in principle; PCRE2 optimizes this to run the
 assertion just once). Perl allows some repeat quantifiers on other assertions,
-for example, \b* (but not \b{3}), but these do not seem to have any use.
+for example, \b* , but these do not seem to have any use. PCRE2 does not allow
+any kind of quantifier on non-lookaround assertions.
 </P>
 <P>
-3. Capturing subpatterns that occur inside negative lookaround assertions are
-counted, but their entries in the offsets vector are set only when a negative
-assertion is a condition that has a matching branch (that is, the condition is
-false).
+4. Capture groups that occur inside negative lookaround assertions are counted,
+but their entries in the offsets vector are set only when a negative assertion
+is a condition that has a matching branch (that is, the condition is false).
+Perl may set such capture groups in other circumstances.
 </P>
 <P>
-4. The following Perl escape sequences are not supported: \l, \u, \L,
-\U, and \N when followed by a character name or Unicode value. (\N on its
-own, matching a non-newline character, is supported.) In fact these are
+5. The following Perl escape sequences are not supported: \F, \l, \L, \u,
+\U, and \N when followed by a character name. \N on its own, matching a
+non-newline character, and \N{U+dd..}, matching a Unicode code point, are
+supported. The escapes that modify the case of following letters are
 implemented by Perl's general string-handling and are not part of its pattern
 matching engine. If any of these are encountered by PCRE2, an error is
-generated by default. However, if the PCRE2_ALT_BSUX option is set,
-\U and \u are interpreted as ECMAScript interprets them.
+generated by default. However, if either of the PCRE2_ALT_BSUX or
+PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript
+interprets them.
 </P>
 <P>
-5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
+6. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
 built with Unicode support (the default). The properties that can be tested
 with \p and \P are limited to the general category properties such as Lu and
-Nd, script names such as Greek or Han, and the derived properties Any and L&.
-PCRE2 does support the Cs (surrogate) property, which Perl does not; the Perl
-documentation says "Because Perl hides the need for the user to understand the
-internal representation of Unicode characters, there is no need to implement
-the somewhat messy concept of surrogates."
+Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the
+derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs
+(surrogate) property, but in PCRE2 its use is limited. See the
+<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
+documentation for details. The long synonyms for property names that Perl
+supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted
+to prefix any of these properties with "Is".
 </P>
 <P>
-6. PCRE2 does support the \Q...\E escape for quoting substrings. Characters
-in between are treated as literals. This is slightly different from Perl in
-that $ and @ are also handled as literals inside the quotes. In Perl, they
-cause variable interpolation (but of course PCRE2 does not have variables).
-Note the following examples:
+7. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
+in between are treated as literals. However, this is slightly different from
+Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
+they cause variable interpolation (PCRE2 does not have variables). Also, Perl
+does "double-quotish backslash interpolation" on any backslashes between \Q
+and \E which, its documentation says, "may lead to confusing results". PCRE2
+treats a backslash between \Q and \E just like any other character. Note the
+following examples:
 <pre>
-    Pattern            PCRE2 matches      Perl matches
+    Pattern            PCRE2 matches     Perl matches

    \Qabc$xyz\E        abc$xyz           abc followed by the contents of $xyz
    \Qabc\$xyz\E       abc\$xyz          abc\$xyz
    \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz
+    \QA\B\E            A\B               A\B
+    \Q\\E              \                 \\E
 </pre>
-The \Q...\E sequence is recognized both inside and outside character classes.
+The \Q...\E sequence is recognized both inside and outside character classes
+by both PCRE2 and Perl.
 </P>
 <P>
-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
-constructions. However, there is support PCRE2's "callout" feature, which
-allows an external function to be called during pattern matching. See the
+8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+constructions. However, PCRE2 does have a "callout" feature, which allows an
+external function to be called during pattern matching. See the
 <a href="pcre2callout.html"><b>pcre2callout</b></a>
 documentation for details.
 </P>
 <P>
-8. Subroutine calls (whether recursive or not) were treated as atomic groups up
+9. Subroutine calls (whether recursive or not) were treated as atomic groups up
 to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
 into subroutine calls is now supported, as in Perl.
 </P>
 <P>
-9. If any of the backtracking control verbs are used in a subpattern that is
-called as a subroutine (whether or not recursively), their effect is confined
-to that subpattern; it does not extend to the surrounding pattern. This is not
-always the case in Perl. In particular, if (*THEN) is present in a group that
-is called as a subroutine, its action is limited to that group, even if the
-group does not contain any | characters. Note that such subpatterns are
+10. In PCRE2, if any of the backtracking control verbs are used in a group that
+is called as a subroutine (whether or not recursively), their effect is
+confined to that group; it does not extend to the surrounding pattern. This is
+not always the case in Perl. In particular, if (*THEN) is present in a group
+that is called as a subroutine, its action is limited to that group, even if
+the group does not contain any | characters. Note that such groups are
 processed as anchored at the point where they are tested.
 </P>
 <P>
-10. If a pattern contains more than one backtracking control verb, the first
+11. If a pattern contains more than one backtracking control verb, the first
 one that is backtracked onto acts. For example, in the pattern
 A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 same as PCRE2, but there are cases where it differs.
 </P>
 <P>
-11. Most backtracking verbs in assertions have their normal actions. They are
-not confined to the assertion.
-</P>
-<P>
 12. There are some differences that are concerned with the settings of captured
 strings when part of a pattern is repeated. For example, matching "aba" against
 the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
 "b".
 </P>
 <P>
-13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
-names is not as general as Perl's. This is a consequence of the fact the PCRE2
-works internally just with numbers, using an external table to translate
-between numbers and names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B),
-where the two capturing parentheses have the same number but different names,
-is not supported, and causes an error at compile time. If it were allowed, it
-would not be possible to distinguish which parentheses matched, because both
-names map to capturing subpattern number 1. To avoid this confusing situation,
-an error is given at compile time.
+13. PCRE2's handling of duplicate capture group numbers and names is not as
+general as Perl's. This is a consequence of the fact the PCRE2 works internally
+just with numbers, using an external table to translate between numbers and
+names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B)), where the two
+capture groups have the same number but different names, is not supported, and
+causes an error at compile time. If it were allowed, it would not be possible
+to distinguish which group matched, because both names map to capture group
+number 1. To avoid this confusing situation, an error is given at compile time.
 </P>
 <P>
 14. Perl used to recognize comments in some places that PCRE2 does not, for
-example, between the ( and ? at the start of a subpattern. If the /x modifier
-is set, Perl allowed white space between ( and ? though the latest Perls give
-an error (for a while it was just deprecated). There may still be some cases
-where Perl behaves differently.
+example, between the ( and ? at the start of a group. If the /x modifier is
+set, Perl allowed white space between ( and ? though the latest Perls give an
+error (for a while it was just deprecated). There may still be some cases where
+Perl behaves differently.
 </P>
 <P>
 15. Perl, when in warning mode, gives warnings for character classes such as
@ -141,22 +156,30 @@ certainly user mistakes.
 16. In PCRE2, the upper/lower case character properties Lu and Ll are not
 affected when case-independent matching is specified. For example, \p{Lu}
 always matches an upper case letter. I think Perl has changed in this respect;
-in the release at the time of writing (5.24), \p{Lu} and \p{Ll} match all
+in the release at the time of writing (5.34), \p{Lu} and \p{Ll} match all
 letters, regardless of case, when case independence is specified.
 </P>
 <P>
-17. PCRE2 provides some extensions to the Perl regular expression facilities.
-Perl 5.10 includes new features that are not in earlier versions of Perl, some
+17. From release 5.32.0, Perl locks out the use of \K in lookaround
+assertions. From release 10.38 PCRE2 does the same by default. However, there
+is an option for re-enabling the previous behaviour. When this option is set,
+\K is acted on when it occurs in positive assertions, but is ignored in
+negative assertions.
+</P>
+<P>
+18. PCRE2 provides some extensions to the Perl regular expression facilities.
+Perl 5.10 included new features that were not in earlier versions of Perl, some
 of which (such as named parentheses) were in PCRE2 for some time before. This
-list is with respect to Perl 5.26:
+list is with respect to Perl 5.34:
 <br>
 <br>
 (a) Although lookbehind assertions in PCRE2 must match fixed length strings,
-each alternative branch of a lookbehind assertion can match a different length
-of string. Perl requires them all to have the same length.
+each alternative toplevel branch of a lookbehind assertion can match a
+different length of string. Perl used to require them all to have the same
+length, but the latest version has some variable length support.
 <br>
 <br>
-(b) From PCRE2 10.23, back references to groups of fixed length are supported
+(b) From PCRE2 10.23, backreferences to groups of fixed length are supported
 in lookbehinds, provided that there is no possibility of referencing a
 non-unique number or name. Perl does not support backreferences in lookbehinds.
 <br>
@ -198,16 +221,21 @@ different way and is not Perl-compatible.
 <br>
 <br>
 (l) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at
-the start of a pattern that set overall options that cannot be changed within
+the start of a pattern. These set overall options that cannot be changed within
 the pattern.
+<br>
+<br>
+(m) PCRE2 supports non-atomic positive lookaround assertions. This is an
+extension to the lookaround facilities. The default, Perl-compatible
+lookarounds are atomic.
 </P>
 <P>
-18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
+19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
 modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode
 rules. This separation cannot be represented with PCRE2_UCP.
 </P>
 <P>
-19. Perl has different limits than PCRE2. See the
+20. Perl has different limits than PCRE2. See the
 <a href="pcre2limit.html"><b>pcre2limit</b></a>
 documentation for details. Perl went with 5.10 from recursion to iteration
 keeping the intermediate matches on the heap, which is ~10% slower but does not
@ -220,7 +248,7 @@ AUTHOR
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -229,9 +257,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 18 April 2017
+Last updated: 08 December 2021
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2convert.html
+++ b/doc/html/pcre2convert.html
@ -105,7 +105,8 @@ If <b>buffer</b> points to a NULL pointer, an output buffer is obtained using
 the allocator in the context or <b>malloc()</b> if no context is supplied. A
 pointer to this buffer is placed in the variable to which <b>buffer</b> points.
 When no longer needed the output buffer must be freed by calling
-<b>pcre2_converted_pattern_free()</b>.
+<b>pcre2_converted_pattern_free()</b>. If this function is called with a NULL
+argument, it returns immediately without doing anything.
 </P>
 <P>
 If <b>buffer</b> points to a non-NULL pointer, <b>blength</b> must be set to the
@ -140,8 +141,8 @@ permitted to match separator characters, but the double-star (**) feature
 </P>
 <P>
 PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
-match separator characters. PCRE2_GLOB_NO_STARSTAR matches globs with the
-double-star feature disabled. These options may be given together.
+match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with
+the double-star feature disabled. These options may be given together.
 </P>
 <br><a name="SEC5" href="#TOC1">CONVERTING POSIX PATTERNS</a><br>
 <P>
@ -181,9 +182,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC7" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 12 July 2017
+Last updated: 28 June 2018
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2demo.html
+++ b/doc/html/pcre2demo.html
@ -104,12 +104,11 @@ uint32_t newline;

 PCRE2_SIZE erroroffset;
 PCRE2_SIZE *ovector;
+PCRE2_SIZE subject_length;

-size_t subject_length;
 pcre2_match_data *match_data;


-
 /**************************************************************************
 * First, sort out the command line. There is only one possible option at  *
 * the moment, "-g" to request repeated matching to find all occurrences,  *
@ -138,12 +137,14 @@ if (argc - i != 2)
  return 1;
  }

-/* As pattern and subject are char arguments, they can be straightforwardly
-cast to PCRE2_SPTR as we are working in 8-bit code units. */
+/* Pattern and subject are char arguments, so they can be straightforwardly
+cast to PCRE2_SPTR because we are working in 8-bit code units. The subject
+length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact
+defined to be size_t. */

 pattern = (PCRE2_SPTR)argv[i];
 subject = (PCRE2_SPTR)argv[i+1];
-subject_length = strlen((char *)subject);
+subject_length = (PCRE2_SIZE)strlen((char *)subject);


 /*************************************************************************
@ -172,17 +173,22 @@ if (re == NULL)


 /*************************************************************************
-* If the compilation succeeded, we call PCRE again, in order to do a     *
+* If the compilation succeeded, we call PCRE2 again, in order to do a    *
 * pattern match against the subject string. This does just ONE match. If *
 * further matching is needed, it will be done below. Before running the  *
-* match we must set up a match_data block for holding the result.        *
+* match we must set up a match_data block for holding the result. Using  *
+* pcre2_match_data_create_from_pattern() ensures that the block is       *
+* exactly the right size for the number of capturing parentheses in the  *
+* pattern. If you need to know the actual size of a match_data block as  *
+* a number of bytes, you can find it like this:                          *
+*                                                                        *
+* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data);    *
 *************************************************************************/

-/* Using this function ensures that the block is exactly the right size for
-the number of capturing parentheses in the pattern. */
-
 match_data = pcre2_match_data_create_from_pattern(re, NULL);

+/* Now run the match. */
+
 rc = pcre2_match(
  re,                   /* the compiled pattern */
  subject,              /* the subject string */
@ -205,12 +211,12 @@ if (rc &lt; 0)
    default: printf("Matching error %d\n", rc); break;
    }
  pcre2_match_data_free(match_data);   /* Release memory used for the match */
-  pcre2_code_free(re);                 /* data and the compiled pattern. */
+  pcre2_code_free(re);                 /*   data and the compiled pattern. */
  return 1;
  }

-/* Match succeded. Get a pointer to the output vector, where string offsets are
-stored. */
+/* Match succeeded. Get a pointer to the output vector, where string offsets
+are stored. */

 ovector = pcre2_get_ovector_pointer(match_data);
 printf("Match succeeded at offset %d\n", (int)ovector[0]);
@ -228,9 +234,12 @@ pcre2_match_data_create_from_pattern() above. */
 if (rc == 0)
  printf("ovector was not big enough for all the captured substrings\n");

-/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
-to set the start of a match later than its end. In this demonstration program,
-we just detect this case and give up. */
+/* Since release 10.38 PCRE2 has locked out the use of \K in lookaround
+assertions. However, there is an option to re-enable the old behaviour. If that
+is set, it is possible to run patterns such as /(?=.\K)/ that use \K in an
+assertion to set the start of a match later than its end. In this demonstration
+program, we show how to detect this case, but it shouldn't arise because the
+option is never set. */

 if (ovector[0] &gt; ovector[1])
  {
@ -249,7 +258,7 @@ application you might want to do things other than print them. */
 for (i = 0; i &lt; rc; i++)
  {
  PCRE2_SPTR substring_start = subject + ovector[2*i];
-  size_t substring_length = ovector[2*i+1] - ovector[2*i];
+  PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i];
  printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
  }

@ -447,7 +456,7 @@ for (;;)
    return 1;
    }

-  /* Match succeded */
+  /* Match succeeded */

  printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);

--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -17,17 +17,18 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
 <li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
 <li><a name="TOC4" href="#SEC4">BINARY FILES</a>
-<li><a name="TOC5" href="#SEC5">OPTIONS</a>
-<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
-<li><a name="TOC7" href="#SEC7">NEWLINES</a>
-<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
-<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
-<li><a name="TOC10" href="#SEC10">USING PCRE2'S CALLOUT FACILITY</a>
-<li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a>
-<li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a>
-<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
-<li><a name="TOC14" href="#SEC14">AUTHOR</a>
-<li><a name="TOC15" href="#SEC15">REVISION</a>
+<li><a name="TOC5" href="#SEC5">BINARY ZEROS IN PATTERNS</a>
+<li><a name="TOC6" href="#SEC6">OPTIONS</a>
+<li><a name="TOC7" href="#SEC7">ENVIRONMENT VARIABLES</a>
+<li><a name="TOC8" href="#SEC8">NEWLINES</a>
+<li><a name="TOC9" href="#SEC9">OPTIONS COMPATIBILITY</a>
+<li><a name="TOC10" href="#SEC10">OPTIONS WITH DATA</a>
+<li><a name="TOC11" href="#SEC11">USING PCRE2'S CALLOUT FACILITY</a>
+<li><a name="TOC12" href="#SEC12">MATCHING ERRORS</a>
+<li><a name="TOC13" href="#SEC13">DIAGNOSTICS</a>
+<li><a name="TOC14" href="#SEC14">SEE ALSO</a>
+<li><a name="TOC15" href="#SEC15">AUTHOR</a>
+<li><a name="TOC16" href="#SEC16">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
 <P>
@ -70,13 +71,15 @@ For example:
 <pre>
  pcre2grep some-pattern file1 - file3
 </pre>
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how <b>pcre2grep</b> behaves. In
-particular, the <b>-M</b> option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-<b>-N</b> (<b>--newline</b>) option.
+However, there are options that can change how <b>pcre2grep</b> behaves. For
+example, the <b>-M</b> option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the <b>-N</b>
+(<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
+not file names are shown, and the <b>-Z</b> option changes the file name
+terminator to a zero byte.
 </P>
 <P>
 The amount of memory used for buffering files that are being scanned is
@ -85,9 +88,10 @@ controlled by parameters that can be set by the <b>--buffer-size</b> and
 that is obtained at the start of processing. If an input file contains very
 long lines, a larger buffer may be needed; this is handled by automatically
 extending the buffer, up to the limit specified by <b>--max-buffer-size</b>. The
-default values for these parameters are specified when <b>pcre2grep</b> is
-built, with the default defaults being 20K and 1M respectively. An error occurs
-if a line is too long and the buffer can no longer be expanded.
+default values for these parameters can be set when <b>pcre2grep</b> is
+built; if nothing is specified, the defaults are set to 20KiB and 1MiB
+respectively. An error occurs if a line is too long and the buffer can no
+longer be expanded.
 </P>
 <P>
 The block of memory that is actually used is three times the "buffer size", to
@ -95,7 +99,7 @@ allow for buffering "before" and "after" lines. If the buffer size is too
 small, fewer than requested "before" and "after" lines may be output.
 </P>
 <P>
-Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
+Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater.
 BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
 (specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
 each line in the order in which they are defined, except that all the <b>-e</b>
@ -109,8 +113,8 @@ matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
 (either shown literally, or as an offset), scanning resumes immediately
 following the match, so that further matches on the same line can be found. If
 there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier part
-of the line.
+but patterns that follow the one that matched are not tried on the earlier
+matched part of the line.
 </P>
 <P>
 This behaviour means that the order in which multiple patterns are specified
@ -144,13 +148,18 @@ ignored.
 <br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
 <P>
 By default, a file that contains a binary zero byte within the first 1024 bytes
-is identified as a binary file, and is processed specially. (GNU grep
-identifies binary files in this manner.) However, if the newline type is
-specified as "nul", that is, the line terminator is a binary zero, the test for
-a binary file is not applied. See the <b>--binary-files</b> option for a means
-of changing the way binary files are handled.
+is identified as a binary file, and is processed specially. However, if the
+newline type is specified as NUL, that is, the line terminator is a binary
+zero, the test for a binary file is not applied. See the <b>--binary-files</b>
+option for a means of changing the way binary files are handled.
 </P>
-<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
+<br><a name="SEC5" href="#TOC1">BINARY ZEROS IN PATTERNS</a><br>
+<P>
+Patterns passed from the command line are strings that are terminated by a
+binary zero, so cannot contain internal zeros. However, patterns that are read
+from a file via the <b>-f</b> option may contain binary zeros.
+</P>
+<br><a name="SEC6" href="#TOC1">OPTIONS</a><br>
 <P>
 The order in which some of the options appear can affect the output. For
 example, both the <b>-H</b> and <b>-l</b> options affect the printing of file
@ -171,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of <i>number</i>
-is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
+context lines (the <b>-Z</b> option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
+<b>-A</b> is ignored.
 </P>
 <P>
 <b>-a</b>, <b>--text</b>
@ -181,14 +192,21 @@ Treat binary files as text. This is equivalent to
 <b>--binary-files</b>=<i>text</i>.
 </P>
 <P>
+<b>--allow-lookaround-bsk</b>
+PCRE2 now forbids the use of \K in lookarounds by default, in line with Perl.
+This option causes <b>pcre2grep</b> to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+option, which enables this somewhat dangerous usage.
+</P>
+<P>
 <b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
 Output up to <i>number</i> lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 <i>number</i> lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of <i>number</i> is expected to be relatively small. When
+instead of a colon for the context lines (the <b>-Z</b> option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of <i>number</i> is expected to be relatively small. When
 <b>-c</b> is used, <b>-B</b> is ignored.
 </P>
 <P>
@ -355,12 +373,15 @@ files; it does not apply to patterns specified by any of the <b>--include</b> or
 <P>
 <b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
 Read patterns from the file, one per line, and match them against each line of
-input. What constitutes a newline when reading the file is the operating
-system's default. The <b>--newline</b> option has no effect on this option.
-Trailing white space is removed from each line, and blank lines are ignored. An
-empty file contains no patterns and therefore matches nothing. See also the
-comments about multiple patterns versus a single pattern with alternatives in
-the description of <b>-e</b> above.
+input. As is the case with patterns on the command line, no delimiters should
+be used. What constitutes a newline when reading the file is the operating
+system's default interpretation of \n. The <b>--newline</b> option has no
+effect on this option. Trailing white space is removed from each line, and
+blank lines are ignored. An empty file contains no patterns and therefore
+matches nothing. Patterns read from a file in this way may contain binary
+zeros, which are treated as ordinary data characters. See also the comments
+about multiple patterns versus a single pattern with alternatives in the
+description of <b>-e</b> above.
 <br>
 <br>
 If this option is given more than once, all the specified files are read. A
@ -373,14 +394,15 @@ command line; all arguments are treated as the names of paths to be searched.
 <P>
 <b>--file-list</b>=<i>filename</i>
 Read a list of files and/or directories that are to be scanned from the given
-file, one per line. Trailing white space is removed from each line, and blank
-lines are ignored. These paths are processed before any that are listed on the
-command line. The file name can be given as "-" to refer to the standard input.
-If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
-read first. This is useful only when the standard input is a terminal, from
-which further lines (the list of files) can be read after an end-of-file
-indication. If this option is given more than once, all the specified files are
-read.
+file, one per line. What constitutes a newline when reading the file is the
+operating system's default. Trailing white space is removed from each line, and
+blank lines are ignored. These paths are processed before any that are listed
+on the command line. The file name can be given as "-" to refer to the standard
+input. If <b>--file</b> and <b>--file-list</b> are both specified as "-",
+patterns are read first. This is useful only when the standard input is a
+terminal, from which further lines (the list of files) can be read after an
+end-of-file indication. If this option is given more than once, all the
+specified files are read.
 </P>
 <P>
 <b>--file-offsets</b>
@ -394,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
 <P>
 <b>-H</b>, <b>--with-filename</b>
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the <b>-M</b> option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the <b>-M</b> option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>-h</b>, <b>--no-filename</b>
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
+Suppress the output file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The <b>-Z</b> option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>--heap-limit</b>=<i>number</i>
@ -431,8 +455,8 @@ Ignore upper/lower case distinctions during comparisons.
 <P>
 <b>--include</b>=<i>pattern</i>
 If any <b>--include</b> patterns are specified, the only files that are
-processed are those that match one of the patterns (and do not match an
-<b>--exclude</b> pattern). This option does not affect directories, but it
+processed are those whose names match one of the patterns and do not match an
+<b>--exclude</b> pattern. This option does not affect directories, but it
 applies to all files, whether listed on the command line, obtained from
 <b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
 expression, and is matched against the final component of the file name, not
@ -451,8 +475,8 @@ may be given any number of times; all the files are read.
 <P>
 <b>--include-dir</b>=<i>pattern</i>
 If any <b>--include-dir</b> patterns are specified, the only directories that
-are processed are those that match one of the patterns (and do not match an
-<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
+are processed are those whose names match one of the patterns and do not match
+an <b>--exclude-dir</b> pattern. This applies to all directories, whether listed
 on the command line, obtained from <b>--file-list</b>, or by scanning a parent
 directory. The pattern is a PCRE2 regular expression, and is matched against
 the final component of the directory name, not the entire path. The <b>-F</b>,
@ -464,19 +488,22 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
 <b>-L</b>, <b>--files-without-match</b>
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous <b>-H</b>,
-<b>-h</b>, or <b>-l</b> options.
+output once, on a separate line by default, but if the <b>-Z</b> option is set, 
+they are separated by zero bytes instead of newlines. This option overrides any
+previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>-l</b>, <b>--files-with-matches</b>
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the <b>-c</b> (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches. This
-opeion overrides any previous <b>-H</b>, <b>-h</b>, or <b>-L</b> options.
+a separate line, but if the <b>-Z</b> option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches that
+occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
+<b>-h</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>--label</b>=<i>name</i>
@ -489,13 +516,13 @@ short form for this option.
 When this option is given, non-compressed input is read and processed line by
 line, and the output is flushed after each write. By default, input is read in
 large chunks, unless <b>pcre2grep</b> can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments). Output
-to terminal is normally automatically flushed by the operating system. This
-option can be useful when the input or output is attached to a pipe and you do
-not want <b>pcre2grep</b> to buffer up large amounts of data. However, its use
-will affect performance, and the <b>-M</b> (multiline) option ceases to work.
-When input is from a compressed .gz or .bz2 file, <b>--line-buffered</b> is
-ignored.
+terminal, which is currently possible only in Unix-like environments or
+Windows. Output to terminal is normally automatically flushed by the operating
+system. This option can be useful when the input or output is attached to a
+pipe and you do not want <b>pcre2grep</b> to buffer up large amounts of data.
+However, its use will affect performance, and the <b>-M</b> (multiline) option
+ceases to work. When input is from a compressed .gz or .bz2 file,
+<b>--line-buffered</b> is ignored.
 </P>
 <P>
 <b>--line-offsets</b>
@ -516,46 +543,6 @@ locale is specified, the PCRE2 library's default (usually the "C" locale) is
 used. There is no short form for this option.
 </P>
 <P>
-<b>--match-limit</b>=<i>number</i>
-Processing some regular expression patterns may take a very long time to search
-for all possible matching strings. Others may require a very large amount of
-memory. There are three options that set resource limits for matching.
-<br>
-<br>
-The <b>--match-limit</b> option provides a means of limiting computing resource
-usage when processing patterns that are not going to match, but which have a
-very large number of possibilities in their search trees. The classic example
-is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
-counter that is incremented each time around its main processing loop. If the
-value set by <b>--match-limit</b> is reached, an error occurs.
-<br>
-<br>
-The <b>--heap-limit</b> option specifies, as a number of kilobytes, the amount
-of heap memory that may be used for matching. Heap memory is needed only if
-matching the pattern requires a significant number of nested backtracking
-points to be remembered. This parameter can be set to zero to forbid the use of
-heap memory altogether.
-<br>
-<br>
-The <b>--depth-limit</b> option limits the depth of nested backtracking points,
-which indirectly limits the amount of memory that is used. The amount of memory
-needed for each backtracking point depends on the number of capturing
-parentheses in the pattern, so the amount of memory that is used before this
-limit acts varies from pattern to pattern. This limit is of use only if it is
-set smaller than <b>--match-limit</b>.
-<br>
-<br>
-There are no short forms for these options. The default settings are specified
-when the PCRE2 library is compiled, with the default defaults being very large
-and so effectively unlimited.
-</P>
-<P>
-\fB--max-buffer-size=<i>number</i>
-This limits the expansion of the processing buffer, whose initial size can be
-set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
-smaller than the starting buffer size.
-</P>
-<P>
 <b>-M</b>, <b>--multiline</b>
 Allow patterns to match more than one line. When this option is set, the PCRE2
 library is called in "multiline" mode. This allows a matched string to extend
@ -585,29 +572,84 @@ well as possibly handling a two-character newline sequence.
 There is a limit to the number of lines that can be matched, imposed by the way
 that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
 large processing buffer, this should not be a problem, but the <b>-M</b> option
-does not work when input is read line by line (see \fP--line-buffered\fP.)
+does not work when input is read line by line (see <b>--line-buffered</b>.)
+</P>
+<P>
+<b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
+Stop processing after finding <i>number</i> matching lines, or non-matching
+lines if <b>-v</b> is also set. Any trailing context lines are output after the
+final match. In multiline mode, each multiline match counts as just one line
+for this purpose. If this limit is reached when reading the standard input from
+a regular file, the file is left positioned just after the last matching line.
+If <b>-c</b> is also set, the count that is output is never greater than
+<i>number</i>. This option has no effect if used with <b>-L</b>, <b>-l</b>, or
+<b>-q</b>, or when just checking for a match in a binary file.
+</P>
+<P>
+<b>--match-limit</b>=<i>number</i>
+Processing some regular expression patterns may take a very long time to search
+for all possible matching strings. Others may require a very large amount of
+memory. There are three options that set resource limits for matching.
+<br>
+<br>
+The <b>--match-limit</b> option provides a means of limiting computing resource
+usage when processing patterns that are not going to match, but which have a
+very large number of possibilities in their search trees. The classic example
+is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
+counter that is incremented each time around its main processing loop. If the
+value set by <b>--match-limit</b> is reached, an error occurs.
+<br>
+<br>
+The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
+1024 bytes), the maximum amount of heap memory that may be used for matching.
+<br>
+<br>
+The <b>--depth-limit</b> option limits the depth of nested backtracking points,
+which indirectly limits the amount of memory that is used. The amount of memory
+needed for each backtracking point depends on the number of capturing
+parentheses in the pattern, so the amount of memory that is used before this
+limit acts varies from pattern to pattern. This limit is of use only if it is
+set smaller than <b>--match-limit</b>.
+<br>
+<br>
+There are no short forms for these options. The default limits can be set
+when the PCRE2 library is compiled; if they are not specified, the defaults
+are very large and so effectively unlimited.
+</P>
+<P>
+<b>--max-buffer-size</b>=<i>number</i>
+This limits the expansion of the processing buffer, whose initial size can be
+set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
+smaller than the starting buffer size.
 </P>
 <P>
 <b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
-The PCRE2 library supports five different conventions for indicating
-the ends of lines. They are the single-character sequences CR (carriage return)
-and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
-which recognizes any of the preceding three types, and an "any" convention, in
-which any Unicode line ending sequence is assumed to end a line. The Unicode
-sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
-(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
-PS (paragraph separator, U+2029).
+Six different conventions for indicating the ends of lines in scanned files are
+supported. For example:
+<pre>
+  pcre2grep -N CRLF 'some pattern' &#60;file&#62;
+</pre>
+The newline type may be specified in upper, lower, or mixed case. If the
+newline type is NUL, lines are separated by binary zero characters. The other
+types are the single-character sequences CR (carriage return) and LF
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
+recognizes any of the preceding three types, and an "any" type, for which any
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
+(paragraph separator, U+2029).
 <br>
 <br>
 When the PCRE2 library is built, a default line-ending sequence is specified.
 This is normally the standard sequence for the operating system. Unless
 otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
-The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
-makes it possible to use <b>pcre2grep</b> to scan files that have come from
-other environments without having to modify their line endings. If the data
-that is being scanned does not agree with the convention set by this option,
-<b>pcre2grep</b> may behave in strange ways. Note that this option does not
-apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
+<br>
+<br>
+This option makes it possible to use <b>pcre2grep</b> to scan files that have
+come from other environments without having to modify their line endings. If
+the data that is being scanned does not agree with the convention set by this
+option, <b>pcre2grep</b> may behave in strange ways. Note that this option does
+not apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
 <b>--include-from</b> options, which are expected to use the operating system's
 standard newline sequence.
 </P>
@ -629,29 +671,41 @@ It should never be needed in normal use.
 </P>
 <P>
 <b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
-When there is a match, instead of outputting the whole line that matched,
-output just the given text. This option is mutually exclusive with
-<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
-sequences starting with a dollar character may be used to insert the contents
-of the matched part of the line and/or captured substrings into the text.
+When there is a match, instead of outputting the line that matched, output just
+the text specified in this option, followed by an operating-system standard
+newline. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>,
+and <b>-C</b> options are ignored. The <b>--newline</b> option has no effect on
+this option, which is mutually exclusive with <b>--only-matching</b>,
+<b>--file-offsets</b>, and <b>--line-offsets</b>. However, like
+<b>--only-matching</b>, if there is more than one match in a line, each of them
+causes a line of output.
 <br>
 <br>
-$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
-substring of the given decimal number; zero substitutes the whole match. If
-the number is greater than the number of capturing substrings, or if the
-capture is unset, the replacement is empty.
+Escape sequences starting with a dollar character may be used to insert the
+contents of the matched part of the line and/or captured substrings into the
+text.
+<br>
+<br>
+$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured substring of the given
+decimal number; zero substitutes the whole match. If the number is greater than
+the number of capturing substrings, or if the capture is unset, the replacement
+is empty.
 <br>
 <br>
 $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
 newline; $r by carriage return; $t by tab; $v by vertical tab.
 <br>
 <br>
-$o&#60;digits&#62; is replaced by the character represented by the given octal
-number; up to three digits are processed.
+$o&#60;digits&#62; or $o{&#60;digits&#62;} is replaced by the character whose code point is the
+given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the
+second form must be used.
 <br>
 <br>
-$x&#60;digits&#62; is replaced by the character represented by the given hexadecimal
-number; up to two digits are processed.
+$x&#60;digits&#62; or $x{&#60;digits&#62;} is replaced by the character represented by the
+given hexadecimal number. In the first form, up to two hexadecimal digits are
+processed. When more digits are needed in Unicode mode to specify a wide
+character, the second form must be used.
 <br>
 <br>
 Any other character is substituted by itself. In particular, $$ is replaced by
@ -673,20 +727,32 @@ otherwise empty line. This option is mutually exclusive with <b>--output</b>,
 <P>
 <b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
 Show only the part of the line that matched the capturing parentheses of the
-given number. Up to 32 capturing parentheses are supported, and -o0 is
-equivalent to <b>-o</b> without a number. Because these options can be given
-without an argument (see above), if an argument is present, it must be given in
-the same shell item, for example, -o3 or --only-matching=2. The comments given
-for the non-argument case above also apply to this option. If the specified
-capturing parentheses do not exist in the pattern, or were not set in the
-match, nothing is output unless the file name or line number are being output.
+given number. Up to 50 capturing parentheses are supported by default. This
+limit can be changed via the <b>--om-capture</b> option. A pattern may contain
+any number of capturing parentheses, but only those whose number is within the
+limit can be accessed by <b>-o</b>. An error occurs if the number specified by
+<b>-o</b> is greater than the limit.
+<br>
+<br>
+-o0 is the same as <b>-o</b> without a number. Because these options can be
+given without an argument (see above), if an argument is present, it must be
+given in the same shell item, for example, -o3 or --only-matching=2. The
+comments given for the non-argument case above also apply to this option. If
+the specified capturing parentheses do not exist in the pattern, or were not
+set in the match, nothing is output unless the file name or line number are
+being output.
 <br>
 <br>
 If this option is given multiple times, multiple substrings are output for each
 match, in the order the options are given, and all on one line. For example,
 -o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
 then 3 again to be output. By default, there is no separator (but see the next
-option).
+but one option).
+</P>
+<P>
+<b>--om-capture</b>=<i>number</i>
+Set the number of capturing parentheses that can be accessed by <b>-o</b>. The
+default is 50.
 </P>
 <P>
 <b>--om-separator</b>=<i>text</i>
@ -708,7 +774,8 @@ option to "recurse".
 </P>
 <P>
 <b>--recursion-limit</b>=<i>number</i>
-See <b>--match-limit</b> above.
+This is an obsolete synonym for <b>--depth-limit</b>. See <b>--match-limit</b>
+above for details.
 </P>
 <P>
 <b>-s</b>, <b>--no-messages</b>
@ -729,11 +796,23 @@ ignored when used with <b>-L</b> (list files without matches), because the grand
 total would always be zero.
 </P>
 <P>
-<b>-u</b>, <b>--utf-8</b>
+<b>-u</b>, <b>--utf</b>
 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
 with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
-<b>--include</b> options) and all subject lines that are scanned must be valid
-strings of UTF-8 characters.
+<b>--include</b> options) and all lines that are scanned must be valid strings
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
+occurs.
+</P>
+<P>
+<b>-U</b>, <b>--utf-allow-invalid</b>
+As <b>--utf</b>, but in addition subject lines may contain invalid UTF-8 code
+unit sequences. These can never form part of any pattern match. Patterns
+themselves, however, must still be valid UTF-8 strings. This facility allows
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
+or other binary files. For more details about matching in non-valid UTF-8
+strings, see the
+<a href="pcre2unicode.html"><b>pcre2unicode</b>(3)</a>
+documentation.
 </P>
 <P>
 <b>-V</b>, <b>--version</b>
@ -744,7 +823,9 @@ ignored.
 <P>
 <b>-v</b>, <b>--invert-match</b>
 Invert the sense of the match, so that lines which do <i>not</i> match any of
-the patterns are the ones that are found.
+the patterns are the ones that are found. When this option is set, options such
+as <b>--only-matching</b> and <b>--output</b>, which specify parts of a match
+that are to be output, are ignored.
 </P>
 <P>
 <b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
@ -764,27 +845,46 @@ pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the <b>--include</b> or <b>--exclude</b> options.
 </P>
-<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
+<P>
+<b>-Z</b>, <b>--null</b>
+Terminate files names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
+</P>
+<br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 <P>
 The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
 order, for a locale. The first one that is set is used. This can be overridden
 by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
 (usually the "C" locale) is used.
 </P>
-<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
+<br><a name="SEC8" href="#TOC1">NEWLINES</a><br>
 <P>
 The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
-different newline conventions from the default. Any parts of the input files
-that are written to the standard output are copied identically, with whatever
-newline sequences they have in the input. However, the setting of this option
-does not affect the interpretation of files specified by the <b>-f</b>,
-<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
-the operating system's standard newline sequence, nor does it affect the way in
-which <b>pcre2grep</b> writes informational messages to the standard error and
-output streams. For these it uses the string "\n" to indicate newlines,
-relying on the C I/O library to convert this to an appropriate sequence.
+newline conventions that differ from the default. This option affects only the
+way scanned files are processed. It does not affect the interpretation of files
+specified by the <b>-f</b>, <b>--file-list</b>, <b>--exclude-from</b>, or
+<b>--include-from</b> options.
 </P>
-<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
+<P>
+Any parts of the scanned input files that are written to the standard output
+are copied with whatever newline sequences they have in the input. However, if
+the final line of a file is output, and it does not end with a newline
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
+single NL is used.
+</P>
+<P>
+The newline setting does not affect the way in which <b>pcre2grep</b> writes
+newlines in informational messages to the standard output and error streams.
+Under Windows, the standard output is set to be binary, so that "\r\n" at the
+ends of output lines that are copied from the input is not converted to
+"\r\r\n" by the C I/O library. This means that any messages written to the
+standard output must end with "\r\n". For all other operating systems, and
+for all messages to the standard error stream, "\n" is used.
+</P>
+<br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
 <P>
 Many of the short and long forms of <b>pcre2grep</b>'s options are the same
 as in the GNU <b>grep</b> program. Any long option of the form
@ -793,9 +893,9 @@ as in the GNU <b>grep</b> program. Any long option of the form
 <b>--file-offsets</b>, <b>--heap-limit</b>, <b>--include-dir</b>,
 <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>,
 <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
-<b>--output</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
-<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
-capturing parentheses number.
+<b>--output</b>, <b>-u</b>, <b>--utf</b>, <b>-U</b>, and <b>--utf-allow-invalid</b>
+options are specific to <b>pcre2grep</b>, as is the use of the
+<b>--only-matching</b> option with a capturing parentheses number.
 </P>
 <P>
 Although most of the common options work the same way, a few are different in
@ -804,7 +904,7 @@ for GNU <b>grep</b>, but a regular expression for <b>pcre2grep</b>. If both the
 <b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
 without counts, but <b>pcre2grep</b> gives the counts as well.
 </P>
-<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
+<br><a name="SEC10" href="#TOC1">OPTIONS WITH DATA</a><br>
 <P>
 There are four different ways in which an option with data can be specified.
 If a short form option is used, the data may follow immediately, or (with one
@ -836,14 +936,16 @@ The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
 options does have data, it must be given in the first form, using an equals
 character. Otherwise <b>pcre2grep</b> will assume that it has no data.
 </P>
-<br><a name="SEC10" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
+<br><a name="SEC11" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
 <P>
 <b>pcre2grep</b> has, by default, support for calling external programs or
 scripts or echoing specific strings during matching by making use of PCRE2's
-callout facility. However, this support can be disabled when <b>pcre2grep</b> is
-built. You can find out whether your binary has support for callouts by running
-it with the <b>--help</b> option. If the support is not enabled, all callouts in
-patterns are ignored by <b>pcre2grep</b>.
+callout facility. However, this support can be completely or partially disabled
+when <b>pcre2grep</b> is built. You can find out whether your binary has support
+for callouts by running it with the <b>--help</b> option. If callout support is
+completely disabled, all callouts in patterns are ignored by <b>pcre2grep</b>.
+If the facility is partially disabled, calling external programs is not
+supported, and callouts that request it are ignored.
 </P>
 <P>
 A callout in a PCRE2 pattern is of the form (?C&#60;arg&#62;) where the argument is
@ -853,9 +955,39 @@ documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
 only callouts with string arguments are useful.
 </P>
 <br><b>
+Echoing a specific string
+</b><br>
+<P>
+Starting the callout string with a pipe character invokes an echoing facility
+that avoids calling an external program or script. This facility is always
+available, provided that callouts were not completely disabled when
+<b>pcre2grep</b> was built. The rest of the callout string is processed as a
+zero-terminated string, which means it should not contain any internal binary
+zeros. It is written to the output, having first been passed through the same
+escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
+above). However, $0 cannot be used to insert a matched substring because the
+match is still in progress. Instead, the single character '0' is inserted. Any
+syntax errors in the string (for example, a dollar not followed by another
+character) causes the callout to be ignored. No terminator is added to the
+output string, so if you want a newline, you must include it explicitly using
+the escape $n. For example:
+<pre>
+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' &#60;some file&#62;
+</pre>
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+pattern with (*FAIL).
+</P>
+<br><b>
 Calling external programs or scripts
 </b><br>
 <P>
+This facility can be independently disabled when <b>pcre2grep</b> is built. It
+is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
+where <b>lib$spawn()</b> is used, and for any Unix-like environment where
+<b>fork()</b> and <b>execv()</b> are available.
+</P>
+<P>
 If the callout string does not start with a pipe (vertical bar) character, it
 is parsed into a list of substrings separated by pipe characters. The first
 substring must be an executable name, with the following substrings specifying
@ -864,14 +996,11 @@ arguments:
  executable_name|arg1|arg2|...
 </pre>
 Any substring (including the executable name) may contain escape sequences
-started by a dollar character: $&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the
-captured substring of the given decimal number, which must be greater than
-zero. If the number is greater than the number of capturing substrings, or if
-the capture is unset, the replacement is empty.
-</P>
-<P>
-Any other character is substituted by itself. In particular, $$ is replaced by
-a single dollar and $| is replaced by a pipe character. Here is an example:
+started by a dollar character. These are the same as for the <b>--output</b>
+(<b>-O</b>) option documented above, except that $0 cannot insert the matched
+string because the match is still in progress. Instead, the character '0'
+is inserted. If you need a literal dollar or pipe character in any
+substring, use $$ or $| respectively. Here is an example:
 <pre>
  echo -e "abcde\n12345" | pcre2grep \
    '(?x)(.)(..(.))
@ -884,29 +1013,16 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345
 </pre>
-The parameters for the <b>execv()</b> system call that is used to run the
-program or script are zero-terminated strings. This means that binary zero
-characters in the callout argument will cause premature termination of their
-substrings, and therefore should not be present. Any syntax errors in the
-string (for example, a dollar not followed by another character) cause the
-callout to be ignored. If running the program fails for any reason (including
-the non-existence of the executable), a local matching failure occurs and the
-matcher backtracks in the normal way.
+The parameters for the system call that is used to run the program or script
+are zero-terminated strings. This means that binary zero characters in the
+callout argument will cause premature termination of their substrings, and
+therefore should not be present. Any syntax errors in the string (for example,
+a dollar not followed by another character) causes the callout to be ignored.
+If running the program fails for any reason (including the non-existence of the
+executable), a local matching failure occurs and the matcher backtracks in the
+normal way.
 </P>
-<br><b>
-Echoing a specific string
-</b><br>
-<P>
-If the callout string starts with a pipe (vertical bar) character, the rest of
-the string is written to the output, having been passed through the same escape
-processing as text from the --output option. This provides a simple echoing
-facility that avoids calling an external program or script. No terminator is
-added to the string, so if you want a newline, you must include it explicitly.
-Matching continues normally after the string is output. If you want to see only
-the callout output but not any output from an actual match, you should end the
-relevant pattern with (*FAIL).
-</P>
-<br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br>
+<br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br>
 <P>
 It is possible to supply a regular expression that takes a very long time to
 fail to match certain lines. Such patterns normally involve nested indefinite
@ -922,7 +1038,7 @@ overall resource limit. There are also other limits that affect the amount of
 memory used during matching; see the discussion of <b>--heap-limit</b> and
 <b>--depth-limit</b> above.
 </P>
-<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
+<br><a name="SEC13" href="#TOC1">DIAGNOSTICS</a><br>
 <P>
 Exit status is 0 if any matches were found, 1 if no matches were found, and 2
 for syntax errors, overlong lines, non-existent or inaccessible files (even if
@ -934,24 +1050,25 @@ affect the return code.
 When run under VMS, the return code is placed in the symbol PCRE2GREP_RC
 because VMS does not distinguish between exit(0) and exit(1).
 </P>
-<br><a name="SEC13" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
 <P>
-<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3).
+<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3),
+<b>pcre2unicode</b>(3).
 </P>
-<br><a name="SEC14" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC15" href="#TOC1">REVISION</a><br>
+<br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 13 November 2017
+Last updated: 30 July 2022
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@ -16,16 +16,17 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC1" href="#SEC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a>
 <li><a name="TOC2" href="#SEC2">AVAILABILITY OF JIT SUPPORT</a>
 <li><a name="TOC3" href="#SEC3">SIMPLE USE OF JIT</a>
-<li><a name="TOC4" href="#SEC4">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
-<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM JIT MATCHING</a>
-<li><a name="TOC6" href="#SEC6">CONTROLLING THE JIT STACK</a>
-<li><a name="TOC7" href="#SEC7">JIT STACK FAQ</a>
-<li><a name="TOC8" href="#SEC8">FREEING JIT SPECULATIVE MEMORY</a>
-<li><a name="TOC9" href="#SEC9">EXAMPLE CODE</a>
-<li><a name="TOC10" href="#SEC10">JIT FAST PATH API</a>
-<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
-<li><a name="TOC12" href="#SEC12">AUTHOR</a>
-<li><a name="TOC13" href="#SEC13">REVISION</a>
+<li><a name="TOC4" href="#SEC4">MATCHING SUBJECTS CONTAINING INVALID UTF</a>
+<li><a name="TOC5" href="#SEC5">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
+<li><a name="TOC6" href="#SEC6">RETURN VALUES FROM JIT MATCHING</a>
+<li><a name="TOC7" href="#SEC7">CONTROLLING THE JIT STACK</a>
+<li><a name="TOC8" href="#SEC8">JIT STACK FAQ</a>
+<li><a name="TOC9" href="#SEC9">FREEING JIT SPECULATIVE MEMORY</a>
+<li><a name="TOC10" href="#SEC10">EXAMPLE CODE</a>
+<li><a name="TOC11" href="#SEC11">JIT FAST PATH API</a>
+<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
+<li><a name="TOC13" href="#SEC13">AUTHOR</a>
+<li><a name="TOC14" href="#SEC14">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a><br>
 <P>
@ -53,6 +54,7 @@ platforms:
 <pre>
  ARM 32-bit (v5, v7, and Thumb2)
  ARM 64-bit
+  IBM s390x 64 bit
  Intel x86 32-bit and 64-bit
  MIPS 32-bit and 64-bit
  Power PC 32-bit and 64-bit
@ -89,7 +91,7 @@ or a negative error code.
 There is a limit to the size of pattern that JIT supports, imposed by the size
 of machine stack that it uses. The exact rules are not documented because they
 may change at any time, in particular, when new optimizations are introduced.
-If a pattern is too big, a call to \fBpcre2_jit_compile()\fB returns
+If a pattern is too big, a call to <b>pcre2_jit_compile()</b> returns
 PCRE2_ERROR_NOMEMORY.
 </P>
 <P>
@ -144,12 +146,39 @@ support is not available, or the pattern was not processed by
 <b>pcre2_jit_compile()</b>, or the JIT compiler was not able to handle the
 pattern.
 </P>
-<br><a name="SEC4" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
+<br><a name="SEC4" href="#TOC1">MATCHING SUBJECTS CONTAINING INVALID UTF</a><br>
+<P>
+When a pattern is compiled with the PCRE2_UTF option, subject strings are
+normally expected to be a valid sequence of UTF code units. By default, this is
+checked at the start of matching and an error is generated if invalid UTF is
+detected. The PCRE2_NO_UTF_CHECK option can be passed to <b>pcre2_match()</b> to
+skip the check (for improved performance) if you are sure that a subject string
+is valid. If this option is used with an invalid string, the result is
+undefined.
+</P>
+<P>
+However, a way of running matches on strings that may contain invalid UTF
+sequences is available. Calling <b>pcre2_compile()</b> with the
+PCRE2_MATCH_INVALID_UTF option has two effects: it tells the interpreter in
+<b>pcre2_match()</b> to support invalid UTF, and, if <b>pcre2_jit_compile()</b>
+is called, the compiled JIT code also supports invalid UTF. Details of how this
+support works, in both the JIT and the interpretive cases, is given in the
+<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
+documentation.
+</P>
+<P>
+There is also an obsolete option for <b>pcre2_jit_compile()</b> called
+PCRE2_JIT_INVALID_UTF, which currently exists only for backward compatibility.
+It is superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF
+and should no longer be used. It may be removed in future.
+</P>
+<br><a name="SEC5" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
 <P>
 The <b>pcre2_match()</b> options that are supported for JIT matching are
-PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
-PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
-PCRE2_ANCHORED option is not supported at match time.
+PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
+PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and
+PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not
+supported at match time.
 </P>
 <P>
 If the PCRE2_NO_JIT option is passed to <b>pcre2_match()</b> it disables the
@ -160,7 +189,7 @@ The only unsupported pattern items are \C (match a single data unit) when
 running in a UTF mode, and a callout immediately before an assertion condition
 in a conditional group.
 </P>
-<br><a name="SEC5" href="#TOC1">RETURN VALUES FROM JIT MATCHING</a><br>
+<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT MATCHING</a><br>
 <P>
 When a pattern is matched using JIT matching, the return values are the same
 as those given by the interpretive <b>pcre2_match()</b> code, with the addition
@ -176,10 +205,10 @@ circumstance when JIT is not used, but the details of exactly what is counted
 are not the same. The PCRE2_ERROR_DEPTHLIMIT error code is never returned
 when JIT matching is used.
 <a name="stackcontrol"></a></P>
-<br><a name="SEC6" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
+<br><a name="SEC7" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
 <P>
 When the compiled JIT code runs, it needs a block of memory to use as a stack.
-By default, it uses 32K on the machine stack. However, some large or
+By default, it uses 32KiB on the machine stack. However, some large or
 complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT
 is given when there is not enough stack. Three functions are provided for
 managing blocks of memory for use as JIT stacks. There is further discussion
@ -193,9 +222,10 @@ are a starting size, a maximum size, and a general context (for memory
 allocation functions, or NULL for standard memory allocation). It returns a
 pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
 is an error. The <b>pcre2_jit_stack_free()</b> function is used to free a stack
-that is no longer needed. (For the technically minded: the address space is
-allocated by mmap or VirtualAlloc.) A maximum stack size of 512K to 1M should
-be more than enough for any pattern.
+that is no longer needed. If its argument is NULL, this function returns
+immediately, without doing anything. (For the technically minded: the address
+space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to
+1MiB should be more than enough for any pattern.
 </P>
 <P>
 The <b>pcre2_jit_stack_assign()</b> function specifies which stack JIT code
@ -207,9 +237,10 @@ should use. Its arguments are as follows:
 </pre>
 The first argument is a pointer to a match context. When this is subsequently
 passed to a matching function, its information determines which JIT stack is
-used. There are three cases for the values of the other two options:
+used. If this argument is NULL, the function returns immediately, without doing
+anything. There are three cases for the values of the other two options:
 <pre>
-  (1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
+  (1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32KiB block
      on the machine stack is used. This is the default when a match
      context is created.

@ -220,7 +251,7 @@ used. There are three cases for the values of the other two options:
  (3) If <i>callback</i> is not NULL, it must point to a function that is
      called with <i>data</i> as an argument at the start of matching, in
      order to set up a JIT stack. If the return from the callback
-      function is NULL, the internal 32K stack is used; otherwise the
+      function is NULL, the internal 32KiB stack is used; otherwise the
      return value must be a valid JIT stack, the result of calling
      <b>pcre2_jit_stack_create()</b>.
 </pre>
@ -238,11 +269,11 @@ starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 </P>
 <P>
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 </P>
 <P>
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
@ -256,7 +287,7 @@ inefficient solution, and not recommended.
 This is a suggestion for how a multithreaded program that needs to set up
 non-default JIT stacks might operate:
 <pre>
-  During thread initalization
+  During thread initialization
    thread_local_var = pcre2_jit_stack_create(...)

  During thread exit
@ -267,7 +298,7 @@ non-default JIT stacks might operate:
 </pre>
 All the functions described in this section do nothing if JIT is not available.
 <a name="stackfaq"></a></P>
-<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
+<br><a name="SEC8" href="#TOC1">JIT STACK FAQ</a><br>
 <P>
 (1) Why do we need JIT stacks?
 <br>
@ -286,9 +317,9 @@ we do the recursion in memory.
 Modern operating systems have a nice feature: they can reserve an address space
 instead of allocating memory. We can safely allocate memory pages inside this
 address space, so the stack could grow without moving memory data (this is
-important because of pointers). Thus we can allocate 1M address space, and use
-only a single memory page (usually 4K) if that is enough. However, we can still
-grow up to 1M anytime if needed.
+important because of pointers). Thus we can allocate 1MiB address space, and
+use only a single memory page (usually 4KiB) if that is enough. However, we can
+still grow up to 1MiB anytime if needed.
 </P>
 <P>
 (3) Who "owns" a JIT stack?
@ -309,12 +340,12 @@ stack through the JIT callback function.
 You can free a JIT stack at any time, as long as it will not be used by
 <b>pcre2_match()</b> again. When you assign the stack to a match context, only a
 pointer is set. There is no reference counting or any other magic. You can free
-compiled patterns, contexts, and stacks in any order, anytime. Just \fIdo
-not\fP call <b>pcre2_match()</b> with a match context pointing to an already
-freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
-used by <b>pcre2_match()</b> in another thread). You can also replace the stack
-in a context at any time when it is not in use. You should free the previous
-stack before assigning a replacement.
+compiled patterns, contexts, and stacks in any order, anytime.
+Just <i>do not</i> call <b>pcre2_match()</b> with a match context pointing to an
+already freed stack, as that will cause SEGFAULT. (Also, do not free a stack
+currently used by <b>pcre2_match()</b> in another thread). You can also replace
+the stack in a context at any time when it is not in use. You should free the
+previous stack before assigning a replacement.
 </P>
 <P>
 (5) Should I allocate/free a stack every time before/after calling
@ -328,7 +359,7 @@ list of patterns.
 </P>
 <P>
 (6) OK, the stack is for long term memory allocation. But what happens if a
-pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
+pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the
 stack is freed?
 <br>
 <br>
@ -346,19 +377,19 @@ stack handling?
 No, thanks to Windows. If POSIX threads were used everywhere, we could throw
 out this complicated API.
 </P>
-<br><a name="SEC8" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
+<br><a name="SEC9" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
 <P>
 <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <P>
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
 memory management, or NULL for standard memory management.
 </P>
-<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
+<br><a name="SEC10" href="#TOC1">EXAMPLE CODE</a><br>
 <P>
 This is a single-threaded example that specifies a JIT stack without using a
 callback. A real program should include error checking after all the function
@ -387,7 +418,7 @@ calls.

 </PRE>
 </P>
-<br><a name="SEC10" href="#TOC1">JIT FAST PATH API</a><br>
+<br><a name="SEC11" href="#TOC1">JIT FAST PATH API</a><br>
 <P>
 Because the API described above falls back to interpreted matching when JIT is
 not available, it is convenient for programs that are written for general use
@ -400,28 +431,31 @@ processed by <b>pcre2_jit_compile()</b>).
 </P>
 <P>
 The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
-the same arguments as <b>pcre2_match()</b>. The return values are also the same,
-plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
-requested that was not compiled. Unsupported option bits (for example,
-PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT option.
+the same arguments as <b>pcre2_match()</b>. However, the subject string must be
+specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported
+option bits (for example, PCRE2_ANCHORED, PCRE2_ENDANCHORED and
+PCRE2_COPY_MATCHED_SUBJECT) are ignored, as is the PCRE2_NO_JIT option. The
+return values are also the same as for <b>pcre2_match()</b>, plus
+PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
+that was not compiled.
 </P>
 <P>
 When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path, and if invalid data is passed, the result is undefined.
 </P>
 <P>
 Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
 speedups of more than 10%.
 </P>
-<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2api</b>(3)
 </P>
-<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel (FAQ by Zoltan Herczeg)
 <br>
@ -430,11 +464,11 @@ University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC13" href="#TOC1">REVISION</a><br>
+<br><a name="SEC14" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 31 March 2017
+Last updated: 30 November 2021
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@ -20,12 +20,12 @@ There are some size limitations in PCRE2 but it is hoped that they will never
 in practice be relevant.
 </P>
 <P>
-The maximum size of a compiled pattern is approximately 64K code units for the
-8-bit and 16-bit libraries if PCRE2 is compiled with the default internal
-linkage size, which is 2 bytes for these libraries. If you want to process
-regular expressions that are truly enormous, you can compile PCRE2 with an
-internal linkage size of 3 or 4 (when building the 16-bit library, 3 is rounded
-up to 4). See the <b>README</b> file in the source distribution and the
+The maximum size of a compiled pattern is approximately 64 thousand code units
+for the 8-bit and 16-bit libraries if PCRE2 is compiled with the default
+internal linkage size, which is 2 bytes for these libraries. If you want to
+process regular expressions that are truly enormous, you can compile PCRE2 with
+an internal linkage size of 3 or 4 (when building the 16-bit library, 3 is
+rounded up to 4). See the <b>README</b> file in the source distribution and the
 <a href="pcre2build.html"><b>pcre2build</b></a>
 documentation for details. In these cases the limit is substantially larger.
 However, the speed of execution is slower. In the 32-bit library, the internal
@ -50,17 +50,17 @@ All values in repeating quantifiers must be less than 65536.
 The maximum length of a lookbehind assertion is 65535 characters.
 </P>
 <P>
-There is no limit to the number of parenthesized subpatterns, but there can be
-no more than 65535 capturing subpatterns. There is, however, a limit to the
-depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
-order to limit the amount of system stack used at compile time. The default
-limit can be specified when PCRE2 is built; the default default is 250. An
-application can change this limit by calling pcre2_set_parens_nest_limit() to
-set the limit in a compile context.
+There is no limit to the number of parenthesized groups, but there can be no
+more than 65535 capture groups, and there is a limit to the depth of nesting of
+parenthesized subpatterns of all kinds. This is imposed in order to limit the
+amount of system stack used at compile time. The default limit can be specified
+when PCRE2 is built; if not, the default is set to 250. An application can
+change this limit by calling pcre2_set_parens_nest_limit() to set the limit in
+a compile context.
 </P>
 <P>
-The maximum length of name for a named subpattern is 32 code units, and the
-maximum number of named subpatterns is 10000.
+The maximum length of name for a named capture group is 32 code units, and the
+maximum number of such groups is 10000.
 </P>
 <P>
 The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
 </P>
+<P>
+The maximum amount of heap memory used for matching is controlled by the heap 
+limit, which can be set in a pattern or in a match context. The default is a 
+very large number, effectively unlimited.
+</P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -86,9 +91,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 30 March 2017
+Last updated: 26 July 2022
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2matching.html
+++ b/doc/html/pcre2matching.html
@ -78,14 +78,15 @@ tried is controlled by the greedy or ungreedy nature of the quantifier.
 If a leaf node is reached, a matching string has been found, and at that point
 the algorithm stops. Thus, if there is more than one possible match, this
 algorithm returns the first one that it finds. Whether this is the shortest,
-the longest, or some intermediate length depends on the way the greedy and
-ungreedy repetition quantifiers are specified in the pattern.
+the longest, or some intermediate length depends on the way the alternations
+and the greedy or ungreedy repetition quantifiers are specified in the
+pattern.
 </P>
 <P>
 Because it ends up with a single path through the tree, it is relatively
 straightforward for this algorithm to keep track of the substrings that are
 matched by portions of the pattern in parentheses. This provides support for
-capturing parentheses and back references.
+capturing parentheses and backreferences.
 </P>
 <br><a name="SEC4" href="#TOC1">THE ALTERNATIVE MATCHING ALGORITHM</a><br>
 <P>
@ -109,11 +110,17 @@ no more unterminated paths. At this point, terminated paths represent the
 different matching possibilities (if there are none, the match has failed).
 Thus, if there is more than one possible match, this algorithm finds all of
 them, and in particular, it finds the longest. The matches are returned in
-decreasing order of length. There is an option to stop the algorithm after the
-first match (which is necessarily the shortest) is found.
+the output vector in decreasing order of length. There is an option to stop the
+algorithm after the first match (which is necessarily the shortest) is found.
 </P>
 <P>
-Note that all the matches that are found start at the same point in the
+Note that the size of vector needed to contain all the results depends on the
+number of simultaneous matches, not on the number of parentheses in the
+pattern. Using <b>pcre2_match_data_create_from_pattern()</b> to create the match
+data block is therefore not advisable when doing DFA matching.
+</P>
+<P>
+Note also that all the matches that are found start at the same point in the
 subject. If the pattern
 <pre>
  cat(er(pillar)?)?
@ -134,7 +141,8 @@ do want multiple matches in such cases, either use an ungreedy repeat
 </P>
 <P>
 There are a number of features of PCRE2 regular expressions that are not
-supported by the alternative matching algorithm. They are as follows:
+supported or behave differently in the alternative matching function. Those
+that are not supported cause an error if encountered.
 </P>
 <P>
 1. Because the algorithm finds all possible matches, the greedy or ungreedy
@ -158,49 +166,49 @@ possibilities, and PCRE2's implementation of this algorithm does not attempt to
 do this. This means that no captured substrings are available.
 </P>
 <P>
-3. Because no substrings are captured, back references within the pattern are
-not supported, and cause errors if encountered.
+3. Because no substrings are captured, backreferences within the pattern are
+not supported.
 </P>
 <P>
 4. For the same reason, conditional expressions that use a backreference as the
 condition or test for a specific group recursion are not supported.
 </P>
 <P>
-5. Because many paths through the tree may be active, the \K escape sequence,
-which resets the start of the match when encountered (but may be on some paths
-and not on others), is not supported. It causes an error if encountered.
+5. Again for the same reason, script runs are not supported.
 </P>
 <P>
-6. Callouts are supported, but the value of the <i>capture_top</i> field is
+6. Because many paths through the tree may be active, the \K escape sequence,
+which resets the start of the match when encountered (but may be on some paths
+and not on others), is not supported.
+</P>
+<P>
+7. Callouts are supported, but the value of the <i>capture_top</i> field is
 always 1, and the value of the <i>capture_last</i> field is always 0.
 </P>
 <P>
-7. The \C escape sequence, which (in the standard algorithm) always matches a
+8. The \C escape sequence, which (in the standard algorithm) always matches a
 single code unit, even in a UTF mode, is not supported in these modes, because
 the alternative algorithm moves through the subject string one character (not
 code unit) at a time, for all active paths through the tree.
 </P>
 <P>
-8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
+9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
 supported. (*FAIL) is supported, and behaves like a failing negative assertion.
 </P>
+<P>
+10. The PCRE2_MATCH_INVALID_UTF option for <b>pcre2_compile()</b> is not
+supported by <b>pcre2_dfa_match()</b>.
+</P>
 <br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
 <P>
-Using the alternative matching algorithm provides the following advantages:
+The main advantage of the alternative algorithm is that all possible matches
+(at a single point in the subject) are automatically found, and in particular,
+the longest match is found. To find more than one match at the same point using
+the standard algorithm, you have to do kludgy things with callouts.
 </P>
 <P>
-1. All possible matches (at a single point in the subject) are automatically
-found, and in particular, the longest match is found. To find more than one
-match using the standard algorithm, you have to do kludgy things with
-callouts.
-</P>
-<P>
-2. Because the alternative algorithm scans the subject string just once, and
-never needs to backtrack (except for lookbehinds), it is possible to pass very
-long subject strings to the matching function in several pieces, checking for
-partial matching each time. Although it is also possible to do multi-segment
-matching using the standard algorithm, by retaining partially matched
-substrings, it is more complicated. The
+Partial matching is possible with this algorithm, though it has some
+limitations. The
 <a href="pcre2partial.html"><b>pcre2partial</b></a>
 documentation gives details of partial matching and discusses multi-segment
 matching.
@ -215,26 +223,30 @@ because it has to search for all possible matches, but is also because it is
 less susceptible to optimization.
 </P>
 <P>
-2. Capturing parentheses and back references are not supported.
+2. Capturing parentheses, backreferences, script runs, and matching within
+invalid UTF string are not supported.
 </P>
 <P>
 3. Although atomic groups are supported, their use does not provide the
 performance advantage that it does for the standard algorithm.
 </P>
+<P>
+4. JIT optimization is not supported.
+</P>
 <br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC8" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 29 September 2014
+Last updated: 28 August 2021
 <br>
-Copyright &copy; 1997-2014 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2partial.html
+++ b/doc/html/pcre2partial.html
@ -14,78 +14,123 @@ please consult the man page, in case the conversion went wrong.
 <br>
 <ul>
 <li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE2</a>
-<li><a name="TOC2" href="#SEC2">PARTIAL MATCHING USING pcre2_match()</a>
-<li><a name="TOC3" href="#SEC3">PARTIAL MATCHING USING pcre2_dfa_match()</a>
-<li><a name="TOC4" href="#SEC4">PARTIAL MATCHING AND WORD BOUNDARIES</a>
-<li><a name="TOC5" href="#SEC5">EXAMPLE OF PARTIAL MATCHING USING PCRE2TEST</a>
+<li><a name="TOC2" href="#SEC2">REQUIREMENTS FOR A PARTIAL MATCH</a>
+<li><a name="TOC3" href="#SEC3">PARTIAL MATCHING USING pcre2_match()</a>
+<li><a name="TOC4" href="#SEC4">MULTI-SEGMENT MATCHING WITH pcre2_match()</a>
+<li><a name="TOC5" href="#SEC5">PARTIAL MATCHING USING pcre2_dfa_match()</a>
 <li><a name="TOC6" href="#SEC6">MULTI-SEGMENT MATCHING WITH pcre2_dfa_match()</a>
-<li><a name="TOC7" href="#SEC7">MULTI-SEGMENT MATCHING WITH pcre2_match()</a>
-<li><a name="TOC8" href="#SEC8">ISSUES WITH MULTI-SEGMENT MATCHING</a>
-<li><a name="TOC9" href="#SEC9">AUTHOR</a>
-<li><a name="TOC10" href="#SEC10">REVISION</a>
+<li><a name="TOC7" href="#SEC7">AUTHOR</a>
+<li><a name="TOC8" href="#SEC8">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE2</a><br>
 <P>
-In normal use of PCRE2, if the subject string that is passed to a matching
-function matches as far as it goes, but is too short to match the entire
-pattern, PCRE2_ERROR_NOMATCH is returned. There are circumstances where it
-might be helpful to distinguish this case from other cases in which there is no
-match.
+In normal use of PCRE2, if there is a match up to the end of a subject string,
+but more characters are needed to match the entire pattern, PCRE2_ERROR_NOMATCH
+is returned, just like any other failing match. There are circumstances where
+it might be helpful to distinguish this "partial match" case.
 </P>
 <P>
-Consider, for example, an application where a human is required to type in data
-for a field with specific formatting requirements. An example might be a date
-in the form <i>ddmmmyy</i>, defined by this pattern:
+One example is an application where the subject string is very long, and not
+all available at once. The requirement here is to be able to do the matching
+segment by segment, but special action is needed when a matched substring spans
+the boundary between two segments.
+</P>
+<P>
+Another example is checking a user input string as it is typed, to ensure that
+it conforms to a required format. Invalid characters can be immediately
+diagnosed and rejected, giving instant feedback.
+</P>
+<P>
+Partial matching is a PCRE2-specific feature; it is not Perl-compatible. It is
+requested by setting one of the PCRE2_PARTIAL_HARD or PCRE2_PARTIAL_SOFT
+options when calling a matching function. The difference between the two
+options is whether or not a partial match is preferred to an alternative
+complete match, though the details differ between the two types of matching
+function. If both options are set, PCRE2_PARTIAL_HARD takes precedence.
+</P>
+<P>
+If you want to use partial matching with just-in-time optimized code, as well
+as setting a partial match option for the matching function, you must also call
+<b>pcre2_jit_compile()</b> with one or both of these options:
 <pre>
-  ^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$
-</pre>
-If the application sees the user's keystrokes one by one, and can check that
-what has been typed so far is potentially valid, it is able to raise an error
-as soon as a mistake is made, by beeping and not reflecting the character that
-has been typed, for example. This immediate feedback is likely to be a better
-user interface than a check that is delayed until the entire string has been
-entered. Partial matching can also be useful when the subject string is very
-long and is not all available at once.
-</P>
-<P>
-PCRE2 supports partial matching by means of the PCRE2_PARTIAL_SOFT and
-PCRE2_PARTIAL_HARD options, which can be set when calling a matching function.
-The difference between the two options is whether or not a partial match is
-preferred to an alternative complete match, though the details differ between
-the two types of matching function. If both options are set, PCRE2_PARTIAL_HARD
-takes precedence.
-</P>
-<P>
-If you want to use partial matching with just-in-time optimized code, you must
-call <b>pcre2_jit_compile()</b> with one or both of these options:
-<pre>
-  PCRE2_JIT_PARTIAL_SOFT
  PCRE2_JIT_PARTIAL_HARD
+  PCRE2_JIT_PARTIAL_SOFT
 </pre>
 PCRE2_JIT_COMPLETE should also be set if you are going to run non-partial
-matches on the same pattern. If the appropriate JIT mode has not been compiled,
-interpretive matching code is used.
+matches on the same pattern. Separate code is compiled for each mode. If the
+appropriate JIT mode has not been compiled, interpretive matching code is used.
 </P>
 <P>
 Setting a partial matching option disables two of PCRE2's standard
-optimizations. PCRE2 remembers the last literal code unit in a pattern, and
-abandons matching immediately if it is not present in the subject string. This
-optimization cannot be used for a subject string that might match only
-partially. PCRE2 also knows the minimum length of a matching string, and does
+optimization hints. PCRE2 remembers the last literal code unit in a pattern,
+and abandons matching immediately if it is not present in the subject string.
+This optimization cannot be used for a subject string that might match only
+partially. PCRE2 also remembers a minimum length of a matching string, and does
 not bother to run the matching function on shorter strings. This optimization
 is also disabled for partial matching.
 </P>
-<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre2_match()</a><br>
+<br><a name="SEC2" href="#TOC1">REQUIREMENTS FOR A PARTIAL MATCH</a><br>
 <P>
-A partial match occurs during a call to <b>pcre2_match()</b> when the end of the
-subject string is reached successfully, but matching cannot continue because
-more characters are needed. However, at least one character in the subject must
-have been inspected. This character need not form part of the final matched
-string; lookbehind assertions and the \K escape sequence provide ways of
-inspecting characters before the start of a matched string. The requirement for
-inspecting at least one character exists because an empty string can always be
-matched; without such a restriction there would always be a partial match of an
-empty string at the end of the subject.
+A possible partial match occurs during matching when the end of the subject
+string is reached successfully, but either more characters are needed to
+complete the match, or the addition of more characters might change what is
+matched.
+</P>
+<P>
+Example 1: if the pattern is /abc/ and the subject is "ab", more characters are
+definitely needed to complete a match. In this case both hard and soft matching
+options yield a partial match.
+</P>
+<P>
+Example 2: if the pattern is /ab+/ and the subject is "ab", a complete match
+can be found, but the addition of more characters might change what is
+matched. In this case, only PCRE2_PARTIAL_HARD returns a partial match;
+PCRE2_PARTIAL_SOFT returns the complete match.
+</P>
+<P>
+On reaching the end of the subject, when PCRE2_PARTIAL_HARD is set, if the next
+pattern item is \z, \Z, \b, \B, or $ there is always a partial match.
+Otherwise, for both options, the next pattern item must be one that inspects a
+character, and at least one of the following must be true:
+</P>
+<P>
+(1) At least one character has already been inspected. An inspected character
+need not form part of the final matched string; lookbehind assertions and the
+\K escape sequence provide ways of inspecting characters before the start of a
+matched string.
+</P>
+<P>
+(2) The pattern contains one or more lookbehind assertions. This condition
+exists in case there is a lookbehind that inspects characters before the start
+of the match.
+</P>
+<P>
+(3) There is a special case when the whole pattern can match an empty string.
+When the starting point is at the end of the subject, the empty string match is
+a possibility, and if PCRE2_PARTIAL_SOFT is set and neither of the above
+conditions is true, it is returned. However, because adding more characters
+might result in a non-empty match, PCRE2_PARTIAL_HARD returns a partial match,
+which in this case means "there is going to be a match at this point, but until
+some more characters are added, we do not know if it will be an empty string or
+something longer".
+</P>
+<br><a name="SEC3" href="#TOC1">PARTIAL MATCHING USING pcre2_match()</a><br>
+<P>
+When a partial matching option is set, the result of calling
+<b>pcre2_match()</b> can be one of the following:
+</P>
+<P>
+<b>A successful match</b>
+A complete match has been found, starting and ending within this subject.
+</P>
+<P>
+<b>PCRE2_ERROR_NOMATCH</b>
+No match can start anywhere in this subject.
+</P>
+<P>
+<b>PCRE2_ERROR_PARTIAL</b>
+Adding more characters may result in a complete match that uses one or more
+characters from the end of this subject.
 </P>
 <P>
 When a partial match is returned, the first two elements in the ovector point
@ -103,54 +148,42 @@ these characters are needed for a subsequent re-match with additional
 characters.
 </P>
 <P>
-What happens when a partial match is identified depends on which of the two
-partial matching options are set.
-</P>
-<br><b>
-PCRE2_PARTIAL_SOFT WITH pcre2_match()
-</b><br>
-<P>
-If PCRE2_PARTIAL_SOFT is set when <b>pcre2_match()</b> identifies a partial
-match, the partial match is remembered, but matching continues as normal, and
-other alternatives in the pattern are tried. If no complete match can be found,
-PCRE2_ERROR_PARTIAL is returned instead of PCRE2_ERROR_NOMATCH.
-</P>
-<P>
-This option is "soft" because it prefers a complete match over a partial match.
-All the various matching items in a pattern behave as if the subject string is
-potentially complete. For example, \z, \Z, and $ match at the end of the
-subject, as normal, and for \b and \B the end of the subject is treated as a
-non-alphanumeric.
-</P>
-<P>
 If there is more than one partial match, the first one that was found provides
 the data that is returned. Consider this pattern:
 <pre>
  /123\w+X|dogY/
 </pre>
-If this is matched against the subject string "abc123dog", both
-alternatives fail to match, but the end of the subject is reached during
-matching, so PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9,
-identifying "123dog" as the first partial match that was found. (In this
-example, there are two partial matches, because "dog" on its own partially
-matches the second alternative.)
+If this is matched against the subject string "abc123dog", both alternatives
+fail to match, but the end of the subject is reached during matching, so
+PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9, identifying
+"123dog" as the first partial match. (In this example, there are two partial
+matches, because "dog" on its own partially matches the second alternative.)
 </P>
 <br><b>
-PCRE2_PARTIAL_HARD WITH pcre2_match()
+How a partial match is processed by pcre2_match()
 </b><br>
 <P>
-If PCRE2_PARTIAL_HARD is set for <b>pcre2_match()</b>, PCRE2_ERROR_PARTIAL is
-returned as soon as a partial match is found, without continuing to search for
-possible complete matches. This option is "hard" because it prefers an earlier
-partial match over a later complete match. For this reason, the assumption is
-made that the end of the supplied subject string may not be the true end of the
-available data, and so, if \z, \Z, \b, \B, or $ are encountered at the end
-of the subject, the result is PCRE2_ERROR_PARTIAL, provided that at least one
-character in the subject has been inspected.
+What happens when a partial match is identified depends on which of the two
+partial matching options is set.
+</P>
+<P>
+If PCRE2_PARTIAL_HARD is set, PCRE2_ERROR_PARTIAL is returned as soon as a
+partial match is found, without continuing to search for possible complete
+matches. This option is "hard" because it prefers an earlier partial match over
+a later complete match. For this reason, the assumption is made that the end of
+the supplied subject string is not the true end of the available data, which is
+why \z, \Z, \b, \B, and $ always give a partial match.
+</P>
+<P>
+If PCRE2_PARTIAL_SOFT is set, the partial match is remembered, but matching
+continues as normal, and other alternatives in the pattern are tried. If no
+complete match can be found, PCRE2_ERROR_PARTIAL is returned instead of
+PCRE2_ERROR_NOMATCH. This option is "soft" because it prefers a complete match
+over a partial match. All the various matching items in a pattern behave as if
+the subject string is potentially complete; \z, \Z, and $ match at the end of
+the subject, as normal, and for \b and \B the end of the subject is treated
+as a non-alphanumeric.
 </P>
-<br><b>
-Comparing hard and soft partial matching
-</b><br>
 <P>
 The difference between the two partial matching options can be illustrated by a
 pattern such as:
@ -175,26 +208,135 @@ to follow this explanation by thinking of the two patterns like this:
 The second pattern will never match "dogsbody", because it will always find the
 shorter match first.
 </P>
-<br><a name="SEC3" href="#TOC1">PARTIAL MATCHING USING pcre2_dfa_match()</a><br>
+<br><b>
+Example of partial matching using pcre2test
+</b><br>
 <P>
-The DFA functions move along the subject string character by character, without
+The <b>pcre2test</b> data modifiers <b>partial_hard</b> (or <b>ph</b>) and
+<b>partial_soft</b> (or <b>ps</b>) set PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT,
+respectively, when calling <b>pcre2_match()</b>. Here is a run of
+<b>pcre2test</b> using a pattern that matches the whole subject in the form of a
+date:
+<pre>
+    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data&#62; 25dec3\=ph
+  Partial match: 23dec3
+  data&#62; 3ju\=ph
+  Partial match: 3ju
+  data&#62; 3juj\=ph
+  No match
+</pre>
+This example gives the same results for both hard and soft partial matching
+options. Here is an example where there is a difference:
+<pre>
+    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data&#62; 25jun04\=ps
+   0: 25jun04
+   1: jun
+  data&#62; 25jun04\=ph
+  Partial match: 25jun04
+</pre>
+With PCRE2_PARTIAL_SOFT, the subject is matched completely. For
+PCRE2_PARTIAL_HARD, however, the subject is assumed not to be complete, so
+there is only a partial match.
+</P>
+<br><a name="SEC4" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre2_match()</a><br>
+<P>
+PCRE was not originally designed with multi-segment matching in mind. However,
+over time, features (including partial matching) that make multi-segment
+matching possible have been added. A very long string can be searched segment
+by segment by calling <b>pcre2_match()</b> repeatedly, with the aim of achieving
+the same results that would happen if the entire string was available for
+searching all the time. Normally, the strings that are being sought are much
+shorter than each individual segment, and are in the middle of very long
+strings, so the pattern is normally not anchored.
+</P>
+<P>
+Special logic must be implemented to handle a matched substring that spans a
+segment boundary. PCRE2_PARTIAL_HARD should be used, because it returns a
+partial match at the end of a segment whenever there is the possibility of
+changing the match by adding more characters. The PCRE2_NOTBOL option should
+also be set for all but the first segment.
+</P>
+<P>
+When a partial match occurs, the next segment must be added to the current
+subject and the match re-run, using the <i>startoffset</i> argument of
+<b>pcre2_match()</b> to begin at the point where the partial match started.
+For example:
+<pre>
+    re&#62; /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
+  data&#62; ...the date is 23ja\=ph
+  Partial match: 23ja
+  data&#62; ...the date is 23jan19 and on that day...\=offset=15
+   0: 23jan19
+   1: jan
+</pre>
+Note the use of the <b>offset</b> modifier to start the new match where the
+partial match was found. In this example, the next segment was added to the one
+in which the partial match was found. This is the most straightforward
+approach, typically using a memory buffer that is twice the size of each
+segment. After a partial match, the first half of the buffer is discarded, the
+second half is moved to the start of the buffer, and a new segment is added
+before repeating the match as in the example above. After a no match, the
+entire buffer can be discarded.
+</P>
+<P>
+If there are memory constraints, you may want to discard text that precedes a
+partial match before adding the next segment. Unfortunately, this is not at
+present straightforward. In cases such as the above, where the pattern does not
+contain any lookbehinds, it is sufficient to retain only the partially matched
+substring. However, if the pattern contains a lookbehind assertion, characters
+that precede the start of the partial match may have been inspected during the
+matching process. When <b>pcre2test</b> displays a partial match, it indicates
+these characters with '&#60;' if the <b>allusedtext</b> modifier is set:
+<pre>
+    re&#62; "(?&#60;=123)abc"
+  data&#62; xx123ab\=ph,allusedtext
+  Partial match: 123ab
+                 &#60;&#60;&#60;
+</pre>
+However, the <b>allusedtext</b> modifier is not available for JIT matching,
+because JIT matching does not record the first (or last) consulted characters.
+For this reason, this information is not available via the API. It is therefore
+not possible in general to obtain the exact number of characters that must be
+retained in order to get the right match result. If you cannot retain the
+entire segment, you must find some heuristic way of choosing.
+</P>
+<P>
+If you know the approximate length of the matching substrings, you can use that
+to decide how much text to retain. The only lookbehind information that is
+currently available via the API is the length of the longest individual
+lookbehind in a pattern, but this can be misleading if there are nested
+lookbehinds. The value returned by calling <b>pcre2_pattern_info()</b> with the
+PCRE2_INFO_MAXLOOKBEHIND option is the maximum number of characters (not code
+units) that any individual lookbehind moves back when it is processed. A
+pattern such as "(?&#60;=(?&#60;!b)a)" has a maximum lookbehind value of one, but
+inspects two characters before its starting point.
+</P>
+<P>
+In a non-UTF or a 32-bit case, moving back is just a subtraction, but in
+UTF-8 or UTF-16 you have to count characters while moving back through the code
+units.
+</P>
+<br><a name="SEC5" href="#TOC1">PARTIAL MATCHING USING pcre2_dfa_match()</a><br>
+<P>
+The DFA function moves along the subject string character by character, without
 backtracking, searching for all possible matches simultaneously. If the end of
 the subject is reached before the end of the pattern, there is the possibility
-of a partial match, again provided that at least one character has been
-inspected.
+of a partial match.
 </P>
 <P>
 When PCRE2_PARTIAL_SOFT is set, PCRE2_ERROR_PARTIAL is returned only if there
 have been no complete matches. Otherwise, the complete matches are returned.
-However, if PCRE2_PARTIAL_HARD is set, a partial match takes precedence over
-any complete matches. The portion of the string that was matched when the
-longest partial match was found is set as the first matching string.
+If PCRE2_PARTIAL_HARD is set, a partial match takes precedence over any
+complete matches. The portion of the string that was matched when the longest
+partial match was found is set as the first matching string.
 </P>
 <P>
-Because the DFA functions always search for all possible matches, and there is
-no difference between greedy and ungreedy repetition, their behaviour is
-different from the standard functions when PCRE2_PARTIAL_HARD is set. Consider
-the string "dog" matched against the ungreedy pattern shown above:
+Because the DFA function always searches for all possible matches, and there is
+no difference between greedy and ungreedy repetition, its behaviour is
+different from the <b>pcre2_match()</b>. Consider the string "dog" matched
+against this ungreedy pattern:
 <pre>
  /dog(sbody)??/
 </pre>
@ -202,58 +344,16 @@ Whereas the standard function stops as soon as it finds the complete match for
 "dog", the DFA function also finds the partial match for "dogsbody", and so
 returns that when PCRE2_PARTIAL_HARD is set.
 </P>
-<br><a name="SEC4" href="#TOC1">PARTIAL MATCHING AND WORD BOUNDARIES</a><br>
-<P>
-If a pattern ends with one of sequences \b or \B, which test for word
-boundaries, partial matching with PCRE2_PARTIAL_SOFT can give counter-intuitive
-results. Consider this pattern:
-<pre>
-  /\bcat\b/
-</pre>
-This matches "cat", provided there is a word boundary at either end. If the
-subject string is "the cat", the comparison of the final "t" with a following
-character cannot take place, so a partial match is found. However, normal
-matching carries on, and \b matches at the end of the subject when the last
-character is a letter, so a complete match is found. The result, therefore, is
-<i>not</i> PCRE2_ERROR_PARTIAL. Using PCRE2_PARTIAL_HARD in this case does yield
-PCRE2_ERROR_PARTIAL, because then the partial match takes precedence.
-</P>
-<br><a name="SEC5" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRE2TEST</a><br>
-<P>
-If the <b>partial_soft</b> (or <b>ps</b>) modifier is present on a
-<b>pcre2test</b> data line, the PCRE2_PARTIAL_SOFT option is used for the match.
-Here is a run of <b>pcre2test</b> that uses the date example quoted above:
-<pre>
-    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
-  data&#62; 25jun04\=ps
-   0: 25jun04
-   1: jun
-  data&#62; 25dec3\=ps
-  Partial match: 23dec3
-  data&#62; 3ju\=ps
-  Partial match: 3ju
-  data&#62; 3juj\=ps
-  No match
-  data&#62; j\=ps
-  No match
-</pre>
-The first data string is matched completely, so <b>pcre2test</b> shows the
-matched substrings. The remaining four strings do not match the complete
-pattern, but the first two are partial matches. Similar output is obtained
-if DFA matching is used.
-</P>
-<P>
-If the <b>partial_hard</b> (or <b>ph</b>) modifier is present on a
-<b>pcre2test</b> data line, the PCRE2_PARTIAL_HARD option is set for the match.
-</P>
 <br><a name="SEC6" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre2_dfa_match()</a><br>
 <P>
-When a partial match has been found using a DFA matching function, it is
+When a partial match has been found using the DFA matching function, it is
 possible to continue the match by providing additional subject data and calling
 the function again with the same compiled regular expression, this time setting
 the PCRE2_DFA_RESTART option. You must pass the same working space as before,
-because this is where details of the previous partial match are stored. Here is
-an example using <b>pcre2test</b>:
+because this is where details of the previous partial match are stored. You can
+set the PCRE2_PARTIAL_SOFT or PCRE2_PARTIAL_HARD options with PCRE2_DFA_RESTART
+to continue partial matching over multiple segments. Here is an example using
+<b>pcre2test</b>:
 <pre>
    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
  data&#62; 23ja\=dfa,ps
@ -265,155 +365,10 @@ The first call has "23ja" as the subject, and requests partial matching; the
 second call has "n05" as the subject for the continued (restarted) match.
 Notice that when the match is complete, only the last part is shown; PCRE2 does
 not retain the previously partially-matched string. It is up to the calling
-program to do that if it needs to.
-</P>
-<P>
-That means that, for an unanchored pattern, if a continued match fails, it is
-not possible to try again at a new starting point. All this facility is capable
-of doing is continuing with the previous match attempt. In the previous
-example, if the second set of data is "ug23" the result is no match, even
-though there would be a match for "aug23" if the entire string were given at
-once. Depending on the application, this may or may not be what you want.
-The only way to allow for starting again at the next character is to retain the
-matched part of the subject and try a new complete match.
-</P>
-<P>
-You can set the PCRE2_PARTIAL_SOFT or PCRE2_PARTIAL_HARD options with
-PCRE2_DFA_RESTART to continue partial matching over multiple segments. This
-facility can be used to pass very long subject strings to the DFA matching
-functions.
-</P>
-<br><a name="SEC7" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre2_match()</a><br>
-<P>
-Unlike the DFA function, it is not possible to restart the previous match with
-a new segment of data when using <b>pcre2_match()</b>. Instead, new data must be
-added to the previous subject string, and the entire match re-run, starting
-from the point where the partial match occurred. Earlier data can be discarded.
-</P>
-<P>
-It is best to use PCRE2_PARTIAL_HARD in this situation, because it does not
-treat the end of a segment as the end of the subject when matching \z, \Z,
-\b, \B, and $. Consider an unanchored pattern that matches dates:
-<pre>
-    re&#62; /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
-  data&#62; The date is 23ja\=ph
-  Partial match: 23ja
-</pre>
-At this stage, an application could discard the text preceding "23ja", add on
-text from the next segment, and call the matching function again. Unlike the
-DFA matching function, the entire matching string must always be available,
-and the complete matching process occurs for each call, so more memory and more
-processing time is needed.
-</P>
-<br><a name="SEC8" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
-<P>
-Certain types of pattern may give problems with multi-segment matching,
-whichever matching function is used.
-</P>
-<P>
-1. If the pattern contains a test for the beginning of a line, you need to pass
-the PCRE2_NOTBOL option when the subject string for any call does start at the
-beginning of a line. There is also a PCRE2_NOTEOL option, but in practice when
-doing multi-segment matching you should be using PCRE2_PARTIAL_HARD, which
-includes the effect of PCRE2_NOTEOL.
-</P>
-<P>
-2. If a pattern contains a lookbehind assertion, characters that precede the
-start of the partial match may have been inspected during the matching process.
-When using <b>pcre2_match()</b>, sufficient characters must be retained for the
-next match attempt. You can ensure that enough characters are retained by doing
-the following:
-</P>
-<P>
-Before doing any matching, find the length of the longest lookbehind in the
-pattern by calling <b>pcre2_pattern_info()</b> with the PCRE2_INFO_MAXLOOKBEHIND
-option. Note that the resulting count is in characters, not code units. After a
-partial match, moving back from the ovector[0] offset in the subject by the
-number of characters given for the maximum lookbehind gets you to the earliest
-character that must be retained. In a non-UTF or a 32-bit situation, moving
-back is just a subtraction, but in UTF-8 or UTF-16 you have to count characters
-while moving back through the code units.
-</P>
-<P>
-Characters before the point you have now reached can be discarded, and after
-the next segment has been added to what is retained, you should run the next
-match with the <b>startoffset</b> argument set so that the match begins at the
-same point as before.
-</P>
-<P>
-For example, if the pattern "(?&#60;=123)abc" is partially matched against the
-string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
-lookbehind count is 3, so all characters before offset 2 can be discarded. The
-value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
-displays a partial match, it indicates the lookbehind characters with '&#60;'
-characters:
-<pre>
-    re&#62; "(?&#60;=123)abc"
-  data&#62; xx123ab\=ph
-  Partial match: 123ab
-                 &#60;&#60;&#60;
-</PRE>
-</P>
-<P>
-3. Because a partial match must always contain at least one character, what
-might be considered a partial match of an empty string actually gives a "no
-match" result. For example:
-<pre>
-    re&#62; /c(?&#60;=abc)x/
-  data&#62; ab\=ps
-  No match
-</pre>
-If the next segment begins "cx", a match should be found, but this will only
-happen if characters from the previous segment are retained. For this reason, a
-"no match" result should be interpreted as "partial match of an empty string"
-when the pattern contains lookbehinds.
-</P>
-<P>
-4. Matching a subject string that is split into multiple segments may not
-always produce exactly the same result as matching over one single long string,
-especially when PCRE2_PARTIAL_SOFT is used. The section "Partial Matching and
-Word Boundaries" above describes an issue that arises if the pattern ends with
-\b or \B. Another kind of difference may occur when there are multiple
-matching possibilities, because (for PCRE2_PARTIAL_SOFT) a partial match result
-is given only when there are no completed matches. This means that as soon as
-the shortest match has been found, continuation to a new subject segment is no
-longer possible. Consider this <b>pcre2test</b> example:
-<pre>
-    re&#62; /dog(sbody)?/
-  data&#62; dogsb\=ps
-   0: dog
-  data&#62; do\=ps,dfa
-  Partial match: do
-  data&#62; gsb\=ps,dfa,dfa_restart
-   0: g
-  data&#62; dogsbody\=dfa
-   0: dogsbody
-   1: dog
-</pre>
-The first data line passes the string "dogsb" to a standard matching function,
-setting the PCRE2_PARTIAL_SOFT option. Although the string is a partial match
-for "dogsbody", the result is not PCRE2_ERROR_PARTIAL, because the shorter
-string "dog" is a complete match. Similarly, when the subject is presented to
-a DFA matching function in several parts ("do" and "gsb" being the first two)
-the match stops when "dog" has been found, and it is not possible to continue.
-On the other hand, if "dogsbody" is presented as a single string, a DFA
-matching function finds both matches.
-</P>
-<P>
-Because of these problems, it is best to use PCRE2_PARTIAL_HARD when matching
-multi-segment data. The example above then behaves differently:
-<pre>
-    re&#62; /dog(sbody)?/
-  data&#62; dogsb\=ph
-  Partial match: dogsb
-  data&#62; do\=ps,dfa
-  Partial match: do
-  data&#62; gsb\=ph,dfa,dfa_restart
-  Partial match: gsb
-</pre>
-5. Patterns that contain alternatives at the top level which do not all start
-with the same pattern item may not work as expected when PCRE2_DFA_RESTART is
-used. For example, consider this pattern:
+program to do that if it needs to. This means that, for an unanchored pattern,
+if a continued match fails, it is not possible to try again at a new starting
+point. All this facility is capable of doing is continuing with the previous
+match attempt. For example, consider this pattern:
 <pre>
  1234|3789
 </pre>
@ -422,30 +377,18 @@ alternative is found at offset 3. There is no partial match for the second
 alternative, because such a match does not start at the same point in the
 subject string. Attempting to continue with the string "7890" does not yield a
 match because only those alternatives that match at one point in the subject
-are remembered. The problem arises because the start of the second alternative
-matches within the first alternative. There is no problem with anchored
-patterns or patterns such as:
-<pre>
-  1234|ABCD
-</pre>
-where no string can be a partial match for both alternatives. This is not a
-problem if a standard matching function is used, because the entire match has
-to be rerun each time:
-<pre>
-    re&#62; /1234|3789/
-  data&#62; ABC123\=ph
-  Partial match: 123
-  data&#62; 1237890
-   0: 3789
-</pre>
-Of course, instead of using PCRE2_DFA_RESTART, the same technique of re-running
-the entire match can also be used with the DFA matching function. Another
-possibility is to work with two buffers. If a partial match at offset <i>n</i>
-in the first buffer is followed by "no match" when PCRE2_DFA_RESTART is used on
-the second buffer, you can then try a new match starting at offset <i>n+1</i> in
-the first buffer.
+are remembered. Depending on the application, this may or may not be what you
+want.
 </P>
-<br><a name="SEC9" href="#TOC1">AUTHOR</a><br>
+<P>
+If you do want to allow for starting again at the next character, one way of
+doing it is to retain some or all of the segment and try a new complete match,
+as described for <b>pcre2_match()</b> above. Another possibility is to work with
+two buffers. If a partial match at offset <i>n</i> in the first buffer is
+followed by "no match" when PCRE2_DFA_RESTART is used on the second buffer, you
+can then try a new match starting at offset <i>n+1</i> in the first buffer.
+</P>
+<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@ -454,11 +397,11 @@ University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC10" href="#TOC1">REVISION</a><br>
+<br><a name="SEC8" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 22 December 2014
+Last updated: 04 September 2019
 <br>
-Copyright &copy; 1997-2014 University of Cambridge.
+Copyright &copy; 1997-2019 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@ -31,9 +31,9 @@ of them.
 Patterns are compiled by PCRE2 into a reasonably efficient interpretive code,
 so that most simple patterns do not use much memory for storing the compiled
 version. However, there is one case where the memory usage of a compiled
-pattern can be unexpectedly large. If a parenthesized subpattern has a
-quantifier with a minimum greater than 1 and/or a limited maximum, the whole
-subpattern is repeated in the compiled code. For example, the pattern
+pattern can be unexpectedly large. If a parenthesized group has a quantifier
+with a minimum greater than 1 and/or a limited maximum, the whole group is
+repeated in the compiled code. For example, the pattern
 <pre>
  (abc|def){2,4}
 </pre>
@ -52,9 +52,9 @@ example, the very simple pattern
 <pre>
  ((ab){1,1000}c){1,3}
 </pre>
-uses over 50K bytes when compiled using the 8-bit library. When PCRE2 is
+uses over 50KiB when compiled using the 8-bit library. When PCRE2 is
 compiled with its default internal pointer size of two bytes, the size limit on
-a compiled pattern is 64K code units in the 8-bit and 16-bit libraries, and
+a compiled pattern is 65535 code units in the 8-bit and 16-bit libraries, and
 this is reached with the above pattern if the outer repetition is increased
 from 3 to 4. PCRE2 can be compiled to use larger internal pointers and thus
 handle larger compiled patterns, but it is better to try to rewrite your
@ -68,14 +68,14 @@ facility. Re-writing the above pattern as
 <pre>
  ((ab)(?2){0,999}c)(?1){0,2}
 </pre>
-reduces the memory requirements to around 16K, and indeed it remains under 20K
-even with the outer repetition increased to 100. However, this kind of pattern
-is not always exactly equivalent, because any captures within subroutine calls
-are lost when the subroutine completes. If this is not a problem, this kind of
-rewriting will allow you to process patterns that PCRE2 cannot otherwise
-handle. The matching performance of the two different versions of the pattern
-are roughly the same. (This applies from release 10.30 - things were different
-in earlier releases.)
+reduces the memory requirements to around 16KiB, and indeed it remains under
+20KiB even with the outer repetition increased to 100. However, this kind of
+pattern is not always exactly equivalent, because any captures within
+subroutine calls are lost when the subroutine completes. If this is not a
+problem, this kind of rewriting will allow you to process patterns that PCRE2
+cannot otherwise handle. The matching performance of the two different versions
+of the pattern are roughly the same. (This applies from release 10.30 - things
+were different in earlier releases.)
 </P>
 <br><a name="SEC3" href="#TOC1">STACK AND HEAP USAGE AT RUN TIME</a><br>
 <P>
@ -83,19 +83,46 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20K vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code. 
+</P>
+<P>
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+</P>
+<P>
+Until release 10.41, an initial 20KiB frames vector was allocated on the system 
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to <b>pcre2_match()</b>. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+</P>
+<P>
+The size of the initial block is the larger of 20KiB or ten times the pattern's 
+frame size, unless the heap limit is less than this, in which case the heap 
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is 
+checked only when a new block is to be allocated. Reducing the heap limit 
+between calls to <b>pcre2_match()</b> with the same match data block does not 
+affect the saved block.
 </P>
 <P>
 In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
 function calls, but only for processing atomic groups, lookaround assertions,
-and recursion within the pattern. Too much nested recursion may cause stack
-issues. The "match depth" parameter can be used to limit the depth of function
-recursion in <b>pcre2_dfa_match()</b>.
+and recursion within the pattern. The original version of the code used to
+allocate quite large internal workspace vectors on the stack, which caused some
+problems for some patterns in environments with small stacks. From release
+10.32 the code for <b>pcre2_dfa_match()</b> has been re-factored to use heap
+memory when necessary for internal workspace when recursing, though recursive
+function calls are still used.
+</P>
+<P>
+The "match depth" parameter can be used to limit the depth of function
+recursion, and the "match heap" parameter to limit heap memory in
+<b>pcre2_dfa_match()</b>.
 </P>
 <br><a name="SEC4" href="#TOC1">PROCESSING TIME</a><br>
 <P>
@ -237,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 08 April 2017
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2posix.html
+++ b/doc/html/pcre2posix.html
@ -15,51 +15,78 @@ please consult the man page, in case the conversion went wrong.
 <ul>
 <li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
 <li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
-<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
-<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
-<li><a name="TOC5" href="#SEC5">MATCHING A PATTERN</a>
-<li><a name="TOC6" href="#SEC6">ERROR MESSAGES</a>
-<li><a name="TOC7" href="#SEC7">MEMORY USAGE</a>
-<li><a name="TOC8" href="#SEC8">AUTHOR</a>
-<li><a name="TOC9" href="#SEC9">REVISION</a>
+<li><a name="TOC3" href="#SEC3">USING THE POSIX FUNCTIONS</a>
+<li><a name="TOC4" href="#SEC4">COMPILING A PATTERN</a>
+<li><a name="TOC5" href="#SEC5">MATCHING NEWLINE CHARACTERS</a>
+<li><a name="TOC6" href="#SEC6">MATCHING A PATTERN</a>
+<li><a name="TOC7" href="#SEC7">ERROR MESSAGES</a>
+<li><a name="TOC8" href="#SEC8">MEMORY USAGE</a>
+<li><a name="TOC9" href="#SEC9">AUTHOR</a>
+<li><a name="TOC10" href="#SEC10">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
 <P>
 <b>#include &#60;pcre2posix.h&#62;</b>
 </P>
 <P>
-<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
+<b>int pcre2_regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
 <b>     int <i>cflags</i>);</b>
 <br>
 <br>
-<b>int regexec(const regex_t *<i>preg</i>, const char *<i>string</i>,</b>
+<b>int pcre2_regexec(const regex_t *<i>preg</i>, const char *<i>string</i>,</b>
 <b>     size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
 <br>
 <br>
-<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
+<b>size_t pcre2_regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
 <b>     char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
 <br>
 <br>
-<b>void regfree(regex_t *<i>preg</i>);</b>
+<b>void pcre2_regfree(regex_t *<i>preg</i>);</b>
 </P>
 <br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
 <P>
 This set of functions provides a POSIX-style API for the PCRE2 regular
-expression 8-bit library. See the
+expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit
+and 32-bit libraries. See the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation for a description of PCRE2's native API, which contains much
-additional functionality. There are no POSIX-style wrappers for PCRE2's 16-bit
-and 32-bit libraries.
+additional functionality.
 </P>
 <P>
-The functions described here are just wrapper functions that ultimately call
-the PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b>
-header file, and on Unix systems the library itself is called
-<b>libpcre2-posix.a</b>, so can be accessed by adding <b>-lpcre2-posix</b> to the
-command for linking an application that uses them. Because the POSIX functions
-call the native ones, it is also necessary to add <b>-lpcre2-8</b>.
+The functions described here are wrapper functions that ultimately call the
+PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b> header
+file, and they all have unique names starting with <b>pcre2_</b>. However, the
+<b>pcre2posix.h</b> header also contains macro definitions that convert the
+standard POSIX names such <b>regcomp()</b> into <b>pcre2_regcomp()</b> etc. This
+means that a program can use the usual POSIX names without running the risk of
+accidentally linking with POSIX functions from a different library.
 </P>
 <P>
+On Unix-like systems the PCRE2 POSIX library is called <b>libpcre2-posix</b>, so
+can be accessed by adding <b>-lpcre2-posix</b> to the command for linking an
+application. Because the POSIX functions call the native ones, it is also
+necessary to add <b>-lpcre2-8</b>.
+</P>
+<P>
+Although they were not defined as protypes in <b>pcre2posix.h</b>, releases
+10.33 to 10.36 of the library contained functions with the POSIX names
+<b>regcomp()</b> etc. These simply passed their arguments to the PCRE2
+functions. These functions were provided for backwards compatibility with
+earlier versions of PCRE2, which had only POSIX names. However, this has proved
+troublesome in situations where a program links with several libraries, some of
+which use PCRE2's POSIX interface while others use the real POSIX functions.
+For this reason, the POSIX names have been removed since release 10.37.
+</P>
+<P>
+Calling the header file <b>pcre2posix.h</b> avoids any conflict with other POSIX
+libraries. It can, of course, be renamed or aliased as <b>regex.h</b>, which is
+the "correct" name, if there is no clash. It provides two structure types,
+<i>regex_t</i> for compiled internal forms, and <i>regmatch_t</i> for returning
+captured substrings. It also defines some constants whose names start with
+"REG_"; these are used for setting options and identifying error codes.
+</P>
+<br><a name="SEC3" href="#TOC1">USING THE POSIX FUNCTIONS</a><br>
+<P>
 Those POSIX option bits that can reasonably be mapped to PCRE2 native options
 have been implemented. In addition, the option REG_EXTENDED is defined with the
 value zero. This has no effect, but since programs that are written to the
@ -80,17 +107,13 @@ POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding
 domains it is probably even less compatible.
 </P>
 <P>
-The header for these functions is supplied as <b>pcre2posix.h</b> to avoid any
-potential clash with other POSIX libraries. It can, of course, be renamed or
-aliased as <b>regex.h</b>, which is the "correct" name. It provides two
-structure types, <i>regex_t</i> for compiled internal forms, and
-<i>regmatch_t</i> for returning captured substrings. It also defines some
-constants whose names start with "REG_"; these are used for setting options and
-identifying error codes.
+The descriptions below use the actual names of the functions, but, as described
+above, the standard POSIX names (without the <b>pcre2_</b> prefix) may also be
+used.
 </P>
-<br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br>
+<br><a name="SEC4" href="#TOC1">COMPILING A PATTERN</a><br>
 <P>
-The function <b>regcomp()</b> is called to compile a pattern into an
+The function <b>pcre2_regcomp()</b> is called to compile a pattern into an
 internal form. By default, the pattern is a C string terminated by a binary
 zero (but see REG_PEND below). The <i>preg</i> argument is a pointer to a
 <b>regex_t</b> structure that is used as a base for storing information about
@ -128,18 +151,18 @@ REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard.
 <pre>
  REG_NOSUB
 </pre>
-When a pattern that is compiled with this flag is passed to <b>regexec()</b> for
-matching, the <i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no
-captured strings are returned. Versions of the PCRE library prior to 10.22 used
-to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens
-because it disables the use of back references.
+When a pattern that is compiled with this flag is passed to
+<b>pcre2_regexec()</b> for matching, the <i>nmatch</i> and <i>pmatch</i> arguments
+are ignored, and no captured strings are returned. Versions of the PCRE library
+prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this
+no longer happens because it disables the use of backreferences.
 <pre>
  REG_PEND
 </pre>
 If this option is set, the <b>reg_endp</b> field in the <i>preg</i> structure
 (which has the type const char *) must be set to point to the character beyond
-the end of the pattern before calling <b>regcomp()</b>. The pattern itself may
-now contain binary zeroes, which are treated as data characters. Without
+the end of the pattern before calling <b>pcre2_regcomp()</b>. The pattern itself
+may now contain binary zeros, which are treated as data characters. Without
 REG_PEND, a binary zero terminates the pattern and the <b>re_endp</b> field is
 ignored. This is a GNU extension to the POSIX standard and should be used with
 caution in software intended to be portable to other systems.
@ -174,18 +197,19 @@ newlines are matched by the dot metacharacter (they are not) or by a negative
 class such as [^a] (they are).
 </P>
 <P>
-The yield of <b>regcomp()</b> is zero on success, and non-zero otherwise. The
-<i>preg</i> structure is filled in on success, and one other member of the
+The yield of <b>pcre2_regcomp()</b> is zero on success, and non-zero otherwise.
+The <i>preg</i> structure is filled in on success, and one other member of the
 structure (as well as <i>re_endp</i>) is public: <i>re_nsub</i> contains the
 number of capturing subpatterns in the regular expression. Various error codes
 are defined in the header file.
 </P>
 <P>
-NOTE: If the yield of <b>regcomp()</b> is non-zero, you must not attempt to
-use the contents of the <i>preg</i> structure. If, for example, you pass it to
-<b>regexec()</b>, the result is undefined and your program is likely to crash.
+NOTE: If the yield of <b>pcre2_regcomp()</b> is non-zero, you must not attempt
+to use the contents of the <i>preg</i> structure. If, for example, you pass it
+to <b>pcre2_regexec()</b>, the result is undefined and your program is likely to
+crash.
 </P>
-<br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
+<br><a name="SEC5" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
 <P>
 This area is not simple, because POSIX and Perl take different views of things.
 It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was
@ -219,16 +243,16 @@ is no way to stop newline from matching [^a].
 Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and
 PCRE2_DOLLAR_ENDONLY when calling <b>pcre2_compile()</b> directly, but there is
 no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using
-the POSIX API, passing REG_NEWLINE to PCRE2's <b>regcomp()</b> function
+the POSIX API, passing REG_NEWLINE to PCRE2's <b>pcre2_regcomp()</b> function
 causes PCRE2_MULTILINE to be passed to <b>pcre2_compile()</b>, and REG_DOTALL
 passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY.
 </P>
-<br><a name="SEC5" href="#TOC1">MATCHING A PATTERN</a><br>
+<br><a name="SEC6" href="#TOC1">MATCHING A PATTERN</a><br>
 <P>
-The function <b>regexec()</b> is called to match a compiled pattern <i>preg</i>
-against a given <i>string</i>, which is by default terminated by a zero byte
-(but see REG_STARTEND below), subject to the options in <i>eflags</i>. These can
-be:
+The function <b>pcre2_regexec()</b> is called to match a compiled pattern
+<i>preg</i> against a given <i>string</i>, which is by default terminated by a
+zero byte (but see REG_STARTEND below), subject to the options in <i>eflags</i>.
+These can be:
 <pre>
  REG_NOTBOL
 </pre>
@ -248,10 +272,10 @@ function.
 <pre>
  REG_STARTEND
 </pre>
-When this option is set, the subject string is starts at <i>string</i> +
+When this option is set, the subject string starts at <i>string</i> +
 <i>pmatch[0].rm_so</i> and ends at <i>string</i> + <i>pmatch[0].rm_eo</i>, which
 should point to the first character beyond the string. There may be binary
-zeroes within the subject string, and indeed, using REG_STARTEND is the only
+zeros within the subject string, and indeed, using REG_STARTEND is the only
 way to pass a subject string that contains a binary zero.
 </P>
 <P>
@ -272,7 +296,7 @@ are mutually exclusive; the error REG_INVARG is returned.
 <P>
 If the pattern was compiled with the REG_NOSUB flag, no data about any matched
 strings is returned. The <i>nmatch</i> and <i>pmatch</i> arguments of
-<b>regexec()</b> are ignored (except possibly as input for REG_STARTEND).
+<b>pcre2_regexec()</b> are ignored (except possibly as input for REG_STARTEND).
 </P>
 <P>
 The value of <i>nmatch</i> may be zero, and the value <i>pmatch</i> may be NULL
@ -294,24 +318,25 @@ array have both structure members set to -1.
 A successful match yields a zero return; various error codes are defined in the
 header file, of which REG_NOMATCH is the "expected" failure code.
 </P>
-<br><a name="SEC6" href="#TOC1">ERROR MESSAGES</a><br>
+<br><a name="SEC7" href="#TOC1">ERROR MESSAGES</a><br>
 <P>
-The <b>regerror()</b> function maps a non-zero errorcode from either
-<b>regcomp()</b> or <b>regexec()</b> to a printable message. If <i>preg</i> is not
-NULL, the error should have arisen from the use of that structure. A message
-terminated by a binary zero is placed in <i>errbuf</i>. If the buffer is too
-short, only the first <i>errbuf_size</i> - 1 characters of the error message are
-used. The yield of the function is the size of buffer needed to hold the whole
-message, including the terminating zero. This value is greater than
-<i>errbuf_size</i> if the message was truncated.
+The <b>pcre2_regerror()</b> function maps a non-zero errorcode from either
+<b>pcre2_regcomp()</b> or <b>pcre2_regexec()</b> to a printable message. If
+<i>preg</i> is not NULL, the error should have arisen from the use of that
+structure. A message terminated by a binary zero is placed in <i>errbuf</i>. If
+the buffer is too short, only the first <i>errbuf_size</i> - 1 characters of the
+error message are used. The yield of the function is the size of buffer needed
+to hold the whole message, including the terminating zero. This value is
+greater than <i>errbuf_size</i> if the message was truncated.
 </P>
-<br><a name="SEC7" href="#TOC1">MEMORY USAGE</a><br>
+<br><a name="SEC8" href="#TOC1">MEMORY USAGE</a><br>
 <P>
 Compiling a regular expression causes memory to be allocated and associated
-with the <i>preg</i> structure. The function <b>regfree()</b> frees all such
-memory, after which <i>preg</i> may no longer be used as a compiled expression.
+with the <i>preg</i> structure. The function <b>pcre2_regfree()</b> frees all
+such memory, after which <i>preg</i> may no longer be used as a compiled
+expression.
 </P>
-<br><a name="SEC8" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC9" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@ -320,11 +345,11 @@ University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC9" href="#TOC1">REVISION</a><br>
+<br><a name="SEC10" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 15 June 2017
+Last updated: 26 April 2021
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2serialize.html
+++ b/doc/html/pcre2serialize.html
@ -23,12 +23,12 @@ please consult the man page, in case the conversion went wrong.
 <br><a name="SEC1" href="#TOC1">SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS</a><br>
 <P>
 <b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, const uint32_t *<i>bytes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, const uint8_t *<i>bytes</i>,</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>int32_t pcre2_serialize_encode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, uint32_t **<i>serialized_bytes</i>,</b>
+<b>int32_t pcre2_serialize_encode(const pcre2_code **<i>codes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, uint8_t **<i>serialized_bytes</i>,</b>
 <b>  PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
@ -49,6 +49,15 @@ and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using
 PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be
 reloaded using the 8-bit library.
 </P>
+<P>
+Note that "serialization" in PCRE2 does not convert compiled patterns to an
+abstract format like Java or .NET serialization. The serialized output is
+really just a bytecode dump, which is why it can only be reloaded in the same
+environment as the one that created it. Hence the restrictions mentioned above.
+Applications that are not statically linked with a fixed version of PCRE2 must
+be prepared to recompile patterns from their sources, in order to be immune to
+PCRE2 upgrades.
+</P>
 <br><a name="SEC2" href="#TOC1">SECURITY CONCERNS</a><br>
 <P>
 The facility for saving and restoring compiled patterns is intended for use
@ -62,11 +71,11 @@ the byte stream that is passed to it.
 </P>
 <br><a name="SEC3" href="#TOC1">SAVING COMPILED PATTERNS</a><br>
 <P>
-Before compiled patterns can be saved they must be serialized, that is,
-converted to a stream of bytes. A single byte stream may contain any number of
-compiled patterns, but they must all use the same character tables. A single
-copy of the tables is included in the byte stream (its size is 1088 bytes). For
-more details of character tables, see the
+Before compiled patterns can be saved they must be serialized, which in PCRE2
+means converting the pattern to a stream of bytes. A single byte stream may
+contain any number of compiled patterns, but they must all use the same
+character tables. A single copy of the tables is included in the byte stream
+(its size is 1088 bytes). For more details of character tables, see the
 <a href="pcre2api.html#localesupport">section on locale support</a>
 in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
@ -85,7 +94,7 @@ of serialized patterns, or one of the following negative error codes:
 <pre>
  PCRE2_ERROR_BADDATA      the number of patterns is zero or less
  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
-  PCRE2_ERROR_MEMORY       memory allocation failed
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
  PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
 </pre>
@ -120,7 +129,9 @@ non-binary data, be sure that the file is opened for binary output.
 Serializing a set of patterns leaves the original data untouched, so they can
 still be used for matching. Their memory must eventually be freed in the usual
 way by calling <b>pcre2_code_free()</b>. When you have finished with the byte
-stream, it too must be freed by calling <b>pcre2_serialize_free()</b>.
+stream, it too must be freed by calling <b>pcre2_serialize_free()</b>. If this
+function is called with a NULL argument, it returns immediately without doing
+anything.
 </P>
 <br><a name="SEC4" href="#TOC1">RE-USING PRECOMPILED PATTERNS</a><br>
 <P>
@ -143,7 +154,6 @@ mangagement functions for the decoded patterns. If this argument is NULL,
 <b>malloc()</b> and <b>free()</b> are used. After deserialization, the byte
 stream is no longer needed and can be discarded.
 <pre>
-  int32_t number_of_codes;
  pcre2_code *list_of_codes[2];
  uint8_t *bytes = &#60;serialized data&#62;;
  int32_t number_of_codes =
@ -193,9 +203,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 21 March 2017
+Last updated: 27 June 2018
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@ -19,27 +19,31 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
 <li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
 <li><a name="TOC6" href="#SEC6">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
-<li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a>
-<li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
-<li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
-<li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
-<li><a name="TOC11" href="#SEC11">MATCH POINT RESET</a>
-<li><a name="TOC12" href="#SEC12">ALTERNATION</a>
-<li><a name="TOC13" href="#SEC13">CAPTURING</a>
-<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
-<li><a name="TOC15" href="#SEC15">COMMENT</a>
-<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
-<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
-<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
-<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
-<li><a name="TOC20" href="#SEC20">BACKREFERENCES</a>
-<li><a name="TOC21" href="#SEC21">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
-<li><a name="TOC22" href="#SEC22">CONDITIONAL PATTERNS</a>
-<li><a name="TOC23" href="#SEC23">BACKTRACKING CONTROL</a>
-<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
-<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
-<li><a name="TOC26" href="#SEC26">AUTHOR</a>
-<li><a name="TOC27" href="#SEC27">REVISION</a>
+<li><a name="TOC7" href="#SEC7">BINARY PROPERTIES FOR \p AND \P</a>
+<li><a name="TOC8" href="#SEC8">SCRIPT MATCHING WITH \p AND \P</a>
+<li><a name="TOC9" href="#SEC9">THE BIDI_CLASS PROPERTY FOR \p AND \P</a>
+<li><a name="TOC10" href="#SEC10">CHARACTER CLASSES</a>
+<li><a name="TOC11" href="#SEC11">QUANTIFIERS</a>
+<li><a name="TOC12" href="#SEC12">ANCHORS AND SIMPLE ASSERTIONS</a>
+<li><a name="TOC13" href="#SEC13">REPORTED MATCH POINT SETTING</a>
+<li><a name="TOC14" href="#SEC14">ALTERNATION</a>
+<li><a name="TOC15" href="#SEC15">CAPTURING</a>
+<li><a name="TOC16" href="#SEC16">ATOMIC GROUPS</a>
+<li><a name="TOC17" href="#SEC17">COMMENT</a>
+<li><a name="TOC18" href="#SEC18">OPTION SETTING</a>
+<li><a name="TOC19" href="#SEC19">NEWLINE CONVENTION</a>
+<li><a name="TOC20" href="#SEC20">WHAT \R MATCHES</a>
+<li><a name="TOC21" href="#SEC21">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
+<li><a name="TOC22" href="#SEC22">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
+<li><a name="TOC23" href="#SEC23">SCRIPT RUNS</a>
+<li><a name="TOC24" href="#SEC24">BACKREFERENCES</a>
+<li><a name="TOC25" href="#SEC25">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
+<li><a name="TOC26" href="#SEC26">CONDITIONAL PATTERNS</a>
+<li><a name="TOC27" href="#SEC27">BACKTRACKING CONTROL</a>
+<li><a name="TOC28" href="#SEC28">CALLOUTS</a>
+<li><a name="TOC29" href="#SEC29">SEE ALSO</a>
+<li><a name="TOC30" href="#SEC30">AUTHOR</a>
+<li><a name="TOC31" href="#SEC31">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
 <P>
@ -57,7 +61,8 @@ documentation. This document contains a quick-reference summary of the syntax.
 </P>
 <br><a name="SEC3" href="#TOC1">ESCAPED CHARACTERS</a><br>
 <P>
-This table applies to ASCII and Unicode environments.
+This table applies to ASCII and Unicode environments. An unrecognized escape
+sequence causes an error.
 <pre>
  \a         alarm, that is, the BEL character (hex 07)
  \cx        "control-x", where x is any ASCII printing character
@ -69,25 +74,34 @@ This table applies to ASCII and Unicode environments.
  \0dd       character with octal code 0dd
  \ddd       character with octal code ddd, or backreference
  \o{ddd..}  character with octal code ddd..
-  \U         "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
-  \uhhhh     character with hex code hhhh (if PCRE2_ALT_BSUX is set)
+  \N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
  \xhh       character with hex code hh
-  \x{hhh..}  character with hex code hhh..
+  \x{hh..}   character with hex code hh..
 </pre>
+If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the
+following are also recognized:
+<pre>
+  \U         the character "U"
+  \uhhhh     character with hex code hhhh
+  \u{hh..}   character with hex code hh.. but only for EXTRA_ALT_BSUX
+</pre>
+When \x is not followed by {, from zero to two hexadecimal digits are read,
+but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be
+recognized as a hexadecimal escape; otherwise it matches a literal "x".
+Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
+or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
+matches a literal "u".
+</P>
+<P>
 Note that \0dd is always an octal code. The treatment of backslash followed by
 a non-zero digit is complicated; for details see the section
 <a href="pcre2pattern.html#digitsafterbackslash">"Non-printing characters"</a>
 in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation, where details of escape processing in EBCDIC environments are
-also given.
-</P>
-<P>
-When \x is not followed by {, from zero to two hexadecimal digits are read,
-but if PCRE2_ALT_BSUX is set, \x must be followed by two hexadecimal digits to
-be recognized as a hexadecimal escape; otherwise it matches a literal "x".
-Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits,
-it matches a literal "u".
+also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not
+supported in EBCDIC environments. Note that \N not followed by an opening
+curly bracket has a different meaning (see below).
 </P>
 <br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
 <P>
@ -124,6 +138,11 @@ happening, \s and \w may also match characters with code points in the range
 sequences is changed to use Unicode properties and they match many more
 characters.
 </P>
+<P>
+Property descriptions in \p and \P are matched caselessly; hyphens,
+underscores, and white space are ignored, in accordance with Unicode's "loose
+matching" rules.
+</P>
 <br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
 <P>
 <pre>
@ -140,6 +159,7 @@ characters.
  Lo         Other letter
  Lt         Title case letter
  Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
  L&         Ll, Lu, or Lt

  M          Mark
@ -186,141 +206,58 @@ characters.
 Perl and POSIX space are now the same. Perl added VT to its space character set
 at release 5.18.
 </P>
-<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
+<br><a name="SEC7" href="#TOC1">BINARY PROPERTIES FOR \p AND \P</a><br>
 <P>
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cyrillic,
-Deseret,
-Devanagari,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gurmukhi,
-Han,
-Hangul,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Malayalam,
-Mandaic,
-Manichaean,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-New_Tai_Lue,
-Nko,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_South_Arabian,
-Old_Turkic,
-Oriya,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sora_Sompeng,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Ugaritic,
-Vai,
-Warang_Citi,
-Yi.
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\p and \P, along with their abbreviations, by running this command:
+<pre>
+  pcre2test -LP
+</PRE>
 </P>
-<br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
+<br><a name="SEC8" href="#TOC1">SCRIPT MATCHING WITH \p AND \P</a><br>
+<P>
+Many script names and their 4-letter abbreviations are recognized in
+\p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of
+course). You can obtain a list of these scripts by running this command:
+<pre>
+  pcre2test -LS
+</PRE>
+</P>
+<br><a name="SEC9" href="#TOC1">THE BIDI_CLASS PROPERTY FOR \p AND \P</a><br>
+<P>
+<pre>
+  \p{Bidi_Class:&#60;class&#62;}   matches a character with the given class
+  \p{BC:&#60;class&#62;}           matches a character with the given class
+</pre>
+The recognized classes are:
+<pre>
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          which space
+</PRE>
+</P>
+<br><a name="SEC10" href="#TOC1">CHARACTER CLASSES</a><br>
 <P>
 <pre>
  [...]       positive character class
@ -348,7 +285,7 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default,
 but some of them use Unicode properties if PCRE2_UCP is set. You can use
 \Q...\E inside a character class.
 </P>
-<br><a name="SEC9" href="#TOC1">QUANTIFIERS</a><br>
+<br><a name="SEC11" href="#TOC1">QUANTIFIERS</a><br>
 <P>
 <pre>
  ?           0 or 1, greedy
@ -369,7 +306,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  {n,}?       n or more, lazy
 </PRE>
 </P>
-<br><a name="SEC10" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
+<br><a name="SEC12" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
 <P>
 <pre>
  \b          word boundary
@ -387,48 +324,57 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  \G          first matching position in subject
 </PRE>
 </P>
-<br><a name="SEC11" href="#TOC1">MATCH POINT RESET</a><br>
+<br><a name="SEC13" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
 <P>
 <pre>
-  \K          reset start of match
+  \K          set reported start of match
 </pre>
+From release 10.38 \K is not permitted by default in lookaround assertions,
+for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+option is set, the previous behaviour is re-enabled. When this option is set,
 \K is honoured in positive assertions, but ignored in negative ones.
 </P>
-<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
+<br><a name="SEC14" href="#TOC1">ALTERNATION</a><br>
 <P>
 <pre>
  expr|expr|expr...
 </PRE>
 </P>
-<br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
+<br><a name="SEC15" href="#TOC1">CAPTURING</a><br>
 <P>
 <pre>
-  (...)           capturing group
-  (?&#60;name&#62;...)    named capturing group (Perl)
-  (?'name'...)    named capturing group (Perl)
-  (?P&#60;name&#62;...)   named capturing group (Python)
-  (?:...)         non-capturing group
-  (?|...)         non-capturing group; reset group numbers for
-                   capturing groups in each alternative
-</PRE>
+  (...)           capture group
+  (?&#60;name&#62;...)    named capture group (Perl)
+  (?'name'...)    named capture group (Perl)
+  (?P&#60;name&#62;...)   named capture group (Python)
+  (?:...)         non-capture group
+  (?|...)         non-capture group; reset group numbers for
+                   capture groups in each alternative
+</pre>
+In non-UTF modes, names may contain underscores and ASCII letters and digits;
+in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
+both cases, a name must not start with a digit.
 </P>
-<br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
+<br><a name="SEC16" href="#TOC1">ATOMIC GROUPS</a><br>
 <P>
 <pre>
-  (?&#62;...)         atomic, non-capturing group
+  (?&#62;...)         atomic non-capture group
+  (*atomic:...)   atomic non-capture group
 </PRE>
 </P>
-<br><a name="SEC15" href="#TOC1">COMMENT</a><br>
+<br><a name="SEC17" href="#TOC1">COMMENT</a><br>
 <P>
 <pre>
  (?#....)        comment (not nestable)
 </PRE>
 </P>
-<br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
+<br><a name="SEC18" href="#TOC1">OPTION SETTING</a><br>
 <P>
+Changes of these options within a group are automatically cancelled at the end
+of the group.
 <pre>
  (?i)            caseless
-  (?J)            allow duplicate names
+  (?J)            allow duplicate named groups
  (?m)            multiline
  (?n)            no auto capture
  (?s)            single line (dotall)
@ -436,13 +382,21 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  (?x)            extended: ignore white space except in classes
  (?xx)           as (?x) but also ignore space and tab in classes
  (?-...)         unset option(s)
+  (?^)            unset imnsx options
 </pre>
+Unsetting x or xx unsets both. Several options may be set at once, and a
+mixture of setting and unsetting such as (?i-x) is allowed, but there may be
+only one hyphen. Setting (but no unsetting) is allowed after (?^ for example
+(?^in). An option setting may appear at the start of a non-capture group, for
+example (?i:...).
+</P>
+<P>
 The following are recognized only at the very start of a pattern or after one
 of the newline or \R options with similar syntax. More than one of them may
 appear. For the first three, d is a decimal number.
 <pre>
  (*LIMIT_DEPTH=d) set the backtracking limit to d
-  (*LIMIT_HEAP=d)  set the heap size limit to d kilobytes
+  (*LIMIT_HEAP=d)  set the heap size limit to d * 1024 bytes
  (*LIMIT_MATCH=d) set the match limit to d
  (*NOTEMPTY)      set PCRE2_NOTEMPTY when matching
  (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
@ -459,7 +413,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The
 application can lock out the use of (*UTF) and (*UCP) by setting the
 PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.
 </P>
-<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
+<br><a name="SEC19" href="#TOC1">NEWLINE CONVENTION</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 settings with a similar syntax.
@ -472,7 +426,7 @@ settings with a similar syntax.
  (*NUL)          the NUL character (binary zero)
 </PRE>
 </P>
-<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
+<br><a name="SEC20" href="#TOC1">WHAT \R MATCHES</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 setting with a similar syntax.
@ -481,17 +435,51 @@ setting with a similar syntax.
  (*BSR_UNICODE)  any Unicode newline sequence
 </PRE>
 </P>
-<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
+<br><a name="SEC21" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
 <P>
 <pre>
-  (?=...)         positive look ahead
-  (?!...)         negative look ahead
-  (?&#60;=...)        positive look behind
-  (?&#60;!...)        negative look behind
+  (?=...)                     )
+  (*pla:...)                  ) positive lookahead
+  (*positive_lookahead:...)   )
+
+  (?!...)                     )
+  (*nla:...)                  ) negative lookahead
+  (*negative_lookahead:...)   )
+
+  (?&#60;=...)                    )
+  (*plb:...)                  ) positive lookbehind
+  (*positive_lookbehind:...)  )
+
+  (?&#60;!...)                    )
+  (*nlb:...)                  ) negative lookbehind
+  (*negative_lookbehind:...)  )
 </pre>
-Each top-level branch of a look behind must be of a fixed length.
+Each top-level branch of a lookbehind must be of a fixed length.
 </P>
-<br><a name="SEC20" href="#TOC1">BACKREFERENCES</a><br>
+<br><a name="SEC22" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
+<P>
+These assertions are specific to PCRE2 and are not Perl-compatible.
+<pre>
+  (?*...)                                )
+  (*napla:...)                           ) synonyms
+  (*non_atomic_positive_lookahead:...)   )
+
+  (?&#60;*...)                               )
+  (*naplb:...)                           ) synonyms
+  (*non_atomic_positive_lookbehind:...)  )
+</PRE>
+</P>
+<br><a name="SEC23" href="#TOC1">SCRIPT RUNS</a><br>
+<P>
+<pre>
+  (*script_run:...)           ) script run, can be backtracked into
+  (*sr:...)                   )
+
+  (*atomic_script_run:...)    ) atomic script run
+  (*asr:...)                  )
+</PRE>
+</P>
+<br><a name="SEC24" href="#TOC1">BACKREFERENCES</a><br>
 <P>
 <pre>
  \n              reference by number (can be ambiguous)
@ -508,26 +496,26 @@ Each top-level branch of a look behind must be of a fixed length.
  (?P=name)       reference by name (Python)
 </PRE>
 </P>
-<br><a name="SEC21" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
+<br><a name="SEC25" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
 <P>
 <pre>
  (?R)            recurse whole pattern
-  (?n)            call subpattern by absolute number
-  (?+n)           call subpattern by relative number
-  (?-n)           call subpattern by relative number
-  (?&name)        call subpattern by name (Perl)
-  (?P&#62;name)       call subpattern by name (Python)
-  \g&#60;name&#62;        call subpattern by name (Oniguruma)
-  \g'name'        call subpattern by name (Oniguruma)
-  \g&#60;n&#62;           call subpattern by absolute number (Oniguruma)
-  \g'n'           call subpattern by absolute number (Oniguruma)
-  \g&#60;+n&#62;          call subpattern by relative number (PCRE2 extension)
-  \g'+n'          call subpattern by relative number (PCRE2 extension)
-  \g&#60;-n&#62;          call subpattern by relative number (PCRE2 extension)
-  \g'-n'          call subpattern by relative number (PCRE2 extension)
+  (?n)            call subroutine by absolute number
+  (?+n)           call subroutine by relative number
+  (?-n)           call subroutine by relative number
+  (?&name)        call subroutine by name (Perl)
+  (?P&#62;name)       call subroutine by name (Python)
+  \g&#60;name&#62;        call subroutine by name (Oniguruma)
+  \g'name'        call subroutine by name (Oniguruma)
+  \g&#60;n&#62;           call subroutine by absolute number (Oniguruma)
+  \g'n'           call subroutine by absolute number (Oniguruma)
+  \g&#60;+n&#62;          call subroutine by relative number (PCRE2 extension)
+  \g'+n'          call subroutine by relative number (PCRE2 extension)
+  \g&#60;-n&#62;          call subroutine by relative number (PCRE2 extension)
+  \g'-n'          call subroutine by relative number (PCRE2 extension)
 </PRE>
 </P>
-<br><a name="SEC22" href="#TOC1">CONDITIONAL PATTERNS</a><br>
+<br><a name="SEC26" href="#TOC1">CONDITIONAL PATTERNS</a><br>
 <P>
 <pre>
  (?(condition)yes-pattern)
@ -542,7 +530,7 @@ Each top-level branch of a look behind must be of a fixed length.
  (?(R)               overall recursion condition
  (?(Rn)              specific numbered group recursion condition
  (?(R&name)          specific named group recursion condition
-  (?(DEFINE)          define subpattern for reference
+  (?(DEFINE)          define groups for reference
  (?(VERSION[&#62;]=n.m)  test PCRE2 version
  (?(assert)          assertion condition
 </pre>
@ -550,9 +538,13 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
 conditions or recursion tests. Such a condition is interpreted as a reference
 condition if the relevant named group exists.
 </P>
-<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
+<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
 <P>
-The following act immediately they are reached:
+All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
+name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
+if :NAME is present. The others just set a name for passing back to the caller,
+but this is not a name that (*SKIP) can see. The following act immediately they
+are reached:
 <pre>
  (*ACCEPT)       force successful match
  (*FAIL)         force backtrack; synonym (*F)
@ -565,15 +557,15 @@ pattern is not anchored.
 <pre>
  (*COMMIT)       overall failure, no advance of starting point
  (*PRUNE)        advance to next starting character
-  (*PRUNE:NAME)   equivalent to (*MARK:NAME)(*PRUNE)
  (*SKIP)         advance to current matching position
  (*SKIP:NAME)    advance to position corresponding to an earlier
                  (*MARK:NAME); if not found, the (*SKIP) is ignored
  (*THEN)         local failure, backtrack to next alternation
-  (*THEN:NAME)    equivalent to (*MARK:NAME)(*THEN)
-</PRE>
+</pre>
+The effect of one of these verbs in a group called as a subroutine is confined
+to the subroutine call.
 </P>
-<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
+<br><a name="SEC28" href="#TOC1">CALLOUTS</a><br>
 <P>
 <pre>
  (?C)            callout (assumed number 0)
@ -584,25 +576,25 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
 start and the end), and the starting delimiter { matched with the ending
 delimiter }. To encode the ending delimiter within the string, double it.
 </P>
-<br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC29" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2pattern</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
 <b>pcre2matching</b>(3), <b>pcre2</b>(3).
 </P>
-<br><a name="SEC26" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC30" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC27" href="#TOC1">REVISION</a><br>
+<br><a name="SEC31" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 17 June 2017
+Last updated: 12 January 2022
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -59,12 +59,7 @@ patterns, and the subject lines specify PCRE2 function options, control how the
 subject is processed, and what output is produced.
 </P>
 <P>
-As the original fairly simple PCRE library evolved, it acquired many different
-features, and as a result, the original <b>pcretest</b> program ended up with a
-lot of options in a messy, arcane syntax for testing all the features. The
-move to the new PCRE2 API provided an opportunity to re-implement the test
-program as <b>pcre2test</b>, with a cleaner modifier syntax. Nevertheless, there
-are still many obscure modifiers, some of which are specifically designed for
+There are many obscure modifiers, some of which are specifically designed for
 use in conjunction with the test script and data files that are distributed as
 part of PCRE2. All the modifiers are documented here, some without much
 justification, but many of them are unlikely to be of use except when testing
@ -83,16 +78,16 @@ to 8-bit code units for output.
 </P>
 <P>
 In the rest of this document, the names of library functions and structures
-are given in generic form, for example, <b>pcre_compile()</b>. The actual
+are given in generic form, for example, <b>pcre2_compile()</b>. The actual
 names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 <a name="inputencoding"></a></P>
 <br><a name="SEC3" href="#TOC1">INPUT ENCODING</a><br>
 <P>
 Input to <b>pcre2test</b> is processed line by line, either by calling the C
-library's <b>fgets()</b> function, or via the <b>libreadline</b> library. In some
-Windows environments character 26 (hex 1A) causes an immediate end of file, and
-no further data is read, so this character should be avoided unless you really
-want that action.
+library's <b>fgets()</b> function, or via the <b>libreadline</b> or <b>libedit</b>
+library. In some Windows environments character 26 (hex 1A) causes an immediate
+end of file, and no further data is read, so this character should be avoided
+unless you really want that action.
 </P>
 <P>
 The input is processed using using C's string functions, so must not
@ -129,7 +124,7 @@ to occur).
 UTF-8 (in its original definition) is not capable of encoding values greater
 than 0x7fffffff, but such values can be handled by the 32-bit library. When
 testing this library in non-UTF mode with <b>utf8_input</b> set, if any
-character is preceded by the byte 0xff (which is an illegal byte in UTF-8)
+character is preceded by the byte 0xff (which is an invalid byte in UTF-8)
 0x80000000 is added to the character's value. This is the only way of passing
 such code points in a pattern string. For subject strings, using an escape
 sequence is preferable.
@ -242,19 +237,38 @@ Behave as if each pattern line has the <b>jit</b> modifier; after successful
 compilation, each pattern is passed to the just-in-time compiler, if available.
 </P>
 <P>
+<b>-jitfast</b>
+Behave as if each pattern line has the <b>jitfast</b> modifier; after
+successful compilation, each pattern is passed to the just-in-time compiler, if
+available, and each subject line is passed directly to the JIT matcher via its
+"fast path".
+</P>
+<P>
 <b>-jitverify</b>
 Behave as if each pattern line has the <b>jitverify</b> modifier; after
 successful compilation, each pattern is passed to the just-in-time compiler, if
-available, and the use of JIT is verified.
+available, and the use of JIT for matching is verified.
 </P>
 <P>
 <b>-LM</b>
 List modifiers: write a list of available pattern and subject modifiers to the
 standard output, then exit with zero exit code. All other options are ignored.
-If both -C and -LM are present, whichever is first is recognized.
+If both -C and any -Lx options are present, whichever is first is recognized.
 </P>
 <P>
-\fB-pattern\fB <i>modifier-list</i>
+<b>-LP</b>
+List properties: write a list of recognized Unicode properties to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-LS</b>
+List scripts: write a list of recogized Unicode script names to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-pattern</b> <i>modifier-list</i>
 Behave as if each pattern line contains the given modifiers.
 </P>
 <P>
@ -264,7 +278,7 @@ Do not output the version number of <b>pcre2test</b> at the start of execution.
 <P>
 <b>-S</b> <i>size</i>
 On Unix-like systems, set the size of the run-time stack to <i>size</i>
-megabytes.
+mebibytes (units of 1024*1024 bytes).
 </P>
 <P>
 <b>-subject</b> <i>modifier-list</i>
@ -315,7 +329,8 @@ number of subject lines to be matched against that pattern. In between sets of
 test data, command lines that begin with # may appear. This file format, with
 some restrictions, can also be processed by the <b>perltest.sh</b> script that
 is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
-and Perl is the same.
+and Perl is the same. For a specification of <b>perltest.sh</b>, see the
+comments near its beginning. See also the #perltest command below.
 </P>
 <P>
 When the input is a terminal, <b>pcre2test</b> prompts for each line of input,
@ -367,6 +382,12 @@ output.
 This command is used to load a set of precompiled patterns from a file, as
 described in the section entitled "Saving and restoring compiled patterns"
 <a href="#saverestore">below.</a>
+<pre>
+  #loadtables &#60;filename&#62;
+</pre>
+This command is used to load a set of binary character tables that can be
+accessed by the tables=3 qualifier. Such tables can be created by the
+<b>pcre2_dftables</b> program with the -b option.
 <pre>
  #newline_default [&#60;newline-list&#62;]
 </pre>
@ -406,13 +427,20 @@ patterns. Modifiers on a pattern can change these settings.
 <pre>
  #perltest
 </pre>
-The appearance of this line causes all subsequent modifier settings to be
-checked for compatibility with the <b>perltest.sh</b> script, which is used to
-confirm that Perl gives the same results as PCRE2. Also, apart from comment
-lines, none of the other command lines are permitted, because they and many
-of the modifiers are specific to <b>pcre2test</b>, and should not be used in
-test files that are also processed by <b>perltest.sh</b>. The <b>#perltest</b>
-command helps detect tests that are accidentally put in the wrong file.
+This line is used in test files that can also be processed by <b>perltest.sh</b>
+to confirm that Perl gives the same results as PCRE2. Subsequent tests are
+checked for the use of <b>pcre2test</b> features that are incompatible with the
+<b>perltest.sh</b> script.
+</P>
+<P>
+Patterns must use '/' as their delimiter, and only certain modifiers are
+supported. Comment lines, #pattern commands, and #subject commands that set or
+unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and
+#newline_default commands, which are needed in the relevant pcre2test files,
+are silently ignored. All other command lines are ignored, but give a warning
+message. The <b>#perltest</b> command helps detect tests that are accidentally
+put in the wrong file or use the wrong delimiter. For more details of the
+<b>perltest.sh</b> script see the comments it contains.
 <pre>
  #pop [&#60;modifiers&#62;]
  #popcopy [&#60;modifiers&#62;]
@ -465,15 +493,17 @@ excluding pattern meta-characters):
 </pre>
 This is interpreted as the pattern's delimiter. A regular expression may be
 continued over several input lines, in which case the newline characters are
-included within it. It is possible to include the delimiter within the pattern
-by escaping it with a backslash, for example
+included within it. It is possible to include the delimiter as a literal within
+the pattern by escaping it with a backslash, for example
 <pre>
  /abc\/def/
 </pre>
 If you do this, the escape and the delimiter form part of the pattern, but
-since the delimiters are all non-alphanumeric, this does not affect its
-interpretation. If the terminating delimiter is immediately followed by a
-backslash, for example,
+since the delimiters are all non-alphanumeric, the inclusion of the backslash
+does not affect the pattern's interpretation. Note, however, that this trick
+does not work within \Q...\E literal bracketing because the backslash will
+itself be interpreted as a literal. If the terminating delimiter is immediately
+followed by a backslash, for example,
 <pre>
  /abc/\
 </pre>
@ -491,11 +521,11 @@ A pattern can be followed by a modifier list (details below).
 </P>
 <br><a name="SEC9" href="#TOC1">SUBJECT LINE SYNTAX</a><br>
 <P>
-Before each subject line is passed to <b>pcre2_match()</b> or
-<b>pcre2_dfa_match()</b>, leading and trailing white space is removed, and the
-line is scanned for backslash escapes, unless the <b>subject_literal</b>
-modifier was set for the pattern. The following provide a means of encoding
-non-printing characters in a visible way:
+Before each subject line is passed to <b>pcre2_match()</b>,
+<b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>, leading and trailing white
+space is removed, and the line is scanned for backslash escapes, unless the
+<b>subject_literal</b> modifier was set for the pattern. The following provide a
+means of encoding non-printing characters in a visible way:
 <pre>
  \a         alarm (BEL, \x07)
  \b         backspace (\x08)
@ -592,6 +622,7 @@ way <b>pcre2_compile()</b> behaves. See
 for a description of the effects of these options.
 <pre>
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      allow_lookaround_bsk      set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
      allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
      alt_bsux                  set PCRE2_ALT_BSUX
      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
@ -604,11 +635,14 @@ for a description of the effects of these options.
  /s  dotall                    set PCRE2_DOTALL
      dupnames                  set PCRE2_DUPNAMES
      endanchored               set PCRE2_ENDANCHORED
+      escaped_cr_is_lf          set PCRE2_EXTRA_ESCAPED_CR_IS_LF
  /x  extended                  set PCRE2_EXTENDED
  /xx extended_more             set PCRE2_EXTENDED_MORE
+      extra_alt_bsux            set PCRE2_EXTRA_ALT_BSUX
      firstline                 set PCRE2_FIRSTLINE
      literal                   set PCRE2_LITERAL
      match_line                set PCRE2_EXTRA_MATCH_LINE
+      match_invalid_utf         set PCRE2_MATCH_INVALID_UTF
      match_unset_backref       set PCRE2_MATCH_UNSET_BACKREF
      match_word                set PCRE2_EXTRA_MATCH_WORD
  /m  multiline                 set PCRE2_MULTILINE
@ -667,7 +701,7 @@ heavily used in the test files.
      pushcopy                  push a copy onto the stack
      stackguard=&#60;number&#62;       test the stackguard feature
      subject_literal           treat all subject lines as literal
-      tables=[0|1|2]            select internal tables
+      tables=[0|1|2|3]          select internal tables
      use_length                do not zero-terminate the pattern
      utf8_input                treat input as UTF-8
 </pre>
@ -679,8 +713,8 @@ Newline and \R handling
 <P>
 The <b>bsr</b> modifier specifies what \R in a pattern should match. If it is
 set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to "unicode",
-\R matches any Unicode newline sequence. The default is specified when PCRE2
-is built, with the default default being Unicode.
+\R matches any Unicode newline sequence. The default can be specified when
+PCRE2 is built; if it is not, the default is set to Unicode.
 </P>
 <P>
 The <b>newline</b> modifier specifies which characters are to be interpreted as
@ -713,14 +747,14 @@ information is obtained from the <b>pcre2_pattern_info()</b> function. Here are
 some typical examples:
 <pre>
    re&#62; /(?i)(^a|^b)/m,info
-  Capturing subpattern count = 1
+  Capture group count = 1
  Compile options: multiline
  Overall options: caseless multiline
  First code unit at start or follows newline
  Subject length lower bound = 1

    re&#62; /(?i)abc/info
-  Capturing subpattern count = 0
+  Capture group count = 0
  Compile options: &#60;none&#62;
  Overall options: caseless
  First code unit = 'a' (caseless)
@ -734,7 +768,9 @@ options, the line is omitted. "First code unit" is where any match must start;
 if there is more than one they are listed as "starting code units". "Last code
 unit" is the last literal code unit that must be present in any match. This is
 not necessarily the last character. These lines are omitted if no starting or
-ending code units are recorded.
+ending code units are recorded. The subject length line is omitted when
+<b>no_start_optimize</b> is set because the minimum length is not calculated
+when it can never be used.
 </P>
 <P>
 The <b>framesize</b> modifier shows the size, in bytes, of the storage frames
@ -1013,18 +1049,20 @@ Using alternative character tables
 </b><br>
 <P>
 The value specified for the <b>tables</b> modifier must be one of the digits 0,
-1, or 2. It causes a specific set of built-in character tables to be passed to
-<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
-different character tables. The digit specifies the tables as follows:
+1, 2, or 3. It causes a specific set of built-in character tables to be passed
+to <b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour
+with different character tables. The digit specifies the tables as follows:
 <pre>
  0   do not pass any special character tables
  1   the default ASCII tables, as distributed in
        pcre2_chartables.c.dist
  2   a set of tables defining ISO 8859 characters
+  3   a set of tables loaded by the #loadtables command
 </pre>
-In table 2, some characters whose codes are greater than 128 are identified as
-letters, digits, spaces, etc. Setting alternate character tables and a locale
-are mutually exclusive.
+In tables 2, some characters whose codes are greater than 128 are identified as
+letters, digits, spaces, etc. Tables 3 can be used only after a
+<b>#loadtables</b> command has loaded them from a binary file. Setting alternate
+character tables and a locale are mutually exclusive.
 </P>
 <br><b>
 Setting certain match controls
@ -1036,20 +1074,27 @@ modifier list, in which case they are applied to every subject line that is
 processed with that pattern. These modifiers do not affect the compilation
 process.
 <pre>
-      aftertext                  show text after match
-      allaftertext               show text after captures
-      allcaptures                show all captures
-      allusedtext                show all consulted text
-      altglobal                  alternative global matching
-  /g  global                     global matching
-      jitstack=&#60;n&#62;               set size of JIT stack
-      mark                       show mark values
-      replace=&#60;string&#62;           specify a replacement string
-      startchar                  show starting character when relevant
-      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
-      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
-      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
-      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
+      aftertext                   show text after match
+      allaftertext                show text after captures
+      allcaptures                 show all captures
+      allvector                   show the entire ovector
+      allusedtext                 show all consulted text
+      altglobal                   alternative global matching
+  /g  global                      global matching
+      jitstack=&#60;n&#62;                set size of JIT stack
+      mark                        show mark values
+      replace=&#60;string&#62;            specify a replacement string
+      startchar                   show starting character when relevant
+      substitute_callout          use substitution callouts
+      substitute_extended         use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal          use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched          use PCRE2_SUBSTITUTE_MATCHED
+      substitute_overflow_length  use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=&#60;n&#62;         skip substitution &#60;n&#62;
+      substitute_stop=&#60;n&#62;         skip substitution &#60;n&#62; and following
+      substitute_unknown_unset    use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+      substitute_unset_empty      use PCRE2_SUBSTITUTE_UNSET_EMPTY
 </pre>
 These modifiers may not appear in a <b>#pattern</b> command. If you want them as
 defaults, set them in a <b>#subject</b> command.
@ -1178,11 +1223,12 @@ Setting match controls
 The following modifiers affect the matching process or request additional
 information. Some of them may also be specified on a pattern line (see above),
 in which case they apply to every subject line that is matched against that
-pattern.
+pattern, but can be overridden by modifiers on the subject.
 <pre>
      aftertext                  show text after match
      allaftertext               show text after captures
      allcaptures                show all captures
+      allvector                  show the entire ovector
      allusedtext                show all consulted text (non-JIT only)
      altglobal                  alternative global matching
      callout_capture            show captures at callout time
@ -1195,16 +1241,19 @@ pattern.
      copy=&#60;number or name&#62;      copy captured substring
      depth_limit=&#60;n&#62;            set a depth limit
      dfa                        use <b>pcre2_dfa_match()</b>
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
      get=&#60;number or name&#62;       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
-      heap_limit=&#60;n&#62;             set a limit on heap memory
+      heap_limit=&#60;n&#62;             set a limit on heap memory (Kbytes)
      jitstack=&#60;n&#62;               set size of JIT stack
      mark                       show mark values
      match_limit=&#60;n&#62;            set a match limit
      memory                     show heap memory usage
      null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
      offset=&#60;n&#62;                 set starting offset
      offset_limit=&#60;n&#62;           set offset limit
      ovector=&#60;n&#62;                set size of output vector
@ -1212,8 +1261,14 @@ pattern.
      replace=&#60;string&#62;           specify a replacement string
      startchar                  show startchar when relevant
      startoffset=&#60;n&#62;            same as offset=&#60;n&#62;
+      substitute_callout         use substitution callouts
      substitute_extedded        use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal         use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched         use PCRE2_SUBSTITUTE_MATCHED
      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=&#60;n&#62;        skip substitution number n
+      substitute_stop=&#60;n&#62;        skip substitution number n and greater
      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
      zero_terminate             pass the subject as zero-terminated
@ -1237,22 +1292,27 @@ following line with a plus character following the capture number.
 </P>
 <P>
 The <b>allusedtext</b> modifier requests that all the text that was consulted
-during a successful pattern match by the interpreter should be shown. This
-feature is not supported for JIT matching, and if requested with JIT it is
-ignored (with a warning message). Setting this modifier affects the output if
-there is a lookbehind at the start of a match, or a lookahead at the end, or if
-\K is used in the pattern. Characters that precede or follow the start and end
-of the actual match are indicated in the output by '&#60;' or '&#62;' characters
-underneath them. Here is an example:
+during a successful pattern match by the interpreter should be shown, for both
+full and partial matches. This feature is not supported for JIT matching, and
+if requested with JIT it is ignored (with a warning message). Setting this
+modifier affects the output if there is a lookbehind at the start of a match,
+or, for a complete match, a lookahead at the end, or if \K is used in the
+pattern. Characters that precede or follow the start and end of the actual
+match are indicated in the output by '&#60;' or '&#62;' characters underneath them.
+Here is an example:
 <pre>
    re&#62; /(?&#60;=pqr)abc(?=xyz)/
  data&#62; 123pqrabcxyz456\=allusedtext
   0: pqrabcxyz
      &#60;&#60;&#60;   &#62;&#62;&#62;
+  data&#62; 123pqrabcxy\=ph,allusedtext
+  Partial match: pqrabcxy
+                 &#60;&#60;&#60;
 </pre>
-This shows that the matched string is "abc", with the preceding and following
-strings "pqr" and "xyz" having been consulted during the match (when processing
-the assertions).
+The first, complete match shows that the matched string is "abc", with the
+preceding and following strings "pqr" and "xyz" having been consulted during
+the match (when processing the assertions). The partial match can indicate only
+the preceding string.
 </P>
 <P>
 The <b>startchar</b> modifier requests that the starting character for the match
@ -1279,10 +1339,28 @@ captured parentheses be output after a match. By default, only those up to the
 highest one actually used in the match are output (corresponding to the return
 code from <b>pcre2_match()</b>). Groups that did not take part in the match
 are output as "&#60;unset&#62;". This modifier is not relevant for DFA matching (which
-does no capturing); it is ignored, with a warning message, if present.
+does no capturing) and does not apply when <b>replace</b> is specified; it is
+ignored, with a warning message, if present.
 </P>
 <br><b>
-Testing callouts
+Showing the entire ovector, for all outcomes
+</b><br>
+<P>
+The <b>allvector</b> modifier requests that the entire ovector be shown,
+whatever the outcome of the match. Compare <b>allcaptures</b>, which shows only
+up to the maximum number of capture groups for the pattern, and then only for a
+successful complete non-DFA match. This modifier, which acts after any match
+result, and also for DFA matching, provides a means of checking that there are
+no unexpected modifications to ovector fields. Before each match attempt, the
+ovector is filled with a special value, and if this is found in both elements
+of a capturing pair, "&#60;unchanged&#62;" is output. After a successful match, this
+applies to all groups after the maximum capture group for the pattern. In other
+cases it applies to the entire ovector. After a partial match, the first two
+elements are the only ones that should be set. After a DFA match, the amount of
+ovector that is used depends on the number of matches that were found.
+</P>
+<br><b>
+Testing pattern callouts
 </b><br>
 <P>
 A callout function is supplied when <b>pcre2test</b> calls the library matching
@ -1290,6 +1368,9 @@ functions, unless <b>callout_none</b> is specified. Its behaviour can be
 controlled by various modifiers listed above whose names begin with
 <b>callout_</b>. Details are given in the section entitled "Callouts"
 <a href="#callouts">below.</a>
+Testing callouts from <b>pcre2_substitute()</b> is decribed separately in
+"Testing the substitution function"
+<a href="#substitution">below.</a>
 </P>
 <br><b>
 Finding all matches in a string
@ -1321,8 +1402,8 @@ Testing substring extraction functions
 <P>
 The <b>copy</b> and <b>get</b> modifiers can be used to test the
 <b>pcre2_substring_copy_xxx()</b> and <b>pcre2_substring_get_xxx()</b> functions.
-They can be given more than once, and each can specify a group name or number,
-for example:
+They can be given more than once, and each can specify a capture group name or
+number, for example:
 <pre>
   abcd\=copy=1,copy=3,get=G1
 </pre>
@ -1341,15 +1422,21 @@ instead of a colon. This is in addition to the normal full list. The string
 length (that is, the return from the extraction function) is given in
 parentheses after each substring, followed by the name when the extraction was
 by name.
-</P>
+<a name="substitution"></a></P>
 <br><b>
 Testing the substitution function
 </b><br>
 <P>
 If the <b>replace</b> modifier is set, the <b>pcre2_substitute()</b> function is
-called instead of one of the matching functions. Note that replacement strings
-cannot contain commas, because a comma signifies the end of a modifier. This is
-not thought to be an issue in a test program.
+called instead of one of the matching functions (or after one call of
+<b>pcre2_match()</b> in the case of PCRE2_SUBSTITUTE_MATCHED). Note that
+replacement strings cannot contain commas, because a comma signifies the end of
+a modifier. This is not thought to be an issue in a test program.
+</P>
+<P>
+Specifying a completely empty replacement string disables this modifier.
+However, it is possible to specify an empty replacement by providing a buffer
+length, as described below, for an otherwise empty replacement.
 </P>
 <P>
 Unlike subject strings, <b>pcre2test</b> does not process replacement strings
@ -1365,11 +1452,16 @@ for <b>pcre2_substitute()</b>:
 <pre>
  global                      PCRE2_SUBSTITUTE_GLOBAL
  substitute_extended         PCRE2_SUBSTITUTE_EXTENDED
+  substitute_literal          PCRE2_SUBSTITUTE_LITERAL
+  substitute_matched          PCRE2_SUBSTITUTE_MATCHED
  substitute_overflow_length  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+  substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
  substitute_unknown_unset    PCRE2_SUBSTITUTE_UNKNOWN_UNSET
  substitute_unset_empty      PCRE2_SUBSTITUTE_UNSET_EMPTY
-
-</PRE>
+</pre>
+See the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation for details of these options.
 </P>
 <P>
 After a successful substitution, the modified string is output, preceded by the
@ -1399,10 +1491,10 @@ The default action of <b>pcre2_substitute()</b> is to return
 PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
 <b>substitute_overflow_length</b> modifier), <b>pcre2_substitute()</b> continues
-to go through the motions of matching and substituting, in order to compute the
-size of buffer that is required. When this happens, <b>pcre2test</b> shows the
-required buffer length (which includes space for the trailing zero) as part of
-the error message. For example:
+to go through the motions of matching and substituting (but not doing any
+callouts), in order to compute the size of buffer that is required. When this
+happens, <b>pcre2test</b> shows the required buffer length (which includes space
+for the trailing zero) as part of the error message. For example:
 <pre>
  /abc/substitute_overflow_length
      123abc123\=replace=[9]XYZ
@ -1413,16 +1505,59 @@ matching provokes an error return ("bad option value") from
 <b>pcre2_substitute()</b>.
 </P>
 <br><b>
+Testing substitute callouts
+</b><br>
+<P>
+If the <b>substitute_callout</b> modifier is set, a substitution callout
+function is set up. The <b>null_context</b> modifier must not be set, because
+the address of the callout function is passed in a match context. When the
+callout function is called (after each substitution), details of the the input
+and output strings are output. For example:
+<pre>
+  /abc/g,replace=&#60;$0&#62;,substitute_callout
+      abcdefabcpqr
+   1(1) Old 0 3 "abc" New 0 5 "&#60;abc&#62;"
+   2(1) Old 6 9 "abc" New 8 13 "&#60;abc&#62;"
+   2: &#60;abc&#62;def&#60;abc&#62;pqr
+</pre>
+The first number on each callout line is the count of matches. The
+parenthesized number is the number of pairs that are set in the ovector (that
+is, one more than the number of capturing groups that were set). Then are
+listed the offsets of the old substring, its contents, and the same for the
+replacement.
+</P>
+<P>
+By default, the substitution callout function returns zero, which accepts the
+replacement and causes matching to continue if /g was used. Two further
+modifiers can be used to test other return values. If <b>substitute_skip</b> is
+set to a value greater than zero the callout function returns +1 for the match
+of that number, and similarly <b>substitute_stop</b> returns -1. These cause the
+replacement to be rejected, and -1 causes no further matching to take place. If
+either of them are set, <b>substitute_callout</b> is assumed. For example:
+<pre>
+  /abc/g,replace=&#60;$0&#62;,substitute_skip=1
+      abcdefabcpqr
+   1(1) Old 0 3 "abc" New 0 5 "&#60;abc&#62; SKIPPED"
+   2(1) Old 6 9 "abc" New 6 11 "&#60;abc&#62;"
+   2: abcdef&#60;abc&#62;pqr
+      abcdefabcpqr\=substitute_stop=1
+   1(1) Old 0 3 "abc" New 0 5 "&#60;abc&#62; STOPPED"
+   1: abcdefabcpqr
+</pre>
+If both are set for the same number, stop takes precedence. Only a single skip
+or stop is supported, which is sufficient for testing that the feature works.
+</P>
+<br><b>
 Setting the JIT stack size
 </b><br>
 <P>
 The <b>jitstack</b> modifier provides a way of setting the maximum stack size
 that is used by the just-in-time optimization code. It is ignored if JIT
-optimization is not being used. The value is a number of kilobytes. Setting
-zero reverts to the default of 32K. Providing a stack that is larger than the
-default is necessary only for very complicated patterns. If <b>jitstack</b> is
-set non-zero on a subject line it overrides any value that was set on the
-pattern.
+optimization is not being used. The value is a number of kibibytes (units of
+1024 bytes). Setting zero reverts to the default of 32KiB. Providing a stack
+that is larger than the default is necessary only for very complicated
+patterns. If <b>jitstack</b> is set non-zero on a subject line it overrides any
+value that was set on the pattern.
 </P>
 <br><b>
 Setting heap, match, and depth limits
@ -1430,7 +1565,7 @@ Setting heap, match, and depth limits
 <P>
 The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
 the appropriate limits in the match context. These values are ignored when the
-<b>find_limits</b> modifier is specified.
+<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
 </P>
 <br><b>
 Finding minimum limits
@ -1438,20 +1573,21 @@ Finding minimum limits
 <P>
 If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
 calls the relevant matching function several times, setting different values in
-the match context via <b>pcre2_set_heap_limit(), \fBpcre2_set_match_limit()</b>,
-or <b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
-parameter that allows the match to complete without error.
+the match context via <b>pcre2_set_heap_limit()</b>,
+<b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 </P>
 <P>
-If JIT is being used, only the match limit is relevant. If DFA matching is
-being used, only the depth limit is relevant.
-</P>
-<P>
-The <i>match_limit</i> number is a measure of the amount of backtracking
-that takes place, and learning the minimum value can be instructive. For most
-simple matches, the number is quite small, but for patterns with very large
-numbers of matching possibilities, it can become large very quickly with
-increasing length of subject string.
+When using this modifier, the pattern should not contain any limit settings
+such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
+lower than the minimum matching value, the minimum value cannot be found
+because <b>pcre2_set_match_limit()</b> etc. are only able to reduce the value of
+an in-pattern limit; they cannot increase it.
 </P>
 <P>
 For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
@ -1460,6 +1596,20 @@ searched). In the case of DFA matching, <i>depth_limit</i> controls the depth of
 recursive calls of the internal function that is used for handling pattern
 recursion, lookaround assertions, and atomic groups.
 </P>
+<P>
+For non-DFA matching, the <i>match_limit</i> number is a measure of the amount
+of backtracking that takes place, and learning the minimum value can be
+instructive. For most simple matches, the number is quite small, but for
+patterns with very large numbers of matching possibilities, it can become large
+very quickly with increasing length of subject string. In the case of DFA
+matching, <i>match_limit</i> controls the total number of calls, both recursive
+and non-recursive, to the internal matching function, thus controlling the
+overall amount of computing resource that is used.
+</P>
+<P>
+For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
+(units of 1024 bytes), limits the amount of heap memory used for matching.
+</P>
 <br><b>
 Showing MARK names
 </b><br>
@ -1476,13 +1626,12 @@ Showing memory usage
 <P>
 The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b>. These occur only when a match requires a bigger vector
-than the default for remembering backtracking points. In many cases there will
-be no heap memory used and therefore no additional output. No heap memory is
-allocated during matching with <b>pcre2_dfa_match</b> or with JIT, so in those
-cases the <b>memory</b> modifier never has any effect. For this modifier to
-work, the <b>null_context</b> modifier must not be set on both the pattern and
-the subject, though it can be set on one or the other.
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
+is used only when a match requires more internal workspace that the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
+<b>null_context</b> modifier must not be set on both the pattern and the
+subject, though it can be set on one or the other.
 </P>
 <br><b>
 Setting a starting offset
@ -1534,15 +1683,21 @@ When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 </P>
 <br><b>
-Passing a NULL context
+Passing a NULL context, subject, or replacement
 </b><br>
 <P>
 Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
-<b>pcre2_dfa_match()</b> or <b>pcre2_jit_match()</b>. If the <b>null_context</b>
-modifier is set, however, NULL is passed. This is for testing that the matching
-functions behave correctly in this case (they use default values). This
-modifier cannot be used with the <b>find_limits</b> modifier or when testing the
-substitution function.
+<b>pcre2_dfa_match()</b>, <b>pcre2_jit_match()</b> or <b>pcre2_substitute()</b>.
+If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
+testing that the matching and substitution functions behave correctly in this
+case (they use default values). This modifier cannot be used with the
+<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
+modifiers.
+</P>
+<P>
+Similarly, for testing purposes, if the <b>null_subject</b> or
+<b>null_replacement</b> modifier is set, the subject or replacement string
+pointers are passed as NULL, respectively, to the relevant functions.
 </P>
 <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
 <P>
@ -1689,7 +1844,7 @@ restart the match with additional subject data by means of the
 <b>dfa_restart</b> modifier. For example:
 <pre>
    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
-  data&#62; 23ja\=P,dfa
+  data&#62; 23ja\=ps,dfa
  Partial match: 23ja
  data&#62; n05\=dfa,dfa_restart
   0: n05
@ -1913,15 +2068,21 @@ documentation. In this section we describe the features of <b>pcre2test</b> that
 can be used to test these functions.
 </P>
 <P>
-When a pattern with <b>push</b> modifier is successfully compiled, it is pushed
-onto a stack of compiled patterns, and <b>pcre2test</b> expects the next line to
-contain a new pattern (or command) instead of a subject line. By contrast,
-the <b>pushcopy</b> modifier causes a copy of the compiled pattern to be
-stacked, leaving the original available for immediate matching. By using
-<b>push</b> and/or <b>pushcopy</b>, a number of patterns can be compiled and
-retained. These modifiers are incompatible with <b>posix</b>, and control
-modifiers that act at match time are ignored (with a message) for the stacked
-patterns. The <b>jitverify</b> modifier applies only at compile time.
+Note that "serialization" in PCRE2 does not convert compiled patterns to an
+abstract format like Java or .NET. It just makes a reloadable byte code stream.
+Hence the restrictions on reloading mentioned above.
+</P>
+<P>
+In <b>pcre2test</b>, when a pattern with <b>push</b> modifier is successfully
+compiled, it is pushed onto a stack of compiled patterns, and <b>pcre2test</b>
+expects the next line to contain a new pattern (or command) instead of a
+subject line. By contrast, the <b>pushcopy</b> modifier causes a copy of the
+compiled pattern to be stacked, leaving the original available for immediate
+matching. By using <b>push</b> and/or <b>pushcopy</b>, a number of patterns can
+be compiled and retained. These modifiers are incompatible with <b>posix</b>,
+and control modifiers that act at match time are ignored (with a message) for
+the stacked patterns. The <b>jitverify</b> modifier applies only at compile
+time.
 </P>
 <P>
 The command
@ -1975,16 +2136,16 @@ on the stack.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 21 December 2017
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -16,62 +16,87 @@ please consult the man page, in case the conversion went wrong.
 UNICODE AND UTF SUPPORT
 </b><br>
 <P>
-When PCRE2 is built with Unicode support (which is the default), it has
-knowledge of Unicode character properties and can process text strings in
-UTF-8, UTF-16, or UTF-32 format (depending on the code unit width). However, by
-default, PCRE2 assumes that one code unit is one character. To process a
-pattern as a UTF string, where a character may require more than one code unit,
-you must call
-<a href="pcre2_compile.html"><b>pcre2_compile()</b></a>
-with the PCRE2_UTF option flag, or the pattern must start with the sequence
-(*UTF). When either of these is the case, both the pattern and any subject
-strings that are matched against it are treated as UTF strings instead of
-strings of individual one-code-unit characters.
+PCRE2 is normally built with Unicode support, though if you do not need it, you
+can build it without, in which case the library will be smaller. With Unicode
+support, PCRE2 has knowledge of Unicode character properties and can process
+strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit
+width), but this is not the default. Unless specifically requested, PCRE2
+treats each code unit in a string as one character.
 </P>
 <P>
-If you do not need Unicode support you can build PCRE2 without it, in which
-case the library will be smaller.
+There are two ways of telling PCRE2 to switch to UTF mode, where characters may
+consist of more than one code unit and the range of values is constrained. The
+program can call
+<a href="pcre2_compile.html"><b>pcre2_compile()</b></a>
+with the PCRE2_UTF option, or the pattern may start with the sequence (*UTF).
+However, the latter facility can be locked out by the PCRE2_NEVER_UTF option.
+That is, the programmer can prevent the supplier of the pattern from switching
+to UTF mode.
+</P>
+<P>
+Note that the PCRE2_MATCH_INVALID_UTF option (see
+<a href="#matchinvalid">below)</a>
+forces PCRE2_UTF to be set.
+</P>
+<P>
+In UTF mode, both the pattern and any subject strings that are matched against
+it are treated as UTF strings instead of strings of individual one-code-unit
+characters. There are also some other changes to the way characters are
+handled, as documented below.
 </P>
 <br><b>
 UNICODE PROPERTY SUPPORT
 </b><br>
 <P>
 When PCRE2 is built with Unicode support, the escape sequences \p{..},
-\P{..}, and \X can be used. The Unicode properties that can be tested are
-limited to the general category properties such as Lu for an upper case letter
-or Nd for a decimal number, the Unicode script names such as Arabic or Han, and
-the derived properties Any and L&. Full lists are given in the
+\P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting.
+The Unicode properties that can be tested are a subset of those that Perl
+supports. Currently they are limited to the general category properties such as
+Lu for an upper case letter or Nd for a decimal number, the Unicode script
+names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived
+properties Any and LC (synonym L&). Full lists are given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 and
 <a href="pcre2syntax.html"><b>pcre2syntax</b></a>
-documentation. Only the short names for properties are supported. For example,
-\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE2 does not support this.
+documentation. In general, only the short names for properties are supported.
+For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not
+supported. Furthermore, in Perl, many properties may optionally be prefixed by
+"Is", for compatibility with Perl 5.6. PCRE2 does not support this.
 </P>
 <br><b>
 WIDE CHARACTERS AND UTF MODES
 </b><br>
 <P>
-Codepoints less than 256 can be specified in patterns by either braced or
+Code points less than 256 can be specified in patterns by either braced or
 unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger
 values have to use braced sequences. Unbraced octal code points up to \777 are
 also recognized; larger ones can be coded using \o{...}.
 </P>
 <P>
-In UTF modes, repeat quantifiers apply to complete UTF characters, not to
+The escape sequence \N{U+&#60;hex digits&#62;} is recognized as another way of
+specifying a Unicode character by code point in a UTF mode. It is not allowed
+in non-UTF mode.
+</P>
+<P>
+In UTF mode, repeat quantifiers apply to complete UTF characters, not to
 individual code units.
 </P>
 <P>
-In UTF modes, the dot metacharacter matches one UTF character instead of a
+In UTF mode, the dot metacharacter matches one UTF character instead of a
 single code unit.
 </P>
 <P>
-The escape sequence \C can be used to match a single code unit in a UTF mode,
+In UTF mode, capture group names are not restricted to ASCII, and may contain
+any Unicode letters and decimal digits, as well as underscore.
+</P>
+<P>
+The escape sequence \C can be used to match a single code unit in UTF mode,
 but its use can lead to some strange effects because it breaks up multi-unit
 characters (see the description of \C in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
-documentation).
+documentation). For this reason, there is a build-time option that disables
+support for \C completely. There is also a less draconian compile-time option
+for locking out the use of \C when a pattern is compiled.
 </P>
 <P>
 The use of \C is not supported by the alternative matching function
@ -80,7 +105,7 @@ may consist of more than one code unit. The use of \C in these modes provokes
 a match-time error. Also, the JIT optimization does not support \C in these
 modes. If JIT optimization is requested for a UTF-8 or UTF-16 pattern that
 contains \C, it will not succeed, and so when <b>pcre2_match()</b> is called,
-the matching will be carried out by the normal interpretive function.
+the matching will be carried out by the interpretive function.
 </P>
 <P>
 The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
@ -110,25 +135,159 @@ However, the special horizontal and vertical white space matching escapes (\h,
 not PCRE2_UCP is set.
 </P>
 <br><b>
-CASE-EQUIVALENCE IN UTF MODES
+UNICODE CASE-EQUIVALENCE
 </b><br>
 <P>
-Case-insensitive matching in a UTF mode makes use of Unicode properties except
-for characters whose code points are less than 128 and that have at most two
-case-equivalent values. For these, a direct table lookup is used for speed. A
-few Unicode characters such as Greek sigma have more than two codepoints that
-are case-equivalent, and these are treated as such.
+If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use
+of Unicode properties except for characters whose code points are less than 128
+and that have at most two case-equivalent values. For these, a direct table
+lookup is used for speed. A few Unicode characters such as Greek sigma have
+more than two code points that are case-equivalent, and these are treated
+specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
+processing for non-UTF character encodings such as UCS-2.
+<a name="scriptruns"></a></P>
+<br><b>
+SCRIPT RUNS
+</b><br>
+<P>
+The pattern constructs (*script_run:...) and (*atomic_script_run:...), with
+synonyms (*sr:...) and (*asr:...), verify that the string matched within the
+parentheses is a script run. In concept, a script run is a sequence of
+characters that are all from the same Unicode script. However, because some
+scripts are commonly used together, and because some diacritical and other
+marks are used with multiple scripts, it is not that simple.
+</P>
+<P>
+Every Unicode character has a Script property, mostly with a value
+corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
+are also three special values:
+</P>
+<P>
+"Unknown" is used for code points that have not been assigned, and also for the
+surrogate code points. In the PCRE2 32-bit library, characters whose code
+points are greater than the Unicode maximum (U+10FFFF), which are accessible
+only in non-UTF mode, are assigned the Unknown script.
+</P>
+<P>
+"Common" is used for characters that are used with many scripts. These include
+punctuation, emoji, mathematical, musical, and currency symbols, and the ASCII
+digits 0 to 9.
+</P>
+<P>
+"Inherited" is used for characters such as diacritical marks that modify a
+previous character. These are considered to take on the script of the character
+that they modify.
+</P>
+<P>
+Some Inherited characters are used with many scripts, but many of them are only
+normally used with a small number of scripts. For example, U+102E0 (Coptic
+Epact thousands mark) is used only with Arabic and Coptic. In order to make it
+possible to check this, a Unicode property called Script Extension exists. Its
+value is a list of scripts that apply to the character. For the majority of
+characters, the list contains just one script, the same one as the Script
+property. However, for characters such as U+102E0 more than one Script is
+listed. There are also some Common characters that have a single, non-Common
+script in their Script Extension list.
+</P>
+<P>
+The next section describes the basic rules for deciding whether a given string
+of characters is a script run. Note, however, that there are some special cases
+involving the Chinese Han script, and an additional constraint for decimal
+digits. These are covered in subsequent sections.
+</P>
+<br><b>
+Basic script run rules
+</b><br>
+<P>
+A string that is less than two characters long is a script run. This is the
+only case in which an Unknown character can be part of a script run. Longer
+strings are checked using only the Script Extensions property, not the basic
+Script property.
+</P>
+<P>
+If a character's Script Extension property is the single value "Inherited", it
+is always accepted as part of a script run. This is also true for the property
+"Common", subject to the checking of decimal digits described below. All the
+remaining characters in a script run must have at least one script in common in
+their Script Extension lists. In set-theoretic terminology, the intersection of
+all the sets of scripts must not be empty.
+</P>
+<P>
+A simple example is an Internet name such as "google.com". The letters are all
+in the Latin script, and the dot is Common, so this string is a script run.
+However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
+string that looks the same, but with Cyrillic "o"s is not a script run.
+</P>
+<P>
+More interesting examples involve characters with more than one script in their
+Script Extension. Consider the following characters:
+<pre>
+  U+060C  Arabic comma
+  U+06D4  Arabic full stop
+</pre>
+The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
+Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
+appear in script runs of either Arabic or Hanifi Rohingya. The first could also
+appear in Syriac or Thaana script runs, but the second could not.
+</P>
+<br><b>
+The Chinese Han script
+</b><br>
+<P>
+The Chinese Han script is commonly used in conjunction with other scripts for
+writing certain languages. Japanese uses the Hiragana and Katakana scripts
+together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
+and Han. These three combinations are treated as special cases when checking
+script runs and are, in effect, "virtual scripts". Thus, a script run may
+contain a mixture of Hiragana, Katakana, and Han, or a mixture of Hangul and
+Han, or a mixture of Bopomofo and Han, but not, for example, a mixture of
+Hangul and Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical
+Standard 39 ("Unicode Security Mechanisms", http://unicode.org/reports/tr39/)
+in allowing such mixtures.
+</P>
+<br><b>
+Decimal digits
+</b><br>
+<P>
+Unicode contains many sets of 10 decimal digits in different scripts, and some
+scripts (including the Common script) contain more than one set. Some of these
+decimal digits them are visually indistinguishable from the common ASCII
+digits. In addition to the script checking described above, if a script run
+contains any decimal digits, they must all come from the same set of 10
+adjacent characters.
 </P>
 <br><b>
 VALIDITY OF UTF STRINGS
 </b><br>
 <P>
 When the PCRE2_UTF option is set, the strings passed as patterns and subjects
-are (by default) checked for validity on entry to the relevant functions.
-If an invalid UTF string is passed, an negative error code is returned. The
-code unit offset to the offending character can be extracted from the match
-data block by calling <b>pcre2_get_startchar()</b>, which is used for this
-purpose after a UTF error.
+are (by default) checked for validity on entry to the relevant functions. If an
+invalid UTF string is passed, a negative error code is returned. The code unit
+offset to the offending character can be extracted from the match data block by
+calling <b>pcre2_get_startchar()</b>, which is used for this purpose after a UTF
+error.
+</P>
+<P>
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance, for
+example in the case of a long subject string that is being scanned repeatedly.
+If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time,
+PCRE2 assumes that the pattern or subject it is given (respectively) contains
+only valid UTF code unit sequences.
+</P>
+<P>
+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
+is undefined and your program may crash or loop indefinitely or give incorrect
+results. There is, however, one mode of matching that can handle invalid UTF
+subject strings. This is enabled by passing PCRE2_MATCH_INVALID_UTF to
+<b>pcre2_compile()</b> and is discussed below in the next section. The rest of
+this section covers the case when PCRE2_MATCH_INVALID_UTF is not set.
+</P>
+<P>
+Passing PCRE2_NO_UTF_CHECK to <b>pcre2_compile()</b> just disables the UTF check
+for the pattern; it does not also apply to subject strings. If you want to
+disable the check for a subject string you must pass this same option to
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>.
 </P>
 <P>
 UTF-16 and UTF-32 strings can indicate their endianness by special code knows
@ -136,13 +295,14 @@ as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
 strings to be in host byte order.
 </P>
 <P>
-A UTF string is checked before any other processing takes place. In the case of
-<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b> calls with a non-zero starting
-offset, the check is applied only to that part of the subject that could be
-inspected during matching, and there is a check that the starting offset points
-to the first code unit of a character or to the end of the subject. If there
-are no lookbehind assertions in the pattern, the check starts at the starting
-offset. Otherwise, it starts at the length of the longest lookbehind before the
+Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other
+processing takes place. In the case of <b>pcre2_match()</b> and
+<b>pcre2_dfa_match()</b> calls with a non-zero starting offset, the check is
+applied only to that part of the subject that could be inspected during
+matching, and there is a check that the starting offset points to the first
+code unit of a character or to the end of the subject. If there are no
+lookbehind assertions in the pattern, the check starts at the starting offset.
+Otherwise, it starts at the length of the longest lookbehind before the
 starting offset, or at the start of the subject if there are not that many
 characters before the starting offset. Note that the sequences \b and \B are
 one-character lookbehinds.
@ -162,31 +322,12 @@ surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
 UTF-32.)
 </P>
 <P>
-In some situations, you may already know that your strings are valid, and
-therefore want to skip these checks in order to improve performance, for
-example in the case of a long subject string that is being scanned repeatedly.
-If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time,
-PCRE2 assumes that the pattern or subject it is given (respectively) contains
-only valid UTF code unit sequences.
-</P>
-<P>
-Passing PCRE2_NO_UTF_CHECK to <b>pcre2_compile()</b> just disables the check for
-the pattern; it does not also apply to subject strings. If you want to disable
-the check for a subject string you must pass this option to <b>pcre2_match()</b>
-or <b>pcre2_dfa_match()</b>.
-</P>
-<P>
-If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
-is undefined and your program may crash or loop indefinitely.
-</P>
-<P>
-Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
-that is given if an escape sequence for an invalid Unicode code point is
-encountered in the pattern. If you want to allow escape sequences such as
-\x{d800} (a surrogate code point) you can set the
-PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible
-only in UTF-8 and UTF-32 modes, because these values are not representable in
-UTF-16.
+Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error that is
+given if an escape sequence for an invalid Unicode code point is encountered in
+the pattern. If you want to allow escape sequences such as \x{d800} (a
+surrogate code point) you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra
+option. However, this is possible only in UTF-8 and UTF-32 modes, because these
+values are not representable in UTF-16.
 <a name="utf8strings"></a></P>
 <br><b>
 Errors in UTF-8 strings
@ -224,7 +365,7 @@ these code points are excluded by RFC 3629.
 <pre>
  PCRE2_ERROR_UTF8_ERR13
 </pre>
-A 4-byte character has a value greater than 0x10fff; these code points are
+A 4-byte character has a value greater than 0x10ffff; these code points are
 excluded by RFC 3629.
 <pre>
  PCRE2_ERROR_UTF8_ERR14
@ -277,7 +418,59 @@ The following negative error codes are given for invalid UTF-32 strings:
  PCRE2_ERROR_UTF32_ERR1  Surrogate character (0xd800 to 0xdfff)
  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff

-</PRE>
+<a name="matchinvalid"></a></PRE>
+</P>
+<br><b>
+MATCHING IN INVALID UTF STRINGS
+</b><br>
+<P>
+You can run pattern matches on subject strings that may contain invalid UTF
+sequences if you call <b>pcre2_compile()</b> with the PCRE2_MATCH_INVALID_UTF
+option. This is supported by <b>pcre2_match()</b>, including JIT matching, but
+not by <b>pcre2_dfa_match()</b>. When PCRE2_MATCH_INVALID_UTF is set, it forces
+PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a
+valid UTF string.
+</P>
+<P>
+Setting PCRE2_MATCH_INVALID_UTF does not affect what <b>pcre2_compile()</b>
+generates, but if <b>pcre2_jit_compile()</b> is subsequently called, it does
+generate different code. If JIT is not used, the option affects the behaviour
+of the interpretive code in <b>pcre2_match()</b>. When PCRE2_MATCH_INVALID_UTF
+is set at compile time, PCRE2_NO_UTF_CHECK is ignored at match time.
+</P>
+<P>
+In this mode, an invalid code unit sequence in the subject never matches any
+pattern item. It does not match dot, it does not match \p{Any}, it does not
+even match negative items such as [^X]. A lookbehind assertion fails if it
+encounters an invalid sequence while moving the current point backwards. In
+other words, an invalid UTF code unit sequence acts as a barrier which no match
+can cross.
+</P>
+<P>
+You can also think of this as the subject being split up into fragments of
+valid UTF, delimited internally by invalid code unit sequences. The pattern is
+matched fragment by fragment. The result of a successful match, however, is
+given as code unit offsets in the entire subject string in the usual way. There
+are a few points to consider:
+</P>
+<P>
+The internal boundaries are not interpreted as the beginnings or ends of lines
+and so do not match circumflex or dollar characters in the pattern.
+</P>
+<P>
+If <b>pcre2_match()</b> is called with an offset that points to an invalid
+UTF-sequence, that sequence is skipped, and the match starts at the next valid
+UTF character, or the end of the subject.
+</P>
+<P>
+At internal fragment boundaries, \b and \B behave in the same way as at the
+beginning and end of the subject. For example, a sequence such as \bWORD\b
+would match an instance of WORD that is surrounded by invalid UTF code units.
+</P>
+<P>
+Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary
+data, knowing that any matched strings that are returned are valid UTF. This
+can be useful when searching for UTF text in executable or other binary files.
 </P>
 <br><b>
 AUTHOR
@ -285,7 +478,7 @@ AUTHOR
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -294,9 +487,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 17 May 2017
+Last updated: 22 December 2021
 <br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2021 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/index.html.src
+++ b/doc/index.html.src
@ -141,11 +141,14 @@ in the library.
    <td>&nbsp;&nbsp;Free a general context</td></tr>

 <tr><td><a href="pcre2_get_error_message.html">pcre2_get_error_message</a></td>
-    <td>&nbsp;&nbsp;Free study data</td></tr>
+    <td>&nbsp;&nbsp;Get textual error message for error number</td></tr>

 <tr><td><a href="pcre2_get_mark.html">pcre2_get_mark</a></td>
    <td>&nbsp;&nbsp;Get a (*MARK) name</td></tr>

+<tr><td><a href="pcre2_get_match_data_size.html">pcre2_get_match_data_size</a></td>
+    <td>&nbsp;&nbsp;Get the size of a match data block</td></tr>
+
 <tr><td><a href="pcre2_get_ovector_count.html">pcre2_get_ovector_count</a></td>
    <td>&nbsp;&nbsp;Get the ovector count</td></tr>

@ -176,6 +179,9 @@ in the library.
 <tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>

+<tr><td><a href="pcre2_maketables_free.html">pcre2_maketables_free</a></td>
+    <td>&nbsp;&nbsp;Free character tables</td></tr>
+
 <tr><td><a href="pcre2_match.html">pcre2_match</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (Perl compatible)</td></tr>
--- a/doc/pcre2-config.txt
+++ b/doc/pcre2-config.txt
@ -16,8 +16,8 @@ DESCRIPTION

       pcre2-config returns the configuration of the installed PCRE2 libraries
       and the options required to compile a program to use them. Some of  the
-       options  apply  only  to  the  8-bit,  or  16-bit, or 32-bit libraries,
-       respectively, and are not available for libraries that  have  not  been
+       options  apply  only  to the 8-bit, or 16-bit, or 32-bit libraries, re-
+       spectively, and are not available for  libraries  that  have  not  been
       built. If an unavailable option is encountered, the "usage" information
       is output.

@ -36,30 +36,30 @@ OPTIONS
       --version Writes the version number of the installed PCRE2 libraries to
                 the standard output.

-       --libs8   Writes  to  the  standard  output  the  command  line options
-                 required to link with the 8-bit PCRE2 library  (-lpcre2-8  on
+       --libs8   Writes  to  the  standard output the command line options re-
+                 quired to link with the 8-bit  PCRE2  library  (-lpcre2-8  on
                 many systems).

-       --libs16  Writes  to  the  standard  output  the  command  line options
-                 required to link with the 16-bit PCRE2 library (-lpcre2-16 on
+       --libs16  Writes  to  the  standard output the command line options re-
+                 quired to link with the 16-bit PCRE2 library  (-lpcre2-16  on
                 many systems).

-       --libs32  Writes  to  the  standard  output  the  command  line options
-                 required to link with the 32-bit PCRE2 library (-lpcre2-32 on
+       --libs32  Writes  to  the  standard output the command line options re-
+                 quired to link with the 32-bit PCRE2 library  (-lpcre2-32  on
                 many systems).

       --libs-posix
-                 Writes  to  the  standard  output  the  command  line options
-                 required to link  with  PCRE2's  POSIX  API  wrapper  library
+                 Writes  to  the  standard output the command line options re-
+                 quired  to  link  with  PCRE2's  POSIX  API  wrapper  library
                 (-lpcre2-posix -lpcre2-8 on many systems).

-       --cflags  Writes  to  the  standard  output  the  command  line options
-                 required to compile files that use PCRE2  (this  may  include
-                 some -I options, but is blank on many systems).
+       --cflags  Writes  to  the  standard output the command line options re-
+                 quired to compile files that use PCRE2 (this may include some
+                 -I options, but is blank on many systems).

       --cflags-posix
-                 Writes  to  the  standard  output  the  command  line options
-                 required to compile files that use PCRE2's POSIX API  wrapper
+                 Writes  to  the  standard output the command line options re-
+                 quired to compile files that use PCRE2's  POSIX  API  wrapper
                 library  (this  may  include some -I options, but is blank on
                 many systems).

--- a/doc/pcre2.3
+++ b/doc/pcre2.3
@ -1,4 +1,4 @@
-.TH PCRE2 3 "01 April 2017" "PCRE2 10.30"
+.TH PCRE2 3 "27 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH INTRODUCTION
@ -6,15 +6,27 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
 PCRE2 is the name used for a revised API for the PCRE library, which is a set
 of functions, written in C, that implement regular expression pattern matching
-using the same syntax and semantics as Perl, with just a few differences. Some
-features that appeared in Python and the original PCRE before they appeared in
-Perl are also available using the Python syntax. There is also some support for
-one or two .NET and Oniguruma syntax items, and there are options for
-requesting some minor changes that give better ECMAScript (aka JavaScript)
-compatibility.
+using the same syntax and semantics as Perl, with just a few differences. After
+nearly two decades, the limitations of the original API were making development
+increasingly difficult. The new API is more extensible, and it was simplified
+by abolishing the separate "study" optimizing function; in PCRE2, patterns are
+automatically optimized where possible. Since forking from PCRE1, the code has
+been extensively refactored and new features introduced. The old library is now
+obsolete and is no longer maintained.
+.P
+As well as Perl-style regular expression patterns, some features that appeared
+in Python and the original PCRE before they appeared in Perl are available
+using the Python syntax. There is also some support for one or two .NET and
+Oniguruma syntax items, and there are options for requesting some minor changes
+that give better ECMAScript (aka JavaScript) compatibility.
+.P
+The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit,
+or 32-bit code units, which means that up to three separate libraries may be
+installed, one for each code unit size. The size of code unit is not related to
+the bit size of the underlying hardware. In a 64-bit environment that also
+supports 32-bit applications, versions of PCRE2 that are compiled in both
+64-bit and 32-bit modes may be needed.
 .P
-The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
-code units, which means that up to three separate libraries may be installed.
 The original work to extend PCRE to 16-bit and 32-bit code units was done by
 Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings
 can be interpreted either as one character per code unit, or as UTF-encoded
@ -150,8 +162,9 @@ listing), and the short pages for individual functions, are concatenated in
  pcre2-config       show PCRE2 installation configuration information
  pcre2api           details of PCRE2's native C API
  pcre2build         building PCRE2
-  pcre2callout       details of the callout feature
+  pcre2callout       details of the pattern callout feature
  pcre2compat        discussion of Perl compatibility
+  pcre2convert       details of pattern conversion functions
  pcre2demo          a demonstration C program that uses PCRE2
  pcre2grep          description of the \fBpcre2grep\fP command (8-bit only)
  pcre2jit           discussion of just-in-time optimization support
@ -164,6 +177,7 @@ listing), and the short pages for individual functions, are concatenated in
  pcre2perform       discussion of performance issues
  pcre2posix         the POSIX-compatible C API for the 8-bit library
  pcre2sample        discussion of the pcre2demo program
+  pcre2serialize     details of pattern serialization
  pcre2syntax        quick syntax reference
  pcre2test          description of the \fBpcre2test\fP command
  pcre2unicode       discussion of Unicode and UTF support
@ -177,18 +191,18 @@ function, listing its arguments and results.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .P
 Putting an actual email address here is a spam magnet. If you want to email me,
-use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
+use my two names separated by a dot at gmail.com.
 .
 .
 .SH REVISION
 .rs
 .sp
 .nf
-Last updated: 01 April 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 27 August 2021
+Copyright (c) 1997-2021 University of Cambridge.
 .fi
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
--- a/doc/pcre2_code_free.3
+++ b/doc/pcre2_code_free.3
@ -1,4 +1,4 @@
-.TH PCRE2_CODE_FREE 3 "23 March 2017" "PCRE2 10.30"
+.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.32"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -13,7 +13,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .SH DESCRIPTION
 .rs
 .sp
-This function frees the memory used for a compiled pattern, including any
+If \fIcode\fP is NULL, this function does nothing. Otherwise, \fIcode\fP must
+point to a compiled pattern. This function frees its memory, including any
 memory used by the JIT compiler. If the compiled pattern was created by a call
 to \fBpcre2_code_copy_with_tables()\fP, the memory for the character tables is
 also freed.
--- a/doc/pcre2_compile.3
+++ b/doc/pcre2_compile.3
@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE 3 "16 June 2017" "PCRE2 10.30"
+.TH PCRE2_COMPILE 3 "22 April 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -53,7 +53,8 @@ The option bits are:
  PCRE2_EXTENDED           Ignore white space and # comments
  PCRE2_FIRSTLINE          Force matching to be before newline
  PCRE2_LITERAL            Pattern characters are all literal
-  PCRE2_MATCH_UNSET_BACKREF  Match unset back references
+  PCRE2_MATCH_INVALID_UTF  Enable support for matching invalid UTF
+  PCRE2_MATCH_UNSET_BACKREF  Match unset backreferences
  PCRE2_MULTILINE          ^ and $ match newlines within data
  PCRE2_NEVER_BACKSLASH_C  Lock out the use of \eC in patterns
  PCRE2_NEVER_UCP          Lock out PCRE2_UCP, e.g. via (*UCP)
@ -73,8 +74,23 @@ The option bits are:
 PCRE2 must be built with Unicode support (the default) in order to use
 PCRE2_UTF, PCRE2_UCP and related options.
 .P
-The yield of the function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+Additional options may be set in the compile context via the
+.\" HREF
+\fBpcre2_set_compile_extra_options\fP
+.\"
+function.
+.P
+If either of \fIerrorcode\fP or \fIerroroffset\fP is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the \fIerrorcode\fP argument to the the
+\fBpcre2_get_error_message()\fP function. The offset (in code units) where the
+error was encountered is returned via the \fIerroroffset\fP argument.
+.P
+If there is no error, the value passed via \fIerrorcode\fP returns the message
+"no error" if passed to \fBpcre2_get_error_message()\fP, and the value passed
+via \fIerroroffset\fP is zero.
 .P
 There is a complete description of the PCRE2 native API, with more detail on
 each option, in the
--- a/doc/pcre2_compile_context_free.3
+++ b/doc/pcre2_compile_context_free.3
@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE_CONTEXT_FREE 3 "22 October 2014" "PCRE2 10.00"
+.TH PCRE2_COMPILE_CONTEXT_FREE 3 "29 June 2018" "PCRE2 10.32"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -15,7 +15,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
 This function frees the memory occupied by a compile context, using the memory
 freeing function from the general context with which it was created, or
-\fBfree()\fP if that was not set.
+\fBfree()\fP if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
 .P
 There is a complete description of the PCRE2 native API in the
 .\" HREF
--- a/doc/pcre2_convert_context_free.3
+++ b/doc/pcre2_convert_context_free.3
@ -1,4 +1,4 @@
-.TH PCRE2_CONVERT_CONTEXT_FREE 3 "10 July 2017" "PCRE2 10.30"
+.TH PCRE2_CONVERT_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -16,7 +16,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 This function is part of an experimental set of pattern conversion functions.
 It frees the memory occupied by a convert context, using the memory
 freeing function from the general context with which it was created, or
-\fBfree()\fP if that was not set.
+\fBfree()\fP if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
 .P
 The pattern conversion functions are described in the
 .\" HREF
--- a/doc/pcre2_converted_pattern_free.3
+++ b/doc/pcre2_converted_pattern_free.3
@ -1,4 +1,4 @@
-.TH PCRE2_CONVERTED_PATTERN_FREE 3 "11 July 2017" "PCRE2 10.30"
+.TH PCRE2_CONVERTED_PATTERN_FREE 3 "28 June 2018" "PCRE2 10.32"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -16,7 +16,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 This function is part of an experimental set of pattern conversion functions.
 It frees the memory occupied by a converted pattern that was obtained by
 calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place
-the converted pattern into newly obtained heap memory.
+the converted pattern into newly obtained heap memory. If the argument is NULL,
+the function returns immediately without doing anything.
 .P
 The pattern conversion functions are described in the
 .\" HREF
--- a/doc/pcre2_dfa_match.3
+++ b/doc/pcre2_dfa_match.3
@ -1,4 +1,4 @@
-.TH PCRE2_DFA_MATCH 3 "30 May 2017" "PCRE2 10.30"
+.TH PCRE2_DFA_MATCH 3 "28 August 2021" "PCRE2 10.38"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -33,12 +33,19 @@ just once (except when processing lookaround assertions). This function is
  \fIworkspace\fP    Points to a vector of ints used as working space
  \fIwscount\fP      Number of elements in the vector
 .sp
-For \fBpcre2_dfa_match()\fP, a match context is needed only if you want to set
-up a callout function or specify the match and/or the recursion depth limits.
-The \fIlength\fP and \fIstartoffset\fP values are code units, not characters.
-The options are:
+The size of output vector needed to contain all the results depends on the
+number of simultaneous matches, not on the number of parentheses in the
+pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the match
+data block is therefore not advisable when using this function.
+.P
+A match context is needed only if you want to set up a callout function or
+specify the heap limit or the match or the recursion depth limits. The
+\fIlength\fP and \fIstartoffset\fP values are code units, not characters. The
+options are:
 .sp
  PCRE2_ANCHORED          Match only at the first position
+  PCRE2_COPY_MATCHED_SUBJECT
+                          On success, make a private subject copy
  PCRE2_ENDANCHORED       Pattern can match only at end of subject
  PCRE2_NOTBOL            Subject is not the beginning of a line
  PCRE2_NOTEOL            Subject is not the end of a line
--- a/doc/pcre2_general_context_free.3
+++ b/doc/pcre2_general_context_free.3
@ -1,4 +1,4 @@
-.TH PCRE2_GENERAL_CONTEXT_FREE 3 "22 October 2014" "PCRE2 10.00"
+.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -14,7 +14,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .rs
 .sp
 This function frees the memory occupied by a general context, using the memory
-freeing function within the context, if set.
+freeing function within the context, if set.  If the argument is NULL, the
+function returns immediately without doing anything.
 .P
 There is a complete description of the PCRE2 native API in the
 .\" HREF
--- a/doc/pcre2_get_match_data_size.3
+++ b/doc/pcre2_get_match_data_size.3
@ -0,0 +1,27 @@
+.TH PCRE2_GET_MATCH_DATA_SIZE 3 "16 July 2019" "PCRE2 10.34"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP);
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function returns the size, in bytes, of the match data block that is its
+argument.
+.P
+There is a complete description of the PCRE2 native API in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcre2posix\fP
+.\"
+page.
--- a/doc/pcre2_jit_compile.3
+++ b/doc/pcre2_jit_compile.3
@ -1,4 +1,4 @@
-.TH PCRE2_JIT_COMPILE 3 "21 October 2014" "PCRE2 10.00"
+.TH PCRE2_JIT_COMPILE 3 "29 July 2019" "PCRE2 10.34"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -30,9 +30,15 @@ bits:
  PCRE2_JIT_PARTIAL_SOFT  compile code for soft partial matching
  PCRE2_JIT_PARTIAL_HARD  compile code for hard partial matching
 .sp
+There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
+superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old
+option is deprecated and may be removed in the future.
+.P
 The yield of the function is 0 for success, or a negative error code otherwise.
 In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or
-if an unknown bit is set in \fIoptions\fP.
+if an unknown bit is set in \fIoptions\fP. The function can also return
+PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the
+compiler, even if it was because of a system security restriction.
 .P
 There is a complete description of the PCRE2 native API in the
 .\" HREF
--- a/Show More
+++ b/Show More