Implement --never-backslash-C

2015-10-17 13:50:56 +00:00 · 2015-10-17 13:50:56 +00:00 · 3263d44b97
parent 5923caf05e
commit 3263d44b97
58 changed files with 2060 additions and 1479 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -70,6 +70,7 @@
 # 2015-04-24 PH added support for PCRE2_DEBUG
 # 2015-07-16 PH updated for new pcre2_find_bracket source module
 # 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
+# 2015-10-16 PH added support for never-backslash-C

 PROJECT(PCRE2 C)

@ -162,6 +163,9 @@ SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
 SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
    "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")

+SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL
+    "If ON, backslash-C (upper case C) is locked out.")
+
 SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL
    "Enable Valgrind support.")

@ -252,6 +256,10 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF)
        SET(BSR_ANYCRLF 1)
 ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)

+IF(PCRE2_NEVER_BACKSLASH_C)
+        SET(NEVER_BACKSLASH_C 1)
+ENDIF(PCRE2_NEVER_BACKSLASH_C)         
+
 IF(PCRE2_SUPPORT_UNICODE)
        SET(SUPPORT_UNICODE 1)
 ENDIF(PCRE2_SUPPORT_UNICODE)
@ -719,6 +727,7 @@ IF(PCRE2_SHOW_REPORT)
  MESSAGE(STATUS "  Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
  MESSAGE(STATUS "  Newline char/sequence ........... : ${PCRE2_NEWLINE}")
  MESSAGE(STATUS "  \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
+  MESSAGE(STATUS "  \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}")
  MESSAGE(STATUS "  EBCDIC coding ................... : ${PCRE2_EBCDIC}")
  MESSAGE(STATUS "  EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
  MESSAGE(STATUS "  Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
--- a/2
+++ b/2
@ -201,6 +201,8 @@ escape was being ignored.
 57. Fixed integer overflow for patterns whose minimum matching length is very, 
 very large.

+58. Implemented --never-backslash-C.
+

 Version 10.20 30-June-2015
 --------------------------
--- a/9
+++ b/9
@ -220,6 +220,13 @@ library. They are also documented in the pcre2build man page.
  restrict \R to match only CR, LF, or CRLF. You can make this the default by
  adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
  
+. In a pattern, the escape sequence \C matches a single code unit, even in a 
+  UTF mode. This can be dangerous because it breaks up multi-code-unit 
+  characters. You can build PCRE2 with the use of \C permanently locked out by
+  adding --enable-never-backslash-C (note the upper case C) to the "configure" 
+  command. When \C is allowed by the library, individual applications can lock 
+  it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. 
+
 . PCRE2 has a counter that limits the depth of nesting of parentheses in a
  pattern. This limits the amount of system stack that a pattern uses when it
  is compiled. The default is 250, but you can change it by setting, for
@ -833,4 +840,4 @@ The distribution should contain the files listed below.
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 16 July 2015
+Last updated: 16 October 2015
--- a/64
+++ b/64
@ -75,7 +75,10 @@ title17="Test 17: JIT-specific features when JIT is available"
 title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
 title19="Test 19: Tests of the POSIX interface with UTF/UCP"
 title20="Test 20: Serialization tests"
-maxtest=20
+title21="Test 21: \C tests without UTF (supported for DFA matching)"
+title22="Test 22: \C tests with UTF (not supported for DFA matching)"
+title23="Test 23: \C disabled test"
+maxtest=23

 if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title0
@ -99,6 +102,9 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title18
  echo $title19
  echo $title20
+  echo $title21
+  echo $title22
+  echo $title23
  exit 0
 fi

@ -223,6 +229,9 @@ do17=no
 do18=no
 do19=no
 do20=no
+do21=no
+do22=no
+do23=no

 while [ $# -gt 0 ] ; do
  case $1 in
@ -247,6 +256,9 @@ while [ $# -gt 0 ] ; do
   18) do18=yes;;
   19) do19=yes;;
   20) do20=yes;;
+   21) do21=yes;;
+   22) do22=yes;;
+   23) do23=yes;;
   -8) arg8=yes;;
  -16) arg16=yes;;
  -32) arg32=yes;;
@ -326,6 +338,11 @@ support16=$?
 $sim ./pcre2test -C pcre2-32 >/dev/null
 support32=$?

+# \C may be disabled
+
+$sim ./pcre2test -C backslash-C >/dev/null
+supportBSC=$?
+
 # Initialize all bitsizes skipped

 test8=skip
@ -400,7 +417,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do8  = no -a $do9  = no -a $do10 = no -a $do11 = no -a \
     $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
     $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
-     $do20 = no \
+     $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no \
   ]; then
  do0=yes
  do1=yes
@ -423,6 +440,9 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
  do18=yes
  do19=yes
  do20=yes 
+  do21=yes 
+  do22=yes 
+  do23=yes 
 fi

 # Handle any explicit skips at this stage, so that an argument list may consist
@ -781,6 +801,46 @@ for bmode in "$test8" "$test16" "$test32"; do
    checkresult $? 20 ""
  fi
  
+  # \C tests without UTF - DFA matching is supported
+
+  if [ "$do21" = yes ] ; then
+    echo $title21
+    if [ $supportBSC -eq 0 ] ; then
+      echo "  Skipped because \C is disabled"
+    else
+      for opt in "" $jitopt -dfa; do
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput21 testtry
+        checkresult $? 21 "$opt"
+      done
+    fi
+  fi
+
+  # \C tests with UTF - DFA matching is not supported for \C in UTF mode
+   
+  if [ "$do22" = yes ] ; then
+    echo $title22
+    if [ $supportBSC -eq 0 ] ; then
+      echo "  Skipped because \C is disabled"
+    else
+      for opt in "" $jitopt; do
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput22 testtry
+        checkresult $? 22-$bits "$opt"
+      done
+    fi
+  fi
+
+  # Test when \C is disabled (single run, no extra pcre2test options)
+   
+  if [ "$do23" = yes ] ; then
+    echo $title23
+    if [ $supportBSC -ne 0 ] ; then
+      echo "  Skipped because \C is not disabled"
+    else
+      $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput23 testtry
+      checkresult $? 23 ""
+    fi
+  fi
+
 # End of loop for 8/16/32-bit tests
 done

--- a/RunTest.bat
+++ b/RunTest.bat
@ -13,11 +13,10 @@
@rem line. Added argument validation and added error reporting.
@rem
@rem Sheri Pierce added logic to skip feature dependent tests
-@rem tests 4 5 9 15 and 18 require utf support
-@rem tests 6 7 10 16 and 19 require ucp support
-@rem 11 requires ucp and link size 2
-@rem 12 requires presence of jit support
-@rem 13 requires absence of jit support
+@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support
+@rem 8 requires Unicode and link size 2
+@rem 16 requires absence of jit support
+@rem 17 requires presence of jit support
@rem Sheri P also added override tests for study and jit testing
@rem Zoltan Herczeg added libpcre16 support
@rem Zoltan Herczeg added libpcre32 support
@ -25,6 +24,7 @@
@rem
@rem The file was converted for PCRE2 by PH, February 2015.
@rem Updated for new test 14 (moving others up a number), August 2015.
+@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.


 setlocal enabledelayedexpansion
@ -65,6 +65,8 @@ set support32=%ERRORLEVEL%
 set unicode=%ERRORLEVEL%
 %pcre2test% -C jit >NUL
 set jit=%ERRORLEVEL%
+%pcre2test% -C backslash-C >NUL
+set supportBSC=%ERRORLEVEL%

 if %support8% EQU 1 (
 if not exist testout8 md testout8
@ -101,18 +103,21 @@ set do17=no
 set do18=no
 set do19=no
 set do20=no
+set do21=no
+set do22=no
+set do23=no
 set all=yes

 for %%a in (%*) do (
  set valid=no
-  for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20) do if %%v == %%a set valid=yes
+  for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes
  if "!valid!" == "yes" (
    set do%%a=yes
    set all=no
 ) else (
    echo Invalid test number - %%a!
        echo Usage %0 [ test_number ] ...
-        echo Where test_number is one or more optional test numbers 1 through 20, default is all tests.
+        echo Where test_number is one or more optional test numbers 1 through 23, default is all tests.
        exit /b 1
 )
 )
@ -139,6 +144,9 @@ if "%all%" == "yes" (
  set do18=yes
  set do19=yes
  set do20=yes
+  set do21=yes
+  set do22=yes
+  set do23=yes
 )

@echo RunTest.bat's pcre2test output is written to newly created subfolders
@ -187,6 +195,9 @@ if "%do17%" == "yes" call :do17
 if "%do18%" == "yes" call :do18
 if "%do19%" == "yes" call :do19
 if "%do20%" == "yes" call :do20
+if "%do21%" == "yes" call :do21
+if "%do22%" == "yes" call :do22
+if "%do23%" == "yes" call :do23
 :modeSkip
 if "%mode%" == "" (
  set mode=-16
@ -323,7 +334,7 @@ if %unicode% EQU 0 (
 goto :eof

 :do6
-  call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q -dfa
+  call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q
 goto :eof

 :do7
@ -331,7 +342,7 @@ if %unicode% EQU 0 (
  echo Test 7 Skipped due to absence of Unicode support.
  goto :eof
 )
-  call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q -dfa
+  call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q
  goto :eof

 :do8
@ -395,12 +406,16 @@ if %bits% EQU 8 (
  echo Test 13 Skipped when running 8-bit tests.
  goto :eof
 )
-  call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q -dfa
+  call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q
 goto :eof

 :do14
-call :runsub 14 testout "DFA specials for UTF and UCP support" -q
-goto :eof
+if %unicode% EQU 0 (
+  echo Test 14 Skipped due to absence of Unicode support.
+  goto :eof
+)
+  call :runsub 14 testout "DFA specials for UTF and UCP support" -q
+  goto :eof

 :do15
 call :runsub 15 testout "Non-JIT limits and other non_JIT tests" -q
@ -442,6 +457,10 @@ if %bits% EQU 16 (
 if %bits% EQU 32 (
  echo Test 19 Skipped when running 32-bit tests.
  goto :eof
+)
+if %unicode% EQU 0 (
+  echo Test 19 Skipped due to absence of Unicode support.
+  goto :eof
 )
  call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q
 goto :eof
@ -450,6 +469,37 @@ goto :eof
 call :runsub 20 testout "Serialization tests" -q
 goto :eof

+:do21
+if %supportBSC% EQU 0 (
+  echo Test 21 Skipped due to absence of backslash-C support.
+  goto :eof
+)
+  call :runsub 21 testout "Backslash-C tests without UTF" -q
+  call :runsub 21 testout "Backslash-C tests without UTF (DFA)" -q -dfa
+  if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -jit
+goto :eof
+
+:do22
+if %supportBSC% EQU 0 (
+  echo Test 22 Skipped due to absence of backslash-C support.
+  goto :eof
+)
+if %unicode% EQU 0 (
+  echo Test 22 Skipped due to absence of Unicode support.
+  goto :eof
+)
+  call :runsub 22 testout "Backslash-C tests with UTF" -q
+  if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -jit
+goto :eof
+
+:do23
+if %supportBSC% EQU 1 (
+  echo Test 23 Skipped due to presence of backslash-C support.
+  goto :eof
+)
+  call :runsub 23 testout "Backslash-C disabled test" -q
+goto :eof
+
 :conferror
@echo.
@echo Either your build is incomplete or you have a configuration error.
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@ -33,6 +33,7 @@
 #cmakedefine EBCDIC 1
 #cmakedefine EBCDIC_NL25 1
 #cmakedefine HEAP_MATCH_RECURSE 1
+#cmakedefine NEVER_BACKSLASH_C 1

 #define LINK_SIZE		@PCRE2_LINK_SIZE@
 #define MATCH_LIMIT		@PCRE2_MATCH_LIMIT@
--- a/configure.ac
+++ b/configure.ac
@ -190,6 +190,12 @@ AC_ARG_ENABLE(bsr-anycrlf,
                             [\R matches only CR, LF, CRLF by default]),
              , enable_bsr_anycrlf=no)
              
+# Handle --enable-never-backslash-C
+AC_ARG_ENABLE(never-backslash-C,
+              AS_HELP_STRING([--enable-never-backslash-C],
+                             [use of \C causes an error]),
+              , enable_never_backslash_C=no)                  
+
 # Handle --enable-ebcdic
 AC_ARG_ENABLE(ebcdic,
              AS_HELP_STRING([--enable-ebcdic],
@ -604,6 +610,11 @@ if test "$enable_bsr_anycrlf" = "yes"; then
    The build-time default can be overridden by the user of PCRE2 at runtime.])
 fi

+if test "$enable_never_backslash_C" = "yes"; then
+  AC_DEFINE([NEVER_BACKSLASH_C], [], [
+    Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.])
+fi      
+
 AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
  The value of LINK_SIZE determines the number of bytes used to store
  links as offsets within the compiled regex. The default is 2, which
@ -881,6 +892,7 @@ $PACKAGE-$VERSION configuration summary:
    Enable Unicode support .......... : ${enable_unicode}
    Newline char/sequence ........... : ${enable_newline}
    \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
+    \C is disabled .................. : ${enable_never_backslash_C} 
    EBCDIC coding ................... : ${enable_ebcdic}
    EBCDIC code for NL .............. : ${ebcdic_nl_code}
    Rebuild char tables ............. : ${enable_rebuild_chartables}
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -220,6 +220,13 @@ library. They are also documented in the pcre2build man page.
  restrict \R to match only CR, LF, or CRLF. You can make this the default by
  adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
  
+. In a pattern, the escape sequence \C matches a single code unit, even in a 
+  UTF mode. This can be dangerous because it breaks up multi-code-unit 
+  characters. You can build PCRE2 with the use of \C permanently locked out by
+  adding --enable-never-backslash-C (note the upper case C) to the "configure" 
+  command. When \C is allowed by the library, individual applications can lock 
+  it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. 
+
 . PCRE2 has a counter that limits the depth of nesting of parentheses in a
  pattern. This limits the amount of system stack that a pattern uses when it
  is compiled. The default is 250, but you can change it by setting, for
@ -833,4 +840,4 @@ The distribution should contain the files listed below.
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 16 July 2015
+Last updated: 16 October 2015
--- a/doc/html/pcre2.html
+++ b/doc/html/pcre2.html
@ -126,8 +126,10 @@ running redundant checks.
 <P>
 The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to
 problems, because it may leave the current matching point in the middle of a
-multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to
-lock out the use of \C, causing a compile-time error if it is encountered.
+multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an 
+application to lock out the use of \C, causing a compile-time error if it is
+encountered. It is also possible to build PCRE2 with the use of \C permanently 
+disabled.
 </P>
 <P>
 Another way that performance can be hit is by running a pattern that has a very
@ -187,7 +189,7 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
 </P>
 <br><a name="SEC5" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 13 April 2015
+Last updated: 16 October 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2_substitute.html
+++ b/doc/html/pcre2_substitute.html
@ -59,20 +59,22 @@ units, not characters, as is the contents of the variable pointed at by
 <i>outlengthptr</i>, which is updated to the actual length of the new string.
 The options are:
 <pre>
-  PCRE2_ANCHORED          Match only at the first position
-  PCRE2_NOTBOL            Subject string is not the beginning of a line
-  PCRE2_NOTEOL            Subject string is not the end of a line
-  PCRE2_NOTEMPTY          An empty string is not a valid match
-  PCRE2_NOTEMPTY_ATSTART  An empty string at the start of the subject
-                           is not a valid match
-  PCRE2_NO_UTF_CHECK      Do not check the subject or replacement for
-                           UTF validity (only relevant if PCRE2_UTF
-                           was set at compile time)
-  PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
+  PCRE2_ANCHORED             Match only at the first position
+  PCRE2_NOTBOL               Subject is not the beginning of a line
+  PCRE2_NOTEOL               Subject is not the end of a line
+  PCRE2_NOTEMPTY             An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the
+                              subject is not a valid match
+  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement
+                              for UTF validity (only relevant if
+                              PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
 </pre>
 The function returns the number of substitutions, which may be zero if there
 were no matches. The result can be greater than one only when
-PCRE2_SUBSTITUTE_GLOBAL is set.
+PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
+is returned.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -1237,7 +1237,8 @@ This option locks out the use of \C in the pattern that is being compiled.
 This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because
 it may leave the current matching point in the middle of a multi-code-unit
 character. This option may be useful in applications that process patterns from
-external sources.
+external sources. Note that there is also a build-time option that permanently 
+locks out the use of \C.
 <pre>
  PCRE2_NEVER_UCP
 </pre>
@ -2613,43 +2614,17 @@ same number causes an error at compile time.
 <b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
 <b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
 <b>  uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
-<b>  pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR \fIreplacementzfP,</b>
+<b>  pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
 <b>  PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *\fIoutputbuffer\zfP,</b>
 <b>  PCRE2_SIZE *<i>outlengthptr</i>);</b>
+</P>
+<P>
 This function calls <b>pcre2_match()</b> and then makes a copy of the subject
 string in <i>outputbuffer</i>, replacing the part that was matched with the
 <i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
 be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
 </P>
 <P>
-In the replacement string, which is interpreted as a UTF string in UTF mode,
-and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
-dollar character is an escape character that can specify the insertion of
-characters from capturing groups or (*MARK) items in the pattern. The following
-forms are recognized:
-<pre>
-  $$                  insert a dollar character
-  $&#60;n&#62; or ${&#60;n&#62;}      insert the contents of group &#60;n&#62;
-  $*MARK or ${*MARK}  insert the name of the last (*MARK) encountered 
-</pre>
-Either a group number or a group name can be given for &#60;n&#62;. Curly brackets are
-required only if the following character would be interpreted as part of the
-number or name. The number may be zero to include the entire matched string.
-For example, if the pattern a(b)c is matched with "=abc=" and the replacement
-string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by
-calling <b>pcre2_copy_byname()</b> or <b>pcre2_copy_bynumber()</b> as
-appropriate.
-</P>
-<P>
-The facility for inserting a (*MARK) name can be used to perform simple 
-simultaneous substitutions, as this <b>pcre2test</b> example shows:
-<pre>
-  /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
-      apple lemon
-   2: pear orange
-</PRE>
-</P>
-<P>
 The first seven arguments of <b>pcre2_substitute()</b> are the same as for
 <b>pcre2_match()</b>, except that the partial matching options are not
 permitted, and <i>match_data</i> may be passed as NULL, in which case a match
@ -2658,25 +2633,112 @@ functions from the match context, if provided, or else those that were used to
 allocate memory for the compiled code.
 </P>
 <P>
-There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
+The <i>outlengthptr</i> argument must point to a variable that contains the
+length, in code units, of the output buffer. If the function is successful,
+the value is updated to contain the length of the new string, excluding the
+trailing zero that is automatically added. If the function is not successful,
+the value is set to PCRE2_UNSET for general errors (such as output buffer too
+small). For syntax errors in the replacement string, the value is set to the
+offset in the replacement string where the error was detected.
+</P>
+<P>
+In the replacement string, which is interpreted as a UTF string in UTF mode,
+and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
+dollar character is an escape character that can specify the insertion of
+characters from capturing groups or (*MARK) items in the pattern. The following
+forms are always recognized:
+<pre>
+  $$                  insert a dollar character
+  $&#60;n&#62; or ${&#60;n&#62;}      insert the contents of group &#60;n&#62;
+  $*MARK or ${*MARK}  insert the name of the last (*MARK) encountered
+</pre>
+Either a group number or a group name can be given for &#60;n&#62;. Curly brackets are
+required only if the following character would be interpreted as part of the
+number or name. The number may be zero to include the entire matched string.
+For example, if the pattern a(b)c is matched with "=abc=" and the replacement
+string "+$1$0$1+", the result is "=+babcb+=".
+</P>
+<P>
+The facility for inserting a (*MARK) name can be used to perform simple
+simultaneous substitutions, as this <b>pcre2test</b> example shows:
+<pre>
+  /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
+      apple lemon
+   2: pear orange
+</pre>
+There is an additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
 function to iterate over the subject string, replacing every matching
 substring. If this is not set, only the first matching substring is replaced.
 </P>
 <P>
-The <i>outlengthptr</i> argument must point to a variable that contains the
-length, in code units, of the output buffer. It is updated to contain the
-length of the new string, excluding the trailing zero that is automatically
-added.
+A second additional option, PCRE2_SUBSTITUTE_EXTENDED, causes extra processing
+to be applied to the replacement string. Without this option, only the dollar
+character is special, and only the group insertion forms listed above are
+valid. When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
 </P>
 <P>
-The function returns the number of replacements that were made. This may be
-zero if no matches were found, and is never greater than 1 unless
-PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
-is returned. Except for PCRE2_ERROR_NOMATCH (which is never returned), any
-errors from <b>pcre2_match()</b> or the substring copying functions are passed
-straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid
-replacement string (unrecognized sequence following a dollar sign), and
-PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
+Firstly, backslash in a replacement string is interpreted as an escape
+character. The usual forms such as \n or \x{ddd} can be used to specify
+particular character codes, and backslash followed by any non-alphanumeric
+character quotes that character. Extended quoting can be coded using \Q...\E,
+exactly as in pattern strings.
+</P>
+<P>
+There are also four escape sequences for forcing the case of inserted letters.
+The insertion mechanism has three states: no case forcing, force upper case,
+and force lower case. The escape sequences change the current state: \U and
+\L change to upper or lower case forcing, respectively, and \E (when not
+terminating a \Q quoted sequence) reverts to no case forcing. The sequences
+\u and \l force the next character (if it is a letter) to upper or lower
+case, respectively, and then the state automatically reverts to no case
+forcing. Case forcing applies to all inserted characters, including those from
+captured groups and letters within \Q...\E quoted sequences.
+</P>
+<P>
+Note that case forcing sequences such as \U...\E do not nest. For example,
+the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no
+effect.
+</P>
+<P>
+The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
+flexibility to group substitution. The syntax is similar to that used by Bash:
+<pre>
+  ${&#60;n&#62;:-&#60;string&#62;}
+  ${&#60;n&#62;:+&#60;string1&#62;:&#60;string2&#62;}
+</pre>
+As before, &#60;n&#62; may be a group number or a name. The first form specifies a
+default value. If group &#60;n&#62; is set, its value is inserted; if not, &#60;string&#62; is
+expanded and the result inserted. The second form specifies strings that are
+expanded and inserted when group &#60;n&#62; is set or unset, respectively. The first
+form is just a convenient shorthand for
+<pre>
+  ${&#60;n&#62;:+${&#60;n&#62;}:&#60;string&#62;}
+</pre>
+Backslash can be used to escape colons and closing curly brackets in the
+replacement strings. A change of the case forcing state within a replacement
+string remains in force afterwards, as shown in this <b>pcre2test</b> example:
+<pre>
+  /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
+      body
+   1: hello
+      somebody
+   1: HELLO
+</pre>
+If successful, the function returns the number of replacements that were made.
+This may be zero if no matches were found, and is never greater than 1 unless
+PCRE2_SUBSTITUTE_GLOBAL is set.
+</P>
+<P>
+In the event of an error, a negative error code is returned. Except for
+PCRE2_ERROR_NOMATCH (which is never returned), errors from <b>pcre2_match()</b>
+are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
+is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
+errors in the replacement string, with more particular errors being
+PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
+PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
+PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution). As for all
+PCRE2 errors, a text message that describes the error can be obtained by
+calling <b>pcre2_get_error_message()</b>.
 </P>
 <br><a name="SEC35" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
 <P>
@ -2908,8 +2970,8 @@ There are in addition the following errors that are specific to
  PCRE2_ERROR_DFA_UITEM
 </pre>
 This return is given if <b>pcre2_dfa_match()</b> encounters an item in the
-pattern that it does not support, for instance, the use of \C or a back
-reference.
+pattern that it does not support, for instance, the use of \C in a UTF mode or
+a back reference.
 <pre>
  PCRE2_ERROR_DFA_UCOND
 </pre>
@ -2953,7 +3015,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC40" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 22 September 2015
+Last updated: 16 October 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@ -18,23 +18,24 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
 <li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
 <li><a name="TOC5" href="#SEC5">UNICODE AND UTF SUPPORT</a>
-<li><a name="TOC6" href="#SEC6">JUST-IN-TIME COMPILER SUPPORT</a>
-<li><a name="TOC7" href="#SEC7">NEWLINE RECOGNITION</a>
-<li><a name="TOC8" href="#SEC8">WHAT \R MATCHES</a>
-<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a>
-<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a>
-<li><a name="TOC11" href="#SEC11">LIMITING PCRE2 RESOURCE USAGE</a>
-<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
-<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
-<li><a name="TOC14" href="#SEC14">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
-<li><a name="TOC15" href="#SEC15">PCRE2GREP BUFFER SIZE</a>
-<li><a name="TOC16" href="#SEC16">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
-<li><a name="TOC17" href="#SEC17">INCLUDING DEBUGGING CODE</a>
-<li><a name="TOC18" href="#SEC18">DEBUGGING WITH VALGRIND SUPPORT</a>
-<li><a name="TOC19" href="#SEC19">CODE COVERAGE REPORTING</a>
-<li><a name="TOC20" href="#SEC20">SEE ALSO</a>
-<li><a name="TOC21" href="#SEC21">AUTHOR</a>
-<li><a name="TOC22" href="#SEC22">REVISION</a>
+<li><a name="TOC6" href="#SEC6">DISABLING THE USE OF \C</a>
+<li><a name="TOC7" href="#SEC7">JUST-IN-TIME COMPILER SUPPORT</a>
+<li><a name="TOC8" href="#SEC8">NEWLINE RECOGNITION</a>
+<li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
+<li><a name="TOC10" href="#SEC10">HANDLING VERY LARGE PATTERNS</a>
+<li><a name="TOC11" href="#SEC11">AVOIDING EXCESSIVE STACK USAGE</a>
+<li><a name="TOC12" href="#SEC12">LIMITING PCRE2 RESOURCE USAGE</a>
+<li><a name="TOC13" href="#SEC13">CREATING CHARACTER TABLES AT BUILD TIME</a>
+<li><a name="TOC14" href="#SEC14">USING EBCDIC CODE</a>
+<li><a name="TOC15" href="#SEC15">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
+<li><a name="TOC16" href="#SEC16">PCRE2GREP BUFFER SIZE</a>
+<li><a name="TOC17" href="#SEC17">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
+<li><a name="TOC18" href="#SEC18">INCLUDING DEBUGGING CODE</a>
+<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
+<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
+<li><a name="TOC21" href="#SEC21">SEE ALSO</a>
+<li><a name="TOC22" href="#SEC22">AUTHOR</a>
+<li><a name="TOC23" href="#SEC23">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
 <P>
@ -148,13 +149,19 @@ properties. The application can request that they do by setting the PCRE2_UCP
 option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
 request this by starting with (*UCP).
 </P>
+<br><a name="SEC6" href="#TOC1">DISABLING THE USE OF \C</a><br>
 <P>
 The \C escape sequence, which matches a single code unit, even in a UTF mode,
 can cause unpredictable behaviour because it may leave the current matching
-point in the middle of a multi-code-unit character. It can be locked out by
-setting the PCRE2_NEVER_BACKSLASH_C option.
+point in the middle of a multi-code-unit character. The application can lock it
+out by setting the PCRE2_NEVER_BACKSLASH_C option when calling
+<b>pcre2_compile()</b>. There is also a build-time option
+<pre>
+  --enable-never-backslash-C
+</pre>
+(note the upper case C) which locks out the use of \C entirely.
 </P>
-<br><a name="SEC6" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
+<br><a name="SEC7" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
 <P>
 Just-in-time compiler support is included in the build by specifying
 <pre>
@ -171,7 +178,7 @@ pcre2grep automatically makes use of it, unless you add
 </pre>
 to the "configure" command.
 </P>
-<br><a name="SEC7" href="#TOC1">NEWLINE RECOGNITION</a><br>
+<br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
 <P>
 By default, PCRE2 interprets the linefeed (LF) character as indicating the end
 of a line. This is the normal newline character on Unix-like systems. You can
@ -208,7 +215,7 @@ Whatever default line ending convention is selected when PCRE2 is built can be
 overridden by applications that use the library. At build time it is
 conventional to use the standard for your operating system.
 </P>
-<br><a name="SEC8" href="#TOC1">WHAT \R MATCHES</a><br>
+<br><a name="SEC9" href="#TOC1">WHAT \R MATCHES</a><br>
 <P>
 By default, the sequence \R in a pattern matches any Unicode newline sequence,
 independently of what has been selected as the line ending sequence. If you
@ -220,7 +227,7 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
 selected when PCRE2 is built can be overridden by applications that use the
 called.
 </P>
-<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
+<br><a name="SEC10" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
 <P>
 Within a compiled pattern, offset values are used to point from one part to
 another (for example, from an opening parenthesis to an alternation
@ -239,7 +246,7 @@ longer offsets slows down the operation of PCRE2 because it has to load
 additional data when handling them. For the 32-bit library the value is always
 4 and cannot be overridden; the value of --with-link-size is ignored.
 </P>
-<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
+<br><a name="SEC11" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
 <P>
 When matching with the <b>pcre2_match()</b> function, PCRE2 implements
 backtracking by making recursive calls to an internal function called
@ -261,7 +268,7 @@ custom memory management functions can be called instead. PCRE2 runs noticeably
 more slowly when built in this way. This option affects only the
 <b>pcre2_match()</b> function; it is not relevant for <b>pcre2_dfa_match()</b>.
 </P>
-<br><a name="SEC11" href="#TOC1">LIMITING PCRE2 RESOURCE USAGE</a><br>
+<br><a name="SEC12" href="#TOC1">LIMITING PCRE2 RESOURCE USAGE</a><br>
 <P>
 Internally, PCRE2 has a function called <b>match()</b>, which it calls
 repeatedly (sometimes recursively) when matching a pattern with the
@ -290,7 +297,7 @@ constraints. However, you can set a lower limit by adding, for example,
 </pre>
 to the <b>configure</b> command. This value can also be overridden at run time.
 </P>
-<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
+<br><a name="SEC13" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
 <P>
 PCRE2 uses fixed tables for processing characters whose code points are less
 than 256. By default, PCRE2 is built with a set of tables that are distributed
@ -307,7 +314,7 @@ compiling, because <b>dftables</b> is run on the local host. If you need to
 create alternative tables when cross compiling, you will have to do so "by
 hand".)
 </P>
-<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
+<br><a name="SEC14" href="#TOC1">USING EBCDIC CODE</a><br>
 <P>
 PCRE2 assumes by default that it will run in an environment where the character
 code is ASCII or Unicode, which is a superset of ASCII. This is the case for
@ -342,7 +349,7 @@ The options that select newline behaviour, such as --enable-newline-is-cr,
 and equivalent run-time options, refer to these character values in an EBCDIC
 environment.
 </P>
-<br><a name="SEC14" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
+<br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
 <P>
 By default, <b>pcre2grep</b> reads all files as plain text. You can build it so
 that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
@ -355,7 +362,7 @@ to the <b>configure</b> command. These options naturally require that the
 relevant libraries are installed on your system. Configuration will fail if
 they are not.
 </P>
-<br><a name="SEC15" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
+<br><a name="SEC16" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
 <P>
 <b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
 scanning, in order to be able to output "before" and "after" lines when it
@ -370,7 +377,7 @@ parameter value by adding, for example,
 to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override this
 value by using --buffer-size on the command line.
 </P>
-<br><a name="SEC16" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
+<br><a name="SEC17" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
 <P>
 If you add one of
 <pre>
@ -404,7 +411,7 @@ automatically included, you may need to add something like
 </pre>
 immediately before the <b>configure</b> command.
 </P>
-<br><a name="SEC17" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
+<br><a name="SEC18" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
 <P>
 If you add
 <pre>
@ -413,7 +420,7 @@ If you add
 to the <b>configure</b> command, additional debugging code is included in the
 build. This feature is intended for use by the PCRE2 maintainers.
 </P>
-<br><a name="SEC18" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
+<br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
 <P>
 If you add
 <pre>
@ -423,7 +430,7 @@ to the <b>configure</b> command, PCRE2 will use valgrind annotations to mark
 certain memory regions as unaddressable. This allows it to detect invalid
 memory accesses, and is mostly useful for debugging PCRE2 itself.
 </P>
-<br><a name="SEC19" href="#TOC1">CODE COVERAGE REPORTING</a><br>
+<br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
 <P>
 If your C compiler is gcc, you can build a version of PCRE2 that can generate a
 code coverage report for its test suite. To enable this, you must install
@ -480,11 +487,11 @@ This cleans all coverage data including the generated coverage report. For more
 information about code coverage, see the <b>gcov</b> and <b>lcov</b>
 documentation.
 </P>
-<br><a name="SEC20" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC21" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2api</b>(3), <b>pcre2-config</b>(3).
 </P>
-<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC22" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@ -493,9 +500,9 @@ University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC22" href="#TOC1">REVISION</a><br>
+<br><a name="SEC23" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 24 April 2015
+Last updated: 16 October 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@ -1236,14 +1236,21 @@ with \C in UTF-8 or UTF-16 mode means that the rest of the string may start
 with a malformed UTF character. This has undefined results, because PCRE2
 assumes that it is matching character by character in a valid UTF string (by
 default it checks the subject string's validity at the start of processing
-unless the PCRE2_NO_UTF_CHECK option is used). An application can lock out the
-use of \C by setting the PCRE2_NEVER_BACKSLASH_C option.
+unless the PCRE2_NO_UTF_CHECK option is used). 
+</P>
+<P>
+An application can lock out the use of \C by setting the
+PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to
+build PCRE2 with the use of \C permanently disabled.
 </P>
 <P>
 PCRE2 does not allow \C to appear in lookbehind assertions
 <a href="#lookbehind">(described below)</a>
 in a UTF mode, because this would make it impossible to calculate the length of
-the lookbehind.
+the lookbehind. Neither the alternative matching function 
+<b>pcre2_dfa_match()</b> nor the JIT optimizer supports \C in a UTF mode. The 
+former gives a match-time error; the latter fails to optimize and so the match 
+is always run using the interpreter.
 </P>
 <P>
 In general, the \C escape sequence is best avoided. However, one way of using
@ -3351,7 +3358,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC30" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 01 September 2015
+Last updated: 16 October 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@ -12,17 +12,21 @@ This page is part of the PCRE2 HTML documentation. It was generated
 automatically from the original man page. If there is any nonsense in it,
 please consult the man page, in case the conversion went wrong.
 <br>
-<br><b>
-PCRE2 PERFORMANCE
-</b><br>
+<ul>
+<li><a name="TOC1" href="#SEC1">PCRE2 PERFORMANCE</a>
+<li><a name="TOC2" href="#SEC2">COMPILED PATTERN MEMORY USAGE</a>
+<li><a name="TOC3" href="#SEC3">STACK USAGE AT RUN TIME</a>
+<li><a name="TOC4" href="#SEC4">PROCESSING TIME</a>
+<li><a name="TOC5" href="#SEC5">AUTHOR</a>
+<li><a name="TOC6" href="#SEC6">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">PCRE2 PERFORMANCE</a><br>
 <P>
 Two aspects of performance are discussed below: memory usage and processing
 time. The way you express your pattern as a regular expression can affect both
 of them.
 </P>
-<br><b>
-COMPILED PATTERN MEMORY USAGE
-</b><br>
+<br><a name="SEC2" href="#TOC1">COMPILED PATTERN MEMORY USAGE</a><br>
 <P>
 Patterns are compiled by PCRE2 into a reasonably efficient interpretive code,
 so that most simple patterns do not use much memory. However, there is one case
@ -75,9 +79,7 @@ pattern. Nevertheless, if the atomic grouping is not a problem and the loss of
 speed is acceptable, this kind of rewriting will allow you to process patterns
 that PCRE2 cannot otherwise handle.
 </P>
-<br><b>
-STACK USAGE AT RUN TIME
-</b><br>
+<br><a name="SEC3" href="#TOC1">STACK USAGE AT RUN TIME</a><br>
 <P>
 When <b>pcre2_match()</b> is used for matching, certain kinds of pattern can
 cause it to use large amounts of the process stack. In some environments the
@ -86,9 +88,7 @@ SIGSEGV. Rewriting your pattern can often help. The
 <a href="pcre2stack.html"><b>pcre2stack</b></a>
 documentation discusses this issue in detail.
 </P>
-<br><b>
-PROCESSING TIME
-</b><br>
+<br><a name="SEC4" href="#TOC1">PROCESSING TIME</a><br>
 <P>
 Certain items in regular expression patterns are processed more efficiently
 than others. It is more efficient to use a character class like [aeiou] than a
@ -177,9 +177,7 @@ appreciable time with strings longer than about 20 characters.
 In many cases, the solution to this kind of performance issue is to use an
 atomic group or a possessive quantifier.
 </P>
-<br><b>
-AUTHOR
-</b><br>
+<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@ -188,9 +186,7 @@ University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><b>
-REVISION
-</b><br>
+<br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
 Last updated: 02 January 2015
 <br>
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@ -111,9 +111,10 @@ it matches a literal "u".
  \W         a "non-word" character
  \X         a Unicode extended grapheme cluster
 </pre>
-The application can lock out the use of \C by setting the
-PCRE2_NEVER_BACKSLASH_C option. It is dangerous because it may leave the
-current matching point in the middle of a UTF-8 or UTF-16 character.
+\C is dangerous because it may leave the current matching point in the middle
+of a UTF-8 or UTF-16 character. The application can lock out the use of \C by
+setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 
+with the use of \C permanently disabled.
 </P>
 <P>
 By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
@ -588,7 +589,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC27" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 17 July 2015
+Last updated: 16 October 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -155,12 +155,13 @@ following options output the value and set the exit code as indicated:
 The following options output 1 for true or 0 for false, and set the exit code
 to the same value:
 <pre>
-  ebcdic     compiled for an EBCDIC environment
-  jit        just-in-time support is available
-  pcre2-16   the 16-bit library was built
-  pcre2-32   the 32-bit library was built
-  pcre2-8    the 8-bit library was built
-  unicode    Unicode support is available
+  backslash-C  \C is supported (not locked out)
+  ebcdic       compiled for an EBCDIC environment
+  jit          just-in-time support is available
+  pcre2-16     the 16-bit library was built
+  pcre2-32     the 32-bit library was built
+  pcre2-8      the 8-bit library was built
+  unicode      Unicode support is available
 </pre>
 If an unknown option is given, an error message is output; the exit code is 0.
 </P>
@ -510,7 +511,7 @@ Setting compilation options
 <P>
 The following modifiers set options for <b>pcre2_compile()</b>. The most common
 ones have single-letter abbreviations. See
-<a href="pcreapi.html"><b>pcreapi</b></a>
+<a href="pcre2api.html"><b>pcre2api</b></a>
 for a description of their effects.
 <pre>
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
@ -537,6 +538,7 @@ for a description of their effects.
      no_utf_check              set PCRE2_NO_UTF_CHECK
      ucp                       set PCRE2_UCP
      ungreedy                  set PCRE2_UNGREEDY
+      use_offset_limit          set PCRE2_USE_OFFSET_LIMIT 
      utf                       set PCRE2_UTF
 </pre>
 As well as turning on the PCRE2_UTF option, the <b>utf</b> modifier causes all
@ -564,6 +566,7 @@ about the pattern:
      locale=&#60;name&#62;             use this locale
      memory                    show memory used
      newline=&#60;type&#62;            set newline type
+      null_context              compile with a NULL context
      parens_nest_limit=&#60;n&#62;     set maximum parentheses depth
      posix                     use the POSIX API
      push                      push compiled pattern onto the stack
@ -642,6 +645,15 @@ is requested. For each callout, either its number or string is given, followed
 by the item that follows it in the pattern.
 </P>
 <br><b>
+Passing a NULL context
+</b><br>
+<P>
+Normally, <b>pcre2test</b> passes a context block to <b>pcre2_compile()</b>. If 
+the <b>null_context</b> modifier is set, however, NULL is passed. This is for 
+testing that <b>pcre2_compile()</b> behaves correctly in this case (it uses 
+default values).
+</P>
+<br><b>
 Specifying a pattern in hex
 </b><br>
 <P>
@ -920,9 +932,11 @@ pattern.
  /g  global                    global matching
      jitstack=&#60;n&#62;              set size of JIT stack
      mark                      show mark values
-      match_limit=&#62;n&#62;           set a match limit
+      match_limit=&#60;n&#62;           set a match limit
      memory                    show memory usage
+      null_context              match with a NULL context 
      offset=&#60;n&#62;                set starting offset
+      offset_limit=&#60;n&#62;          set offset limit
      ovector=&#60;n&#62;               set size of output vector
      recursion_limit=&#60;n&#62;       set a recursion limit
      replace=&#60;string&#62;          specify a replacement string
@ -1170,6 +1184,16 @@ The <b>offset</b> modifier sets an offset in the subject string at which
 matching starts. Its value is a number of code units, not characters.
 </P>
 <br><b>
+Setting an offset limit
+</b><br>
+<P>
+The <b>offset_limit</b> modifier sets a limit for unanchored matches. If a match 
+cannot be found starting at or before this offset in the subject, a "no match" 
+return is given. The data value is a number of code units, not characters. When 
+this modifier is used, the <b>use_offset_limit</b> modifier must have been set 
+for the pattern; if not, an error is generated.
+</P>
+<br><b>
 Setting the size of the output vector
 </b><br>
 <P>
@ -1201,6 +1225,17 @@ this modifier has no effect, as there is no facility for passing a length.)
 When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 </P>
+<br><b>
+Passing a NULL context
+</b><br>
+<P>
+Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
+<b>pcre2_dfa_match()</b> or <b>pcre2_jit_match()</b>. If the <b>null_context</b>
+modifier is set, however, NULL is passed. This is for testing that the matching
+functions behave correctly in this case (they use default values). This 
+modifier cannot be used with the <b>find_limits</b> modifier or when testing the 
+substitution function.
+</P>
 <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
 <P>
 By default, <b>pcre2test</b> uses the standard PCRE2 matching function,
@ -1539,7 +1574,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 14 September 2015
+Last updated: 17 October 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -71,11 +71,12 @@ The escape sequence \C can be used to match a single code unit, in a UTF mode,
 but its use can lead to some strange effects because it breaks up multi-unit
 characters (see the description of \C in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
-documentation). The use of \C is not supported in the alternative matching
-function <b>pcre2_dfa_match()</b>, nor is it supported in UTF mode by the JIT
-optimization. If JIT optimization is requested for a UTF pattern that contains
-\C, it will not succeed, and so the matching will be carried out by the normal
-interpretive function.
+documentation). The use of \C is not supported by the alternative matching
+function <b>pcre2_dfa_match()</b> when in UTF mode. Its use provokes a
+match-time error. The JIT optimization also does not support \C in UTF mode.
+If JIT optimization is requested for a UTF pattern that contains \C, it will
+not succeed, and so the matching will be carried out by the normal interpretive
+function.
 </P>
 <P>
 The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
@ -275,7 +276,7 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 18 August 2015
+Last updated: 16 October 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/pcre2.3
+++ b/doc/pcre2.3
@ -1,4 +1,4 @@
-.TH PCRE2 3 "13 April 2015" "PCRE2 10.20"
+.TH PCRE2 3 "16 October 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH INTRODUCTION
@ -118,8 +118,10 @@ running redundant checks.
 .P
 The use of the \eC escape sequence in a UTF-8 or UTF-16 pattern can lead to
 problems, because it may leave the current matching point in the middle of a
-multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used to
-lock out the use of \eC, causing a compile-time error if it is encountered.
+multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an 
+application to lock out the use of \eC, causing a compile-time error if it is
+encountered. It is also possible to build PCRE2 with the use of \eC permanently 
+disabled.
 .P
 Another way that performance can be hit is by running a pattern that has a very
 large search tree against a string that will never match. Nested unlimited
@ -187,6 +189,6 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
 .rs
 .sp
 .nf
-Last updated: 13 April 2015
+Last updated: 16 October 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@ -104,26 +104,27 @@ SECURITY CONSIDERATIONS
       The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead
       to  problems,  because  it  may leave the current matching point in the
       middle of  a  multi-code-unit  character.  The  PCRE2_NEVER_BACKSLASH_C
-       option  can  be  used to lock out the use of \C, causing a compile-time
-       error if it is encountered.
+       option can be used by an application to lock out the use of \C, causing
+       a compile-time error if it is encountered. It is also possible to build
+       PCRE2 with the use of \C permanently disabled.

-       Another way that performance can be hit is by running  a  pattern  that
-       has  a  very  large search tree against a string that will never match.
-       Nested unlimited repeats in a pattern are a common example. PCRE2  pro-
-       vides  some  protection  against  this: see the pcre2_set_match_limit()
+       Another  way  that  performance can be hit is by running a pattern that
+       has a very large search tree against a string that  will  never  match.
+       Nested  unlimited repeats in a pattern are a common example. PCRE2 pro-
+       vides some protection against  this:  see  the  pcre2_set_match_limit()
       function in the pcre2api page.


 USER DOCUMENTATION

-       The user documentation for PCRE2 comprises a number of  different  sec-
-       tions.  In the "man" format, each of these is a separate "man page". In
-       the HTML format, each is a separate page, linked from the  index  page.
-       In  the  plain  text  format,  the  descriptions  of  the pcre2grep and
+       The  user  documentation for PCRE2 comprises a number of different sec-
+       tions. In the "man" format, each of these is a separate "man page".  In
+       the  HTML  format, each is a separate page, linked from the index page.
+       In the plain  text  format,  the  descriptions  of  the  pcre2grep  and
       pcre2test programs are in files called pcre2grep.txt and pcre2test.txt,
-       respectively.  The remaining sections, except for the pcre2demo section
-       (which is a program listing), and the short pages for individual  func-
-       tions,  are  concatenated in pcre2.txt, for ease of searching. The sec-
+       respectively. The remaining sections, except for the pcre2demo  section
+       (which  is a program listing), and the short pages for individual func-
+       tions, are concatenated in pcre2.txt, for ease of searching.  The  sec-
       tions are as follows:

         pcre2              this document
@ -148,7 +149,7 @@ USER DOCUMENTATION
         pcre2test          description of the pcre2test command
         pcre2unicode       discussion of Unicode and UTF support

-       In the "man" and HTML formats, there is also a short page  for  each  C
+       In  the  "man"  and HTML formats, there is also a short page for each C
       library function, listing its arguments and results.


@ -158,14 +159,14 @@ AUTHOR
       University Computing Service
       Cambridge, England.

-       Putting  an  actual email address here is a spam magnet. If you want to
-       email me, use my two initials, followed by the two digits  10,  at  the
+       Putting an actual email address here is a spam magnet. If you  want  to
+       email  me,  use  my two initials, followed by the two digits 10, at the
       domain cam.ac.uk.


 REVISION

-       Last updated: 13 April 2015
+       Last updated: 16 October 2015
       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
 
@ -1276,7 +1277,9 @@ COMPILING A PATTERN
       piled.   This  escape  can  cause  unpredictable  behaviour in UTF-8 or
       UTF-16 modes, because it may leave the current matching  point  in  the
       middle  of  a  multi-code-unit  character. This option may be useful in
-       applications that process patterns from external sources.
+       applications that process patterns from  external  sources.  Note  that
+       there is also a build-time option that permanently locks out the use of
+       \C.

         PCRE2_NEVER_UCP

@ -2571,19 +2574,36 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
       int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
         PCRE2_SIZE length, PCRE2_SIZE startoffset,
         uint32_t options, pcre2_match_data *match_data,
-         pcre2_match_context *mcontext, PCRE2_SPTR replacementzfP,
+         pcre2_match_context *mcontext, PCRE2_SPTR replacement,
         PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
         PCRE2_SIZE *outlengthptr);
+
       This function calls pcre2_match() and then makes a copy of the  subject
       string  in  outputbuffer,  replacing the part that was matched with the
       replacement string, whose length is supplied in rlength.  This  can  be
       given as PCRE2_ZERO_TERMINATED for a zero-terminated string.

+       The  first  seven  arguments  of pcre2_substitute() are the same as for
+       pcre2_match(), except that the partial matching options are not permit-
+       ted,  and  match_data may be passed as NULL, in which case a match data
+       block is obtained and freed within this function, using memory  manage-
+       ment  functions from the match context, if provided, or else those that
+       were used to allocate memory for the compiled code.
+
+       The outlengthptr argument must point to a variable  that  contains  the
+       length,  in  code  units, of the output buffer. If the function is suc-
+       cessful, the value is updated to contain the length of the new  string,
+       excluding  the  trailing zero that is automatically added. If the func-
+       tion is not successful, the value is set  to  PCRE2_UNSET  for  general
+       errors  (such  as  output  buffer  too small). For syntax errors in the
+       replacement string, the value is set to the offset in  the  replacement
+       string where the error was detected.
+
       In  the replacement string, which is interpreted as a UTF string in UTF
       mode, and is checked for UTF  validity  unless  the  PCRE2_NO_UTF_CHECK
       option is set, a dollar character is an escape character that can spec-
       ify the insertion of characters from capturing groups or (*MARK)  items
-       in the pattern. The following forms are recognized:
+       in the pattern. The following forms are always recognized:

         $$                  insert a dollar character
         $<n> or ${<n>}      insert the contents of group <n>
@ -2594,8 +2614,7 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
       preted as part of the number or name. The number may be zero to include
       the entire matched string.   For  example,  if  the  pattern  a(b)c  is
       matched  with "=abc=" and the replacement string "+$1$0$1+", the result
-       is "=+babcb+=". Group insertion is done by calling  pcre2_copy_byname()
-       or pcre2_copy_bynumber() as appropriate.
+       is "=+babcb+=".

       The facility for inserting a (*MARK) name can be used to perform simple
       simultaneous substitutions, as this pcre2test example shows:
@ -2604,32 +2623,80 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
             apple lemon
          2: pear orange

-       The first seven arguments of pcre2_substitute() are  the  same  as  for
-       pcre2_match(), except that the partial matching options are not permit-
-       ted, and match_data may be passed as NULL, in which case a  match  data
-       block  is obtained and freed within this function, using memory manage-
-       ment functions from the match context, if provided, or else those  that
-       were used to allocate memory for the compiled code.
-
-       There  is  one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes
+       There  is  an  additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes
       the function to iterate over the subject string, replacing every match-
       ing substring. If this is not set, only the first matching substring is
       replaced.

-       The outlengthptr argument must point to a variable  that  contains  the
-       length,  in  code units, of the output buffer. It is updated to contain
-       the length of the new string, excluding the trailing zero that is auto-
-       matically added.
+       A second additional  option,  PCRE2_SUBSTITUTE_EXTENDED,  causes  extra
+       processing  to  be  applied  to  the  replacement  string. Without this
+       option, only the dollar character is special, and only the group inser-
+       tion  forms  listed  above are valid. When PCRE2_SUBSTITUTE_EXTENDED is
+       set, two things change:

-       The  function  returns  the number of replacements that were made. This
-       may be zero if no matches were found,  and  is  never  greater  than  1
-       unless PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a neg-
-       ative error code is returned. Except for PCRE2_ERROR_NOMATCH (which  is
-       never returned), any errors from pcre2_match() or the substring copying
-       functions  are  passed  straight  back.  PCRE2_ERROR_BADREPLACEMENT  is
-       returned  for an invalid replacement string (unrecognized sequence fol-
-       lowing a dollar sign), and PCRE2_ERROR_NOMEMORY is returned if the out-
-       put buffer is not big enough.
+       Firstly, backslash in a replacement string is interpreted as an  escape
+       character. The usual forms such as \n or \x{ddd} can be used to specify
+       particular character codes, and backslash followed by any  non-alphanu-
+       meric  character  quotes  that character. Extended quoting can be coded
+       using \Q...\E, exactly as in pattern strings.
+
+       There are also four escape sequences for forcing the case  of  inserted
+       letters.   The  insertion  mechanism has three states: no case forcing,
+       force upper case, and force lower case. The escape sequences change the
+       current state: \U and \L change to upper or lower case forcing, respec-
+       tively, and \E (when not terminating a \Q quoted sequence)  reverts  to
+       no  case  forcing. The sequences \u and \l force the next character (if
+       it is a letter) to upper or lower  case,  respectively,  and  then  the
+       state automatically reverts to no case forcing. Case forcing applies to
+       all inserted  characters, including those from captured groups and let-
+       ters within \Q...\E quoted sequences.
+
+       Note that case forcing sequences such as \U...\E do not nest. For exam-
+       ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc";  the  final
+       \E has no effect.
+
+       The  second  effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
+       flexibility to group substitution. The syntax is similar to  that  used
+       by Bash:
+
+         ${<n>:-<string>}
+         ${<n>:+<string1>:<string2>}
+
+       As  before,  <n> may be a group number or a name. The first form speci-
+       fies a default value. If group <n> is set, its value  is  inserted;  if
+       not,  <string>  is  expanded  and  the result inserted. The second form
+       specifies strings that are expanded and inserted when group <n> is  set
+       or  unset,  respectively. The first form is just a convenient shorthand
+       for
+
+         ${<n>:+${<n>}:<string>}
+
+       Backslash can be used to escape colons and closing  curly  brackets  in
+       the  replacement  strings.  A change of the case forcing state within a
+       replacement string remains  in  force  afterwards,  as  shown  in  this
+       pcre2test example:
+
+         /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
+             body
+          1: hello
+             somebody
+          1: HELLO
+
+       If  successful,  the  function  returns the number of replacements that
+       were made.  This may be zero if no matches were  found,  and  is  never
+       greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
+
+       In the event of an error, a negative error code is returned. Except for
+       PCRE2_ERROR_NOMATCH   (which   is   never   returned),   errors    from
+       pcre2_match()   are   passed  straight  back.  PCRE2_ERROR_NOMEMORY  is
+       returned   if    the    output    buffer    is    not    big    enough.
+       PCRE2_ERROR_BADREPLACEMENT  is  used for miscellaneous syntax errors in
+       the   replacement   string,   with   more   particular   errors   being
+       PCRE2_ERROR_BADREPESCAPE        (invalid       escape        sequence),
+       PCRE2_ERROR_REPMISSINGBRACE  (closing  curly  bracket  not found), and
+       PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended  group  substitu-
+       tion).  As  for  all PCRE2 errors, a text message that describes the
+       error can be obtained by calling pcre2_get_error_message().


 DUPLICATE SUBPATTERN NAMES
@ -2845,8 +2912,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
         PCRE2_ERROR_DFA_UITEM

       This return is given if pcre2_dfa_match() encounters  an  item  in  the
-       pattern that it does not support, for instance, the use of \C or a back
-       reference.
+       pattern  that it does not support, for instance, the use of \C in a UTF
+       mode or a back reference.

         PCRE2_ERROR_DFA_UCOND

@ -2890,7 +2957,7 @@ AUTHOR

 REVISION

-       Last updated: 22 September 2015
+       Last updated: 16 October 2015
       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
 
@ -3010,10 +3077,18 @@ UNICODE AND UTF SUPPORT
       PCRE2_UCP option. Unless the application  has  set  PCRE2_NEVER_UCP,  a
       pattern may also request this by starting with (*UCP).

+
+DISABLING THE USE OF \C
+
       The \C escape sequence, which matches a single code unit, even in a UTF
       mode, can cause unpredictable behaviour because it may leave  the  cur-
-       rent  matching  point  in the middle of a multi-code-unit character. It
-       can be locked out by setting the PCRE2_NEVER_BACKSLASH_C option.
+       rent  matching  point in the middle of a multi-code-unit character. The
+       application can lock it  out  by  setting  the  PCRE2_NEVER_BACKSLASH_C
+       option when calling pcre2_compile(). There is also a build-time option
+
+         --enable-never-backslash-C
+
+       (note the upper case C) which locks out the use of \C entirely.


 JUST-IN-TIME COMPILER SUPPORT
@ -3022,10 +3097,10 @@ JUST-IN-TIME COMPILER SUPPORT

         --enable-jit

-       This support is available only for certain hardware  architectures.  If
-       this  option  is  set for an unsupported architecture, a building error
-       occurs.  See the pcre2jit documentation for a discussion of JIT  usage.
-       When  JIT  support is enabled, pcre2grep automatically makes use of it,
+       This  support  is available only for certain hardware architectures. If
+       this option is set for an unsupported architecture,  a  building  error
+       occurs.   See the pcre2jit documentation for a discussion of JIT usage.
+       When JIT support is enabled, pcre2grep automatically makes use  of  it,
       unless you add

         --disable-pcre2grep-jit
@ -3035,14 +3110,14 @@ JUST-IN-TIME COMPILER SUPPORT

 NEWLINE RECOGNITION

-       By default, PCRE2 interprets the linefeed (LF) character as  indicating
-       the  end  of  a line. This is the normal newline character on Unix-like
-       systems. You can compile PCRE2 to use carriage return (CR) instead,  by
+       By  default, PCRE2 interprets the linefeed (LF) character as indicating
+       the end of a line. This is the normal newline  character  on  Unix-like
+       systems.  You can compile PCRE2 to use carriage return (CR) instead, by
       adding

         --enable-newline-is-cr

-       to  the  configure  command.  There  is  also an --enable-newline-is-lf
+       to the configure  command.  There  is  also  an  --enable-newline-is-lf
       option, which explicitly specifies linefeed as the newline character.

       Alternatively, you can specify that line endings are to be indicated by
@ -3055,76 +3130,76 @@ NEWLINE RECOGNITION

         --enable-newline-is-anycrlf

-       which causes PCRE2 to recognize any of the three sequences CR,  LF,  or
+       which  causes  PCRE2 to recognize any of the three sequences CR, LF, or
       CRLF as indicating a line ending. Finally, a fifth option, specified by

         --enable-newline-is-any

-       causes  PCRE2  to  recognize  any Unicode newline sequence. The Unicode
+       causes PCRE2 to recognize any Unicode  newline  sequence.  The  Unicode
       newline sequences are the three just mentioned, plus the single charac-
       ters VT (vertical tab, U+000B), FF (form feed, U+000C), NEL (next line,
-       U+0085), LS (line separator,  U+2028),  and  PS  (paragraph  separator,
+       U+0085),  LS  (line  separator,  U+2028),  and PS (paragraph separator,
       U+2029).

       Whatever default line ending convention is selected when PCRE2 is built
-       can be overridden by applications that use the library. At  build  time
+       can  be  overridden by applications that use the library. At build time
       it is conventional to use the standard for your operating system.


 WHAT \R MATCHES

-       By  default,  the  sequence \R in a pattern matches any Unicode newline
-       sequence, independently of what has been selected as  the  line  ending
+       By default, the sequence \R in a pattern matches  any  Unicode  newline
+       sequence,  independently  of  what has been selected as the line ending
       sequence. If you specify

         --enable-bsr-anycrlf

-       the  default  is changed so that \R matches only CR, LF, or CRLF. What-
-       ever is selected when PCRE2 is built can be overridden by  applications
+       the default is changed so that \R matches only CR, LF, or  CRLF.  What-
+       ever  is selected when PCRE2 is built can be overridden by applications
       that use the library.


 HANDLING VERY LARGE PATTERNS

-       Within  a  compiled  pattern,  offset values are used to point from one
-       part to another (for example, from an opening parenthesis to an  alter-
-       nation  metacharacter).  By default, in the 8-bit and 16-bit libraries,
-       two-byte values are used for these offsets, leading to a  maximum  size
-       for  a compiled pattern of around 64K code units. This is sufficient to
+       Within a compiled pattern, offset values are used  to  point  from  one
+       part  to another (for example, from an opening parenthesis to an alter-
+       nation metacharacter). By default, in the 8-bit and  16-bit  libraries,
+       two-byte  values  are used for these offsets, leading to a maximum size
+       for a compiled pattern of around 64K code units. This is sufficient  to
       handle all but the most gigantic patterns. Nevertheless, some people do
-       want  to  process truly enormous patterns, so it is possible to compile
-       PCRE2 to use three-byte or four-byte offsets by adding a  setting  such
+       want to process truly enormous patterns, so it is possible  to  compile
+       PCRE2  to  use three-byte or four-byte offsets by adding a setting such
       as

         --with-link-size=3

-       to  the  configure command. The value given must be 2, 3, or 4. For the
-       16-bit library, a value of 3 is rounded up to 4.  In  these  libraries,
-       using  longer  offsets slows down the operation of PCRE2 because it has
-       to load additional data when handling them. For the 32-bit library  the
-       value  is  always 4 and cannot be overridden; the value of --with-link-
+       to the configure command. The value given must be 2, 3, or 4.  For  the
+       16-bit  library,  a  value of 3 is rounded up to 4. In these libraries,
+       using longer offsets slows down the operation of PCRE2 because  it  has
+       to  load additional data when handling them. For the 32-bit library the
+       value is always 4 and cannot be overridden; the value  of  --with-link-
       size is ignored.


 AVOIDING EXCESSIVE STACK USAGE

-       When matching with the pcre2_match() function, PCRE2  implements  back-
-       tracking  by  making  recursive  calls  to  an internal function called
-       match(). In environments where the size of the stack is  limited,  this
-       can  severely  limit  PCRE2's operation. (The Unix environment does not
-       usually suffer from this problem, but it may sometimes be necessary  to
+       When  matching  with the pcre2_match() function, PCRE2 implements back-
+       tracking by making recursive  calls  to  an  internal  function  called
+       match().  In  environments where the size of the stack is limited, this
+       can severely limit PCRE2's operation. (The Unix  environment  does  not
+       usually  suffer from this problem, but it may sometimes be necessary to
       increase  the  maximum  stack  size.  There  is  a  discussion  in  the
-       pcre2stack documentation.) An alternative approach  to  recursion  that
-       uses  memory from the heap to remember data, instead of using recursive
-       function calls, has been implemented to work round the problem of  lim-
-       ited  stack  size.  If  you want to build a version of PCRE2 that works
+       pcre2stack  documentation.)  An  alternative approach to recursion that
+       uses memory from the heap to remember data, instead of using  recursive
+       function  calls, has been implemented to work round the problem of lim-
+       ited stack size. If you want to build a version  of  PCRE2  that  works
       this way, add

         --disable-stack-for-recursion

       to the configure command. By default, the system functions malloc() and
-       free()  are called to manage the heap memory that is required, but cus-
-       tom memory management functions  can  be  called  instead.  PCRE2  runs
+       free() are called to manage the heap memory that is required, but  cus-
+       tom  memory  management  functions  can  be  called instead. PCRE2 runs
       noticeably more slowly when built in this way. This option affects only
       the pcre2_match() function; it is not relevant for pcre2_dfa_match().

@ -3132,30 +3207,30 @@ AVOIDING EXCESSIVE STACK USAGE
 LIMITING PCRE2 RESOURCE USAGE

       Internally, PCRE2 has a function called match(), which it calls repeat-
-       edly   (sometimes   recursively)  when  matching  a  pattern  with  the
+       edly  (sometimes  recursively)  when  matching  a  pattern   with   the
       pcre2_match() function. By controlling the maximum number of times this
-       function  may be called during a single matching operation, a limit can
-       be placed on the resources used by a single call to pcre2_match().  The
+       function may be called during a single matching operation, a limit  can
+       be  placed on the resources used by a single call to pcre2_match(). The
       limit can be changed at run time, as described in the pcre2api documen-
-       tation. The default is 10 million, but this can be changed by adding  a
+       tation.  The default is 10 million, but this can be changed by adding a
       setting such as

         --with-match-limit=500000

-       to   the   configure  command.  This  setting  has  no  effect  on  the
+       to  the  configure  command.  This  setting  has  no  effect   on   the
       pcre2_dfa_match() matching function.

-       In some environments it is desirable to limit the  depth  of  recursive
+       In  some  environments  it is desirable to limit the depth of recursive
       calls of match() more strictly than the total number of calls, in order
-       to restrict the maximum amount of stack (or heap,  if  --disable-stack-
+       to  restrict  the maximum amount of stack (or heap, if --disable-stack-
       for-recursion is specified) that is used. A second limit controls this;
-       it defaults to the value that  is  set  for  --with-match-limit,  which
-       imposes  no  additional constraints. However, you can set a lower limit
+       it  defaults  to  the  value  that is set for --with-match-limit, which
+       imposes no additional constraints. However, you can set a  lower  limit
       by adding, for example,

         --with-match-limit-recursion=10000

-       to the configure command. This value can  also  be  overridden  at  run
+       to  the  configure  command.  This  value can also be overridden at run
       time.


@ -3163,45 +3238,45 @@ CREATING CHARACTER TABLES AT BUILD TIME

       PCRE2 uses fixed tables for processing characters whose code points are
       less than 256. By default, PCRE2 is built with a set of tables that are
-       distributed  in  the file src/pcre2_chartables.c.dist. These tables are
+       distributed in the file src/pcre2_chartables.c.dist. These  tables  are
       for ASCII codes only. If you add

         --enable-rebuild-chartables

-       to the configure command, the distributed tables are  no  longer  used.
-       Instead,  a  program  called dftables is compiled and run. This outputs
+       to  the  configure  command, the distributed tables are no longer used.
+       Instead, a program called dftables is compiled and  run.  This  outputs
       the source for a new set of tables, created in the default locale of your
-       C  run-time  system. (This method of replacing the tables does not work
-       if you are cross compiling, because dftables is run on the local  host.
+       C run-time system. (This method of replacing the tables does  not  work
+       if  you are cross compiling, because dftables is run on the local host.
       If you need to create alternative tables when cross compiling, you will
       have to do so "by hand".)


 USING EBCDIC CODE

-       PCRE2 assumes by default that it will run in an environment  where  the
-       character  code is ASCII or Unicode, which is a superset of ASCII. This
+       PCRE2  assumes  by default that it will run in an environment where the
+       character code is ASCII or Unicode, which is a superset of ASCII.  This
       is the case for most computer operating systems. PCRE2 can, however, be
       compiled to run in an 8-bit EBCDIC environment by adding

         --enable-ebcdic --disable-unicode

       to the configure command. This setting implies --enable-rebuild-charta-
-       bles. You should only use it if you know that  you  are  in  an  EBCDIC
+       bles.  You  should  only  use  it if you know that you are in an EBCDIC
       environment (for example, an IBM mainframe operating system).

-       It  is  not possible to support both EBCDIC and UTF-8 codes in the same
-       version of the library. Consequently,  --enable-unicode  and  --enable-
+       It is not possible to support both EBCDIC and UTF-8 codes in  the  same
+       version  of  the  library. Consequently, --enable-unicode and --enable-
       ebcdic are mutually exclusive.

       The EBCDIC character that corresponds to an ASCII LF is assumed to have
-       the value 0x15 by default. However, in some EBCDIC  environments,  0x25
+       the  value  0x15 by default. However, in some EBCDIC environments, 0x25
       is used. In such an environment you should use

         --enable-ebcdic-nl25

       as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR
-       has the same value as in ASCII, namely, 0x0d.  Whichever  of  0x15  and
+       has  the  same  value  as in ASCII, namely, 0x0d. Whichever of 0x15 and
       0x25 is not chosen as LF is made to correspond to the Unicode NEL char-
       acter (which, in Unicode, is 0x85).

@ -3212,31 +3287,31 @@ USING EBCDIC CODE

 PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT

-       By default, pcre2grep reads all files as plain text. You can  build  it
-       so  that  it recognizes files whose names end in .gz or .bz2, and reads
+       By  default,  pcre2grep reads all files as plain text. You can build it
+       so that it recognizes files whose names end in .gz or .bz2,  and  reads
       them with libz or libbz2, respectively, by adding one or both of

         --enable-pcre2grep-libz
         --enable-pcre2grep-libbz2

       to the configure command. These options naturally require that the rel-
-       evant  libraries  are installed on your system. Configuration will fail
+       evant libraries are installed on your system. Configuration  will  fail
       if they are not.


 PCRE2GREP BUFFER SIZE

-       pcre2grep uses an internal buffer to hold a "window" on the file it  is
+       pcre2grep  uses an internal buffer to hold a "window" on the file it is
       scanning, in order to be able to output "before" and "after" lines when
-       it finds a match. The size of the buffer is controlled by  a  parameter
+       it  finds  a match. The size of the buffer is controlled by a parameter
       whose default value is 20K. The buffer itself is three times this size,
       but because of the way it is used for holding "before" lines, the long-
-       est  line  that  is guaranteed to be processable is the parameter size.
+       est line that is guaranteed to be processable is  the  parameter  size.
       You can change the default parameter value by adding, for example,

         --with-pcre2grep-bufsize=50K

-       to the configure command. The caller of  pcre2grep  can  override  this
+       to  the  configure  command.  The caller of pcre2grep can override this
       value by using --buffer-size on the command line.


@ -3247,26 +3322,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
         --enable-pcre2test-libreadline
         --enable-pcre2test-libedit

-       to  the  configure  command,  pcre2test  is linked with the libreadline
+       to the configure command, pcre2test  is  linked  with  the  libreadline
       or libedit library, respectively, and when its input is from a terminal,
-       it  reads  it using the readline() function. This provides line-editing
-       and history facilities. Note that libreadline is  GPL-licensed,  so  if
-       you  distribute  a binary of pcre2test linked in this way, there may be
+       it reads it using the readline() function. This  provides  line-editing
+       and  history  facilities.  Note that libreadline is GPL-licensed, so if
+       you distribute a binary of pcre2test linked in this way, there  may  be
       licensing issues. These can be avoided by linking instead with libedit,
       which has a BSD licence.

-       Setting  --enable-pcre2test-libreadline causes the -lreadline option to
-       be added to the pcre2test build. In many operating environments with  a
-       sytem-installed  readline  library this is sufficient. However, in some
+       Setting --enable-pcre2test-libreadline causes the -lreadline option  to
+       be  added to the pcre2test build. In many operating environments with a
+       system-installed readline library this is sufficient. However, in  some
       environments (e.g. if an unmodified distribution version of readline is
-       in  use),  some  extra configuration may be necessary. The INSTALL file
+       in use), some extra configuration may be necessary.  The  INSTALL  file
       for libreadline says this:

         "Readline uses the termcap functions, but does not link with
         the termcap or curses library itself, allowing applications
         which link with readline the to choose an appropriate library."

-       If your environment has not been set up so that an appropriate  library
+       If  your environment has not been set up so that an appropriate library
       is automatically included, you may need to add something like

         LIBS="-lncurses"
@ -3280,7 +3355,7 @@ INCLUDING DEBUGGING CODE

         --enable-debug

-       to  the configure command, additional debugging code is included in the
+       to the configure command, additional debugging code is included in  the
       build. This feature is intended for use by the PCRE2 maintainers.


@ -3290,15 +3365,15 @@ DEBUGGING WITH VALGRIND SUPPORT

         --enable-valgrind

-       to the configure command, PCRE2 will use valgrind annotations  to  mark
-       certain  memory  regions  as  unaddressable.  This  allows it to detect
-       invalid memory accesses, and  is  mostly  useful  for  debugging  PCRE2
+       to  the  configure command, PCRE2 will use valgrind annotations to mark
+       certain memory regions as  unaddressable.  This  allows  it  to  detect
+       invalid  memory  accesses,  and  is  mostly  useful for debugging PCRE2
       itself.


 CODE COVERAGE REPORTING

-       If  your  C  compiler is gcc, you can build a version of PCRE2 that can
+       If your C compiler is gcc, you can build a version of  PCRE2  that  can
       generate a code coverage report for its test suite. To enable this, you
       must install lcov version 1.6 or above. Then specify

@ -3307,20 +3382,20 @@ CODE COVERAGE REPORTING
       to the configure command and build PCRE2 in the usual way.

       Note that using ccache (a caching C compiler) is incompatible with code
-       coverage reporting. If you have configured ccache to run  automatically
+       coverage  reporting. If you have configured ccache to run automatically
       on your system, you must set the environment variable

         CCACHE_DISABLE=1

       before running make to build PCRE2, so that ccache is not used.

-       When  --enable-coverage  is  used,  the  following addition targets are
+       When --enable-coverage is used, the following  additional  targets  are
       added to the Makefile:

         make coverage

-       This creates a fresh coverage report for the PCRE2 test  suite.  It  is
-       equivalent  to running "make coverage-reset", "make coverage-baseline",
+       This  creates  a  fresh coverage report for the PCRE2 test suite. It is
+       equivalent to running "make coverage-reset", "make  coverage-baseline",
       "make check", and then "make coverage-report".

         make coverage-reset
@ -3337,18 +3412,18 @@ CODE COVERAGE REPORTING

         make coverage-clean-report

-       This removes the generated coverage report without cleaning the  cover-
+       This  removes the generated coverage report without cleaning the cover-
       age data itself.

         make coverage-clean-data

-       This  removes  the captured coverage data without removing the coverage
+       This removes the captured coverage data without removing  the  coverage
       files created at compile time (*.gcno).

         make coverage-clean

-       This cleans all coverage data including the generated coverage  report.
-       For  more  information about code coverage, see the gcov and lcov docu-
+       This  cleans all coverage data including the generated coverage report.
+       For more information about code coverage, see the gcov and  lcov  docu-
       mentation.


@ -3366,7 +3441,7 @@ AUTHOR

 REVISION

-       Last updated: 24 April 2015
+       Last updated: 16 October 2015
       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
 
@ -6028,12 +6103,18 @@ MATCHING A SINGLE CODE UNIT
       results, because PCRE2 assumes that it is matching character by charac-
       ter in a valid UTF string (by default it checks  the  subject  string's
       validity  at  the  start  of  processing  unless the PCRE2_NO_UTF_CHECK
-       option is used). An application can lock out the use of \C  by  setting
-       the PCRE2_NEVER_BACKSLASH_C option.
+       option is used).

-       PCRE2  does  not allow \C to appear in lookbehind assertions (described
-       below) in a UTF mode, because this would make it impossible  to  calcu-
-       late the length of the lookbehind.
+       An  application  can  lock  out  the  use  of   \C   by   setting   the
+       PCRE2_NEVER_BACKSLASH_C  option  when  compiling  a pattern. It is also
+       possible to build PCRE2 with the use of \C permanently disabled.
+
+       PCRE2 does not allow \C to appear in lookbehind  assertions  (described
+       below)  in  a UTF mode, because this would make it impossible to calcu-
+       late the length of the lookbehind.  Neither  the  alternative  matching
+       function  pcre2_dfa_match()  nor  the JIT optimizer support \C in a UTF
+       mode. The former gives a match-time error; the latter fails to optimize
+       and so the match is always run using the interpreter.

       In general, the \C escape sequence is best avoided. However, one way of
       using it that avoids the problem of malformed UTF characters is to  use
@ -8036,7 +8117,7 @@ AUTHOR

 REVISION

-       Last updated: 01 September 2015
+       Last updated: 16 October 2015
       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
 
@ -8966,10 +9047,10 @@ CHARACTER TYPES
         \W         a "non-word" character
         \X         a Unicode extended grapheme cluster

-       The  application  can  lock  out  the  use  of  \C   by   setting   the
-       PCRE2_NEVER_BACKSLASH_C  option.  It  is dangerous because it may leave
-       the current matching point in the middle of a UTF-8 or  UTF-16  charac-
-       ter.
+       \C is dangerous because it may leave the current matching point in  the
+       middle of a UTF-8 or UTF-16 character. The application can lock out the
+       use of \C by setting the PCRE2_NEVER_BACKSLASH_C  option.  It  is  also
+       possible to build PCRE2 with the use of \C permanently disabled.

       By  default,  \d, \s, and \w match only ASCII characters, even in UTF-8
       mode or in the 16-bit and 32-bit libraries. However, if locale-specific
@ -9325,7 +9406,7 @@ AUTHOR

 REVISION

-       Last updated: 17 July 2015
+       Last updated: 16 October 2015
       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
 
@ -9384,89 +9465,90 @@ WIDE CHARACTERS AND UTF MODES
       The  escape  sequence  \C can be used to match a single code unit, in a
       UTF mode, but its use can lead  to  some  strange  effects  because  it
       breaks  up  multi-unit  characters  (see  the  description of \C in the
-       pcre2pattern documentation). The use of \C  is  not  supported  in  the
-       alternative matching function pcre2_dfa_match(), nor is it supported in
-       UTF mode by the JIT optimization. If JIT optimization is requested  for
-       a  UTF pattern that contains \C, it will not succeed, and so the match-
-       ing will be carried out by the normal interpretive function.
+       pcre2pattern documentation). The use of \C  is  not  supported  by  the
+       alternative  matching  function pcre2_dfa_match() when in UTF mode. Its
+       use provokes a match-time error. The JIT  optimization  also  does  not
+       support  \C  in  UTF  mode.  If JIT optimization is requested for a UTF
+       pattern that contains \C, it will not succeed, and so the matching will
+       be carried out by the normal interpretive function.

       The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test
-       characters  of  any  code  value,  but, by default, the characters that
-       PCRE2 recognizes as digits, spaces, or word characters remain the  same
-       set  as  in  non-UTF  mode,  all  with  code points less than 256. This
-       remains true even when PCRE2  is  built  to  include  Unicode  support,
-       because  to do otherwise would slow down matching in many common cases.
-       Note that this also applies to \b and \B, because they are  defined  in
-       terms  of  \w  and  \W.  If you want to test for a wider sense of, say,
-       "digit", you can use explicit Unicode property tests  such  as  \p{Nd}.
-       Alternatively,  if you set the PCRE2_UCP option, the way that the char-
-       acter escapes work is changed so that Unicode properties  are  used  to
+       characters of any code value, but,  by  default,  the  characters  that
+       PCRE2  recognizes as digits, spaces, or word characters remain the same
+       set as in non-UTF mode, all  with  code  points  less  than  256.  This
+       remains  true  even  when  PCRE2  is  built to include Unicode support,
+       because to do otherwise would slow down matching in many common  cases.
+       Note  that  this also applies to \b and \B, because they are defined in
+       terms of \w and \W. If you want to test for  a  wider  sense  of,  say,
+       "digit",  you  can  use explicit Unicode property tests such as \p{Nd}.
+       Alternatively, if you set the PCRE2_UCP option, the way that the  char-
+       acter  escapes  work  is changed so that Unicode properties are used to
       determine which characters match. There are more details in the section
       on generic character types in the pcre2pattern documentation.

-       Similarly, characters that match the POSIX named character classes  are
+       Similarly,  characters that match the POSIX named character classes are
       all low-valued characters, unless the PCRE2_UCP option is set.

-       However,  the  special  horizontal  and  vertical  white space matching
+       However, the special  horizontal  and  vertical  white  space  matching
       escapes (\h, \H, \v, and \V) do match all the appropriate Unicode char-
       acters, whether or not PCRE2_UCP is set.

-       Case-insensitive  matching in UTF mode makes use of Unicode properties.
-       A few Unicode characters such as Greek sigma have more than  two  code-
+       Case-insensitive matching in UTF mode makes use of Unicode  properties.
+       A  few  Unicode characters such as Greek sigma have more than two code-
       points that are case-equivalent, and these are treated as such.


 VALIDITY OF UTF STRINGS

-       When  the  PCRE2_UTF  option is set, the strings passed as patterns and
+       When the PCRE2_UTF option is set, the strings passed  as  patterns  and
       subjects are (by default) checked for validity on entry to the relevant
-       functions.   If an invalid UTF string is passed, an negative error code
-       is returned. The code unit offset to the  offending  character  can  be
-       extracted  from  the match data block by calling pcre2_get_startchar(),
+       functions.  If an invalid UTF string is passed,  a negative error  code
+       is  returned.  The  code  unit offset to the offending character can be
+       extracted from the match data block by  calling  pcre2_get_startchar(),
       which is used for this purpose after a UTF error.

       UTF-16 and UTF-32 strings can indicate their endianness by special code
-       knows  as  a  byte-order  mark (BOM). The PCRE2 functions do not handle
+       known as a byte-order mark (BOM). The PCRE2  functions  do  not  handle
       this, expecting strings to be in host byte order.

       A UTF string is checked before any other processing takes place. In the
-       case  of  pcre2_match()  and  pcre2_dfa_match()  calls  with a non-zero
-       starting offset, the check is applied only to that part of the  subject
-       that  could be inspected during matching, and there is a check that the
-       starting offset points to the first code unit of a character or to  the
-       end  of  the subject. If there are no lookbehind assertions in the pat-
-       tern, the check starts at the starting offset. Otherwise, it starts  at
-       the  length of the longest lookbehind before the starting offset, or at
-       the start of the subject if there are not that many  characters  before
-       the  starting offset. Note that the sequences \b and \B are one-charac-
+       case of pcre2_match()  and  pcre2_dfa_match()  calls  with  a  non-zero
+       starting  offset, the check is applied only to that part of the subject
+       that could be inspected during matching, and there is a check that  the
+       starting  offset points to the first code unit of a character or to the
+       end of the subject. If there are no lookbehind assertions in  the  pat-
+       tern,  the check starts at the starting offset. Otherwise, it starts at
+       the length of the longest lookbehind before the starting offset, or  at
+       the  start  of the subject if there are not that many characters before
+       the starting offset. Note that the sequences \b and \B are  one-charac-
       ter lookbehinds.

-       In addition to checking the format of the string, there is a  check  to
+       In  addition  to checking the format of the string, there is a check to
       ensure that all code points lie in the range U+0 to U+10FFFF, excluding
-       the surrogate area. The so-called "non-character" code points  are  not
+       the  surrogate  area. The so-called "non-character" code points are not
       excluded because Unicode corrigendum #9 makes it clear that they should
       not be.

-       Characters in the "Surrogate Area" of Unicode are reserved for  use  by
-       UTF-16,  where they are used in pairs to encode code points with values
-       greater than 0xFFFF. The code points that are encoded by  UTF-16  pairs
-       are  available  independently  in  the  UTF-8 and UTF-32 encodings. (In
-       other words, the whole surrogate thing is  a  fudge  for  UTF-16  which
+       Characters  in  the "Surrogate Area" of Unicode are reserved for use by
+       UTF-16, where they are used in pairs to encode code points with  values
+       greater  than  0xFFFF. The code points that are encoded by UTF-16 pairs
+       are available independently in the  UTF-8  and  UTF-32  encodings.  (In
+       other  words,  the  whole  surrogate  thing is a fudge for UTF-16 which
       unfortunately messes up UTF-8 and UTF-32.)

-       In  some  situations, you may already know that your strings are valid,
-       and therefore want to skip these checks in  order  to  improve  perfor-
-       mance,  for  example in the case of a long subject string that is being
-       scanned repeatedly.  If you set the PCRE2_NO_UTF_CHECK option  at  com-
-       pile  time  or at match time, PCRE2 assumes that the pattern or subject
+       In some situations, you may already know that your strings  are  valid,
+       and  therefore  want  to  skip these checks in order to improve perfor-
+       mance, for example in the case of a long subject string that  is  being
+       scanned  repeatedly.   If you set the PCRE2_NO_UTF_CHECK option at com-
+       pile time or at match time, PCRE2 assumes that the pattern  or  subject
       it is given (respectively) contains only valid UTF code unit sequences.

-       Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables  the  check
+       Passing  PCRE2_NO_UTF_CHECK  to pcre2_compile() just disables the check
       for the pattern; it does not also apply to subject strings. If you want
-       to disable the check for a subject string you must pass this option  to
+       to  disable the check for a subject string you must pass this option to
       pcre2_match() or pcre2_dfa_match().

-       If  you  pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the
+       If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is  set,  the
       result is undefined and your program may crash or loop indefinitely.

   Errors in UTF-8 strings
@ -9479,10 +9561,10 @@ VALIDITY OF UTF STRINGS
         PCRE2_ERROR_UTF8_ERR4
         PCRE2_ERROR_UTF8_ERR5

-       The string ends with a truncated UTF-8 character;  the  code  specifies
-       how  many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8
-       characters to be no longer than 4 bytes, the  encoding  scheme  (origi-
-       nally  defined  by  RFC  2279)  allows  for  up to 6 bytes, and this is
+       The  string  ends  with a truncated UTF-8 character; the code specifies
+       how many bytes are missing (1 to 5). Although RFC 3629 restricts  UTF-8
+       characters  to  be  no longer than 4 bytes, the encoding scheme (origi-
+       nally defined by RFC 2279) allows for  up  to  6  bytes,  and  this  is
       checked first; hence the possibility of 4 or 5 missing bytes.

         PCRE2_ERROR_UTF8_ERR6
@ -9492,24 +9574,24 @@ VALIDITY OF UTF STRINGS
         PCRE2_ERROR_UTF8_ERR10

       The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of
-       the  character  do  not have the binary value 0b10 (that is, either the
+       the character do not have the binary value 0b10 (that  is,  either  the
       most significant bit is 0, or the next bit is 1).

         PCRE2_ERROR_UTF8_ERR11
         PCRE2_ERROR_UTF8_ERR12

-       A character that is valid by the RFC 2279 rules is either 5 or 6  bytes
+       A  character that is valid by the RFC 2279 rules is either 5 or 6 bytes
       long; these code points are excluded by RFC 3629.

         PCRE2_ERROR_UTF8_ERR13

-       A  4-byte character has a value greater than 0x10fff; these code points
+       A 4-byte character has a value greater than 0x10ffff; these code points
       are excluded by RFC 3629.

         PCRE2_ERROR_UTF8_ERR14

-       A 3-byte character has a value in the  range  0xd800  to  0xdfff;  this
-       range  of code points are reserved by RFC 3629 for use with UTF-16, and
+       A  3-byte  character  has  a  value in the range 0xd800 to 0xdfff; this
+       range  of code points is reserved by RFC 3629 for use with UTF-16,  and
       so is excluded from UTF-8.

         PCRE2_ERROR_UTF8_ERR15
@ -9518,26 +9600,26 @@ VALIDITY OF UTF STRINGS
         PCRE2_ERROR_UTF8_ERR18
         PCRE2_ERROR_UTF8_ERR19

-       A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it  codes
-       for  a  value that can be represented by fewer bytes, which is invalid.
-       For example, the two bytes 0xc0, 0xae give the value 0x2e,  whose  cor-
+       A  2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes
+       for a value that can be represented by fewer bytes, which  is  invalid.
+       For  example,  the two bytes 0xc0, 0xae give the value 0x2e, whose cor-
       rect coding uses just one byte.

         PCRE2_ERROR_UTF8_ERR20

       The two most significant bits of the first byte of a character have the
-       binary value 0b10 (that is, the most significant bit is 1 and the  sec-
-       ond  is  0). Such a byte can only validly occur as the second or subse-
+       binary  value 0b10 (that is, the most significant bit is 1 and the sec-
+       ond is 0). Such a byte can only validly occur as the second  or  subse-
       quent byte of a multi-byte character.

         PCRE2_ERROR_UTF8_ERR21

-       The first byte of a character has the value 0xfe or 0xff. These  values
+       The  first byte of a character has the value 0xfe or 0xff. These values
       can never occur in a valid UTF-8 string.

   Errors in UTF-16 strings

-       The  following  negative  error  codes  are  given  for  invalid UTF-16
+       The following  negative  error  codes  are  given  for  invalid  UTF-16
       strings:

         PCRE_UTF16_ERR1  Missing low surrogate at end of string
@ -9547,7 +9629,7 @@ VALIDITY OF UTF STRINGS

   Errors in UTF-32 strings

-       The following  negative  error  codes  are  given  for  invalid  UTF-32
+       The  following  negative  error  codes  are  given  for  invalid UTF-32
       strings:

         PCRE_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
@ -9563,7 +9645,7 @@ AUTHOR

 REVISION

-       Last updated: 18 August 2015
+       Last updated: 16 October 2015
       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
 
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "07 October 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -1209,7 +1209,8 @@ This option locks out the use of \eC in the pattern that is being compiled.
 This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because
 it may leave the current matching point in the middle of a multi-code-unit
 character. This option may be useful in applications that process patterns from
-external sources.
+external sources. Note that there is also a build-time option that permanently 
+locks out the use of \eC.
 .sp
  PCRE2_NEVER_UCP
 .sp
@ -3014,8 +3015,8 @@ There are in addition the following errors that are specific to
  PCRE2_ERROR_DFA_UITEM
 .sp
 This return is given if \fBpcre2_dfa_match()\fP encounters an item in the
-pattern that it does not support, for instance, the use of \eC or a back
-reference.
+pattern that it does not support, for instance, the use of \eC in a UTF mode or
+a back reference.
 .sp
  PCRE2_ERROR_DFA_UCOND
 .sp
@ -3065,6 +3066,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 07 October 2015
+Last updated: 16 October 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2build.3
+++ b/doc/pcre2build.3
@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "23 April 2015" "PCRE2 10.20"
+.TH PCRE2BUILD 3 "16 October 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .
@ -132,11 +132,20 @@ Pattern escapes such as \ed and \ew do not by default make use of Unicode
 properties. The application can request that they do by setting the PCRE2_UCP
 option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also
 request this by starting with (*UCP).
-.P
+.
+.
+.SH "DISABLING THE USE OF \eC"
+.rs
+.sp
 The \eC escape sequence, which matches a single code unit, even in a UTF mode,
 can cause unpredictable behaviour because it may leave the current matching
-point in the middle of a multi-code-unit character. It can be locked out by
-setting the PCRE2_NEVER_BACKSLASH_C option.
+point in the middle of a multi-code-unit character. The application can lock it
+out by setting the PCRE2_NEVER_BACKSLASH_C option when calling
+\fBpcre2_compile()\fP. There is also a build-time option
+.sp
+  --enable-never-backslash-C
+.sp
+(note the upper case C) which locks out the use of \eC entirely.
 .
 .
 .SH "JUST-IN-TIME COMPILER SUPPORT"
@ -510,6 +519,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 24 April 2015
+Last updated: 16 October 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@ -1,4 +1,4 @@
-.TH PCRE2PATTERN 3 "01 September 2015" "PCRE2 10.21"
+.TH PCRE2PATTERN 3 "16 October 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION DETAILS"
@ -1233,8 +1233,11 @@ with \eC in UTF-8 or UTF-16 mode means that the rest of the string may start
 with a malformed UTF character. This has undefined results, because PCRE2
 assumes that it is matching character by character in a valid UTF string (by
 default it checks the subject string's validity at the start of processing
-unless the PCRE2_NO_UTF_CHECK option is used). An application can lock out the
-use of \eC by setting the PCRE2_NEVER_BACKSLASH_C option.
+unless the PCRE2_NO_UTF_CHECK option is used). 
+.P
+An application can lock out the use of \eC by setting the
+PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to
+build PCRE2 with the use of \eC permanently disabled.
 .P
 PCRE2 does not allow \eC to appear in lookbehind assertions
 .\" HTML <a href="#lookbehind">
@ -1242,7 +1245,10 @@ PCRE2 does not allow \eC to appear in lookbehind assertions
 (described below)
 .\"
 in a UTF mode, because this would make it impossible to calculate the length of
-the lookbehind.
+the lookbehind. Neither the alternative matching function 
+\fBpcre2_dfa_match()\fP nor the JIT optimizer supports \eC in a UTF mode. The
+former gives a match-time error; the latter fails to optimize and so the match 
+is always run using the interpreter.
 .P
 In general, the \eC escape sequence is best avoided. However, one way of using
 it that avoids the problem of malformed UTF characters is to use a lookahead to
@ -3386,6 +3392,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 01 September 2015
+Last updated: 16 October 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2syntax.3
+++ b/doc/pcre2syntax.3
@ -1,4 +1,4 @@
-.TH PCRE2SYNTAX 3 "17 July 2015" "PCRE2 10.21"
+.TH PCRE2SYNTAX 3 "16 October 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
@ -81,9 +81,10 @@ it matches a literal "u".
  \eW         a "non-word" character
  \eX         a Unicode extended grapheme cluster
 .sp
-The application can lock out the use of \eC by setting the
-PCRE2_NEVER_BACKSLASH_C option. It is dangerous because it may leave the
-current matching point in the middle of a UTF-8 or UTF-16 character.
+\eC is dangerous because it may leave the current matching point in the middle
+of a UTF-8 or UTF-16 character. The application can lock out the use of \eC by
+setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 
+with the use of \eC permanently disabled.
 .P
 By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode
 or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
@ -576,6 +577,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 17 July 2015
+Last updated: 16 October 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "23 September 2015" "PCRE 10.21"
+.TH PCRE2TEST 1 "17 October 2015" "PCRE2 10.21"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -122,12 +122,13 @@ following options output the value and set the exit code as indicated:
 The following options output 1 for true or 0 for false, and set the exit code
 to the same value:
 .sp
-  ebcdic     compiled for an EBCDIC environment
-  jit        just-in-time support is available
-  pcre2-16   the 16-bit library was built
-  pcre2-32   the 32-bit library was built
-  pcre2-8    the 8-bit library was built
-  unicode    Unicode support is available
+  backslash-C  \eC is supported (not locked out)
+  ebcdic       compiled for an EBCDIC environment
+  jit          just-in-time support is available
+  pcre2-16     the 16-bit library was built
+  pcre2-32     the 32-bit library was built
+  pcre2-8      the 8-bit library was built
+  unicode      Unicode support is available
 .sp
 If an unknown option is given, an error message is output; the exit code is 0.
 .TP 10
@ -1559,6 +1560,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 September 2015
+Last updated: 17 October 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -119,12 +119,13 @@ COMMAND LINE OPTIONS
                 The following options output 1 for true or 0 for  false,  and
                 set the exit code to the same value:

-                   ebcdic     compiled for an EBCDIC environment
-                   jit        just-in-time support is available
-                   pcre2-16   the 16-bit library was built
-                   pcre2-32   the 32-bit library was built
-                   pcre2-8    the 8-bit library was built
-                   unicode    Unicode support is available
+                   backslash-C  \C is supported (not locked out)
+                   ebcdic       compiled for an EBCDIC environment
+                   jit          just-in-time support is available
+                   pcre2-16     the 16-bit library was built
+                   pcre2-32     the 32-bit library was built
+                   pcre2-8      the 8-bit library was built
+                   unicode      Unicode support is available

                 If  an  unknown  option is given, an error message is output;
                 the exit code is 0.
@ -457,7 +458,7 @@ PATTERN MODIFIERS
   Setting compilation options

       The following modifiers set options for pcre2_compile(). The most  com-
-       mon  ones  have single-letter abbreviations. See pcreapi for a descrip-
+       mon  ones have single-letter abbreviations. See pcre2api for a descrip-
       tion of their effects.

             allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
@ -484,6 +485,7 @@ PATTERN MODIFIERS
             no_utf_check              set PCRE2_NO_UTF_CHECK
             ucp                       set PCRE2_UCP
             ungreedy                  set PCRE2_UNGREEDY
+             use_offset_limit          set PCRE2_USE_OFFSET_LIMIT
             utf                       set PCRE2_UTF

       As well as turning on the PCRE2_UTF option, the utf modifier causes all
@ -509,6 +511,7 @@ PATTERN MODIFIERS
             locale=<name>             use this locale
             memory                    show memory used
             newline=<type>            set newline type
+             null_context              compile with a NULL context
             parens_nest_limit=<n>     set maximum parentheses depth
             posix                     use the POSIX API
             push                      push compiled pattern onto the stack
@ -579,35 +582,42 @@ PATTERN MODIFIERS
       mation that is requested. For each callout, either its number or string
       is given, followed by the item that follows it in the pattern.

+   Passing a NULL context
+
+       Normally,  pcre2test  passes a context block to pcre2_compile(). If the
+       null_context modifier is set, however, NULL  is  passed.  This  is  for
+       testing  that  pcre2_compile()  behaves correctly in this case (it uses
+       default values).
+
   Specifying a pattern in hex

       The hex modifier specifies that the characters of the pattern are to be
-       interpreted as pairs of hexadecimal digits. White  space  is  permitted
+       interpreted  as  pairs  of hexadecimal digits. White space is permitted
       between pairs. For example:

         /ab 32 59/hex

-       This  feature  is  provided  as a way of creating patterns that contain
-       binary zero and other non-printing characters.  By  default,  pcre2test
-       passes  patterns  as zero-terminated strings to pcre2_compile(), giving
+       This feature is provided as a way of  creating  patterns  that  contain
+       binary  zero  and  other non-printing characters. By default, pcre2test
+       passes patterns as zero-terminated strings to  pcre2_compile(),  giving
       the length as PCRE2_ZERO_TERMINATED. However, for patterns specified in
       hexadecimal, the actual length of the pattern is passed.

   JIT compilation

-       Just-in-time  (JIT)  compiling  is  a heavyweight optimization that can
-       greatly speed up pattern matching. See the pcre2jit  documentation  for
-       details.  JIT  compiling  happens, optionally, after a pattern has been
-       successfully compiled into an internal form. The JIT compiler  converts
+       Just-in-time (JIT) compiling is a  heavyweight  optimization  that  can
+       greatly  speed  up pattern matching. See the pcre2jit documentation for
+       details. JIT compiling happens, optionally, after a  pattern  has  been
+       successfully  compiled into an internal form. The JIT compiler converts
       this to optimized machine code. It needs to know whether the match-time
       options PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used,
-       because  different  code  is generated for the different cases. See the
-       partial modifier in "Subject Modifiers" below for details of how  these
+       because different code is generated for the different  cases.  See  the
+       partial  modifier in "Subject Modifiers" below for details of how these
       options are specified for each match attempt.

-       JIT  compilation  is  requested by the /jit pattern modifier, which may
+       JIT compilation is requested by the /jit pattern  modifier,  which  may
       optionally be followed by an equals sign and a number in the range 0 to
-       7.   The  three bits that make up the number specify which of the three
+       7.  The three bits that make up the number specify which of  the  three
       JIT operating modes are to be compiled:

         1  compile JIT code for non-partial matching
@ -624,31 +634,31 @@ PATTERN MODIFIERS
         6  soft and hard partial matching only
         7  all three modes

-       If no number is given, 7 is  assumed.  The  phrase  "partial  matching"
+       If  no  number  is  given,  7 is assumed. The phrase "partial matching"
       means a call to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the
-       PCRE2_PARTIAL_HARD option set. Note that such a call may return a  com-
+       PCRE2_PARTIAL_HARD  option set. Note that such a call may return a com-
       plete match; the options enable the possibility of a partial match, but
-       do not require it. Note also that if you request JIT  compilation  only
-       for  partial  matching (for example, /jit=2) but do not set the partial
-       modifier on a subject line, that match will not use  JIT  code  because
+       do  not  require it. Note also that if you request JIT compilation only
+       for partial matching (for example, /jit=2) but do not set  the  partial
+       modifier  on  a  subject line, that match will not use JIT code because
       none was compiled for non-partial matching.

-       If  JIT compilation is successful, the compiled JIT code will automati-
-       cally be used when an appropriate type of match  is  run,  except  when
-       incompatible  run-time options are specified. For more details, see the
-       pcre2jit documentation. See also the jitstack modifier below for a  way
+       If JIT compilation is successful, the compiled JIT code will  automati-
+       cally  be  used  when  an appropriate type of match is run, except when
+       incompatible run-time options are specified. For more details, see  the
+       pcre2jit  documentation. See also the jitstack modifier below for a way
       of setting the size of the JIT stack.

-       If  the  jitfast  modifier is specified, matching is done using the JIT
-       "fast path" interface, pcre2_jit_match(), which skips some of the  san-
-       ity  checks that are done by pcre2_match(), and of course does not work
-       when JIT is not supported. If jitfast is specified without  jit,  jit=7
+       If the jitfast modifier is specified, matching is done  using  the  JIT
+       "fast  path" interface, pcre2_jit_match(), which skips some of the san-
+       ity checks that are done by pcre2_match(), and of course does not  work
+       when  JIT  is not supported. If jitfast is specified without jit, jit=7
       is assumed.

-       If  the jitverify modifier is specified, information about the compiled
-       pattern shows whether JIT compilation was or  was  not  successful.  If
-       jitverify  is  specified without jit, jit=7 is assumed. If JIT compila-
-       tion is successful when jitverify is set, the text "(JIT)" is added  to
+       If the jitverify modifier is specified, information about the  compiled
+       pattern  shows  whether  JIT  compilation was or was not successful. If
+       jitverify is specified without jit, jit=7 is assumed. If  JIT  compila-
+       tion  is successful when jitverify is set, the text "(JIT)" is added to
       the first output line after a match or non match when JIT-compiled code
       was actually used in the match.

@ -659,18 +669,18 @@ PATTERN MODIFIERS
         /pattern/locale=fr_FR

       The given locale is set, pcre2_maketables() is called to build a set of
-       character  tables for the locale, and this is then passed to pcre2_com-
-       pile() when compiling the regular expression. The same tables are  used
+       character tables for the locale, and this is then passed to  pcre2_com-
+       pile()  when compiling the regular expression. The same tables are used
       when matching the following subject lines. The /locale modifier applies
       only to the pattern on which it appears, but can be given in a #pattern
-       command  if a default is needed. Setting a locale and alternate charac-
+       command if a default is needed. Setting a locale and alternate  charac-
       ter tables are mutually exclusive.

   Showing pattern memory

-       The /memory modifier causes the size in bytes of  the  memory  used  to
-       hold  the compiled pattern to be output. This does not include the size
-       of the pcre2_code block; it is just the actual compiled  data.  If  the
+       The  /memory  modifier  causes  the size in bytes of the memory used to
+       hold the compiled pattern to be output. This does not include the  size
+       of  the  pcre2_code  block; it is just the actual compiled data. If the
       pattern is subsequently passed to the JIT compiler, the size of the JIT
       compiled code is also output. Here is an example:

@ -681,19 +691,19 @@ PATTERN MODIFIERS

   Limiting nested parentheses

-       The parens_nest_limit modifier sets a limit  on  the  depth  of  nested
-       parentheses  in  a  pattern.  Breaching  the limit causes a compilation
-       error.  The default for the library is set when  PCRE2  is  built,  but
-       pcre2test  sets  its  own default of 220, which is required for running
+       The  parens_nest_limit  modifier  sets  a  limit on the depth of nested
+       parentheses in a pattern. Breaching  the  limit  causes  a  compilation
+       error.   The  default  for  the library is set when PCRE2 is built, but
+       pcre2test sets its own default of 220, which is  required  for  running
       the standard test suite.

   Using the POSIX wrapper API

-       The /posix modifier causes pcre2test to call PCRE2 via the POSIX  wrap-
-       per  API  rather  than  its  native  API.  This supports only the 8-bit
-       library.  Note that it does not imply  POSIX  matching  semantics;  for
-       more  detail  see  the  pcre2posix documentation. When the POSIX API is
-       being used, the following pattern modifiers set options  for  the  reg-
+       The  /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
+       per API rather than its  native  API.  This  supports  only  the  8-bit
+       library.   Note  that  it  does not imply POSIX matching semantics; for
+       more detail see the pcre2posix documentation. When  the  POSIX  API  is
+       being  used,  the  following pattern modifiers set options for the reg-
       comp() function:

         caseless           REG_ICASE
@ -704,24 +714,24 @@ PATTERN MODIFIERS
         ucp                REG_UCP        )   the POSIX standard
         utf                REG_UTF8       )

-       The  aftertext  and  allaftertext  subject  modifiers work as described
+       The aftertext and allaftertext  subject  modifiers  work  as  described
       below. All other modifiers cause an error.

   Testing the stack guard feature

-       The /stackguard modifier is used to  test  the  use  of  pcre2_set_com-
-       pile_recursion_guard(),  a  function  that  is provided to enable stack
-       availability to be checked during compilation (see the  pcre2api  docu-
-       mentation  for  details).  If  the  number specified by the modifier is
+       The  /stackguard  modifier  is  used  to test the use of pcre2_set_com-
+       pile_recursion_guard(), a function that is  provided  to  enable  stack
+       availability  to  be checked during compilation (see the pcre2api docu-
+       mentation for details). If the number  specified  by  the  modifier  is
       greater than zero, pcre2_set_compile_recursion_guard() is called to set
-       up  callback  from pcre2_compile() to a local function. The argument it
-       receives is the current nesting parenthesis depth; if this  is  greater
+       up a callback from pcre2_compile() to a local function. The argument it
+       receives  is  the current nesting parenthesis depth; if this is greater
       than the value given by the modifier, non-zero is returned, causing the
       compilation to be aborted.

   Using alternative character tables

-       The value specified for the /tables modifier must be one of the  digits
+       The  value specified for the /tables modifier must be one of the digits
       0, 1, or 2. It causes a specific set of built-in character tables to be
       passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
       haviour with different character tables. The digit specifies the tables
@ -732,15 +742,15 @@ PATTERN MODIFIERS
               pcre2_chartables.c.dist
         2   a set of tables defining ISO 8859 characters

-       In table 2, some characters whose codes are greater than 128 are  iden-
-       tified  as  letters,  digits,  spaces, etc. Setting alternate character
+       In  table 2, some characters whose codes are greater than 128 are iden-
+       tified as letters, digits, spaces,  etc.  Setting  alternate  character
       tables and a locale are mutually exclusive.

   Setting certain match controls

       The following modifiers are really subject modifiers, and are described
-       below.   However, they may be included in a pattern's modifier list, in
-       which case they are applied to every subject  line  that  is  processed
+       below.  However, they may be included in a pattern's modifier list,  in
+       which  case  they  are  applied to every subject line that is processed
       with that pattern. They do not affect the compilation process.

             aftertext           show text after match
@ -752,20 +762,20 @@ PATTERN MODIFIERS
             replace=<string>    specify a replacement string
             startchar           show starting character when relevant

-       These  modifiers may not appear in a #pattern command. If you want them
+       These modifiers may not appear in a #pattern command. If you want  them
       as defaults, set them in a #subject command.

   Saving a compiled pattern

-       When a pattern with the push modifier is successfully compiled,  it  is
-       pushed  onto  a  stack  of compiled patterns, and pcre2test expects the
-       next line to contain a new pattern (or a command) instead of a  subject
+       When  a  pattern with the push modifier is successfully compiled, it is
+       pushed onto a stack of compiled patterns,  and  pcre2test  expects  the
+       next  line to contain a new pattern (or a command) instead of a subject
       line. This facility is used when saving compiled patterns to a file, as
-       described in the section entitled "Saving and restoring  compiled  pat-
+       described  in  the section entitled "Saving and restoring compiled pat-
       terns" below.  The push modifier is incompatible with compilation modi-
       fiers such as global that act at match time. Any that are specified are
-       ignored,  with  a  warning message, except for replace, which causes an
-       error. Note that, jitverify, which is allowed, does not  carry  through
+       ignored, with a warning message, except for replace,  which  causes  an
+       error.  Note  that  jitverify, which is allowed, does not carry through
       to any subsequent matching that uses this pattern.


@ -776,7 +786,7 @@ SUBJECT MODIFIERS

   Setting match options

-       The   following   modifiers   set   options   for   pcre2_match()    or
+       The    following   modifiers   set   options   for   pcre2_match()   or
       pcre2_dfa_match(). See pcre2api for a description of their effects.

             anchored                  set PCRE2_ANCHORED
@ -790,20 +800,20 @@ SUBJECT MODIFIERS
             partial_hard (or ph)      set PCRE2_PARTIAL_HARD
             partial_soft (or ps)      set PCRE2_PARTIAL_SOFT

-       The  partial matching modifiers are provided with abbreviations because
+       The partial matching modifiers are provided with abbreviations  because
       they appear frequently in tests.

-       If the /posix modifier was present on the pattern,  causing  the  POSIX
+       If  the  /posix  modifier was present on the pattern, causing the POSIX
       wrapper API to be used, the only option-setting modifiers that have any
-       effect  are  notbol,  notempty,   and   noteol,   causing   REG_NOTBOL,
-       REG_NOTEMPTY,  and REG_NOTEOL, respectively, to be passed to regexec().
+       effect   are   notbol,   notempty,   and  noteol,  causing  REG_NOTBOL,
+       REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to  regexec().
       Any other modifiers cause an error.

   Setting match controls

-       The following modifiers affect the matching process  or  request  addi-
-       tional  information.  Some  of  them may also be specified on a pattern
-       line (see above), in which case they apply to every subject  line  that
+       The  following  modifiers  affect the matching process or request addi-
+       tional information. Some of them may also be  specified  on  a  pattern
+       line  (see  above), in which case they apply to every subject line that
       is matched against that pattern.

             aftertext                 show text after match
@ -823,9 +833,11 @@ SUBJECT MODIFIERS
         /g  global                    global matching
             jitstack=<n>              set size of JIT stack
             mark                      show mark values
-             match_limit=>n>           set a match limit
+             match_limit=<n>           set a match limit
             memory                    show memory usage
+             null_context              match with a NULL context
             offset=<n>                set starting offset
+             offset_limit=<n>          set offset limit
             ovector=<n>               set size of output vector
             recursion_limit=<n>       set a recursion limit
             replace=<string>          specify a replacement string
@ -836,23 +848,23 @@ SUBJECT MODIFIERS

   Showing more text

-       The  aftertext modifier requests that as well as outputting the part of
+       The aftertext modifier requests that as well as outputting the part  of
       the subject string that matched the entire pattern, pcre2test should in
       addition output the remainder of the subject string. This is useful for
       tests where the subject contains multiple copies of the same substring.
-       The  allaftertext  modifier  requests the same action for captured sub-
+       The allaftertext modifier requests the same action  for  captured  sub-
       strings as well as the main matched substring. In each case the remain-
       der is output on the following line with a plus character following the
       capture number.

-       The allusedtext modifier requests that all the text that was  consulted
-       during  a  successful pattern match by the interpreter should be shown.
-       This feature is not supported for JIT matching, and if  requested  with
-       JIT  it  is  ignored  (with  a  warning message). Setting this modifier
+       The  allusedtext modifier requests that all the text that was consulted
+       during a successful pattern match by the interpreter should  be  shown.
+       This  feature  is not supported for JIT matching, and if requested with
+       JIT it is ignored (with  a  warning  message).  Setting  this  modifier
       affects the output if there is a lookbehind at the start of a match, or
-       a  lookahead  at  the  end, or if \K is used in the pattern. Characters
-       that precede or follow the start and end of the actual match are  indi-
-       cated  in  the output by '<' or '>' characters underneath them. Here is
+       a lookahead at the end, or if \K is used  in  the  pattern.  Characters
+       that  precede or follow the start and end of the actual match are indi-
+       cated in the output by '<' or '>' characters underneath them.  Here  is
       an example:

           re> /(?<=pqr)abc(?=xyz)/
@ -860,16 +872,16 @@ SUBJECT MODIFIERS
          0: pqrabcxyz
             <<<   >>>

-       This shows that the matched string is "abc",  with  the  preceding  and
-       following  strings  "pqr"  and  "xyz"  having been consulted during the
+       This  shows  that  the  matched string is "abc", with the preceding and
+       following strings "pqr" and "xyz"  having  been  consulted  during  the
       match (when processing the assertions).

-       The startchar modifier requests that the  starting  character  for  the
-       match  be  indicated,  if  it  is different to the start of the matched
+       The  startchar  modifier  requests  that the starting character for the
+       match be indicated, if it is different to  the  start  of  the  matched
       string. The only time when this occurs is when \K has been processed as
       part of the match. In this situation, the output for the matched string
-       is displayed from the starting character  instead  of  from  the  match
-       point,  with  circumflex  characters  under the earlier characters. For
+       is  displayed  from  the  starting  character instead of from the match
+       point, with circumflex characters under  the  earlier  characters.  For
       example:

           re> /abc\Kxyz/
@ -877,7 +889,7 @@ SUBJECT MODIFIERS
          0: abcxyz
             ^^^

-       Unlike allusedtext, the startchar modifier can be used with JIT.   How-
+       Unlike  allusedtext, the startchar modifier can be used with JIT.  How-
       ever, these two modifiers are mutually exclusive.

   Showing the value of all capture groups
@ -885,88 +897,88 @@ SUBJECT MODIFIERS
       The allcaptures modifier requests that the values of all potential cap-
       tured parentheses be output after a match. By default, only those up to
       the highest one actually used in the match are output (corresponding to
-       the return code from pcre2_match()). Groups that did not take  part  in
+       the  return  code from pcre2_match()). Groups that did not take part in
       the match are output as "<unset>".

   Testing callouts

-       A  callout function is supplied when pcre2test calls the library match-
-       ing functions, unless callout_none is specified. If callout_capture  is
+       A callout function is supplied when pcre2test calls the library  match-
+       ing  functions, unless callout_none is specified. If callout_capture is
       set, the current captured groups are output when a callout occurs.

-       The  callout_fail modifier can be given one or two numbers. If there is
+       The callout_fail modifier can be given one or two numbers. If there  is
       only one number, 1 is returned instead of 0 when a callout of that num-
-       ber  is  reached.  If two numbers are given, 1 is returned when callout
+       ber is reached. If two numbers are given, 1 is  returned  when  callout
       <n> is reached for the <m>th time. Note that callouts with string argu-
-       ments  are  always  given  the  number zero. See "Callouts" below for a
+       ments are always given the number zero.  See  "Callouts"  below  for  a
       description of the output when a callout is taken.

-       The callout_data modifier can be given an unsigned or a  negative  num-
-       ber.   This  is  set  as the "user data" that is passed to the matching
-       function, and passed back when the callout  function  is  invoked.  Any
-       value  other  than  zero  is  used as a return from pcre2test's callout
+       The  callout_data  modifier can be given an unsigned or a negative num-
+       ber.  This is set as the "user data" that is  passed  to  the  matching
+       function,  and  passed  back  when the callout function is invoked. Any
+       value other than zero is used as  a  return  from  pcre2test's  callout
       function.

   Finding all matches in a string

       Searching for all possible matches within a subject can be requested by
-       the  global or /altglobal modifier. After finding a match, the matching
-       function is called again to search the remainder of  the  subject.  The
-       difference  between  global  and  altglobal is that the former uses the
-       start_offset argument to pcre2_match() or  pcre2_dfa_match()  to  start
-       searching  at  a new point within the entire string (which is what Perl
+       the global or altglobal modifier. After finding a match,  the  matching
+       function  is  called  again to search the remainder of the subject. The
+       difference between global and altglobal is that  the  former  uses  the
+       start_offset  argument  to  pcre2_match() or pcre2_dfa_match() to start
+       searching at a new point within the entire string (which is  what  Perl
       does), whereas the latter passes over a shortened subject. This makes a
       difference to the matching process if the pattern begins with a lookbe-
       hind assertion (including \b or \B).

-       If an empty string  is  matched,  the  next  match  is  done  with  the
+       If  an  empty  string  is  matched,  the  next  match  is done with the
       PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
       for another, non-empty, match at the same point in the subject. If this
-       match  fails,  the  start  offset  is advanced, and the normal match is
-       retried. This imitates the way Perl handles such cases when  using  the
-       /g  modifier  or  the  split()  function. Normally, the start offset is
-       advanced by one character, but if  the  newline  convention  recognizes
-       CRLF  as  a newline, and the current character is CR followed by LF, an
+       match fails, the start offset is advanced,  and  the  normal  match  is
+       retried.  This  imitates the way Perl handles such cases when using the
+       /g modifier or the split() function.  Normally,  the  start  offset  is
+       advanced  by  one  character,  but if the newline convention recognizes
+       CRLF as a newline, and the current character is CR followed by  LF,  an
       advance of two characters occurs.

   Testing substring extraction functions

-       The copy  and  get  modifiers  can  be  used  to  test  the  pcre2_sub-
+       The  copy  and  get  modifiers  can  be  used  to  test  the pcre2_sub-
       string_copy_xxx() and pcre2_substring_get_xxx() functions.  They can be
-       given more than once, and each can specify a group name or number,  for
+       given  more than once, and each can specify a group name or number, for
       example:

          abcd\=copy=1,copy=3,get=G1

-       If  the  #subject command is used to set default copy and/or get lists,
-       these can be unset by specifying a negative number to cancel  all  num-
+       If the #subject command is used to set default copy and/or  get  lists,
+       these  can  be unset by specifying a negative number to cancel all num-
       bered groups and an empty name to cancel all named groups.

-       The  getall  modifier  tests pcre2_substring_list_get(), which extracts
+       The getall modifier tests  pcre2_substring_list_get(),  which  extracts
       all captured substrings.

-       If the subject line is successfully matched, the  substrings  extracted
-       by  the  convenience  functions  are  output  with C, G, or L after the
-       string number instead of a colon. This is in  addition  to  the  normal
-       full  list.  The string length (that is, the return from the extraction
+       If  the  subject line is successfully matched, the substrings extracted
+       by the convenience functions are output with  C,  G,  or  L  after  the
+       string  number  instead  of  a colon. This is in addition to the normal
+       full list. The string length (that is, the return from  the  extraction
       function) is given in parentheses after each substring, followed by the
       name when the extraction was by name.

   Testing the substitution function

-       If  the  replace  modifier  is  set, the pcre2_substitute() function is
-       called instead  of  one  of  the  matching  functions.  Unlike  subject
-       strings,  pcre2test  does  not  process  replacement strings for escape
+       If the replace modifier is  set,  the  pcre2_substitute()  function  is
+       called  instead  of  one  of  the  matching  functions.  Unlike subject
+       strings, pcre2test does not  process  replacement  strings  for  escape
       sequences. In UTF mode, a replacement string is checked to see if it is
       a valid UTF-8 string.  If so, it is correctly converted to a UTF string
-       of the appropriate code unit width. If it is not a valid UTF-8  string,
+       of  the appropriate code unit width. If it is not a valid UTF-8 string,
       the individual code units are copied directly. This provides a means of
       passing an invalid UTF-8 string for testing purposes.

-       If the global modifier is set,  PCRE2_SUBSTITUTE_GLOBAL  is  passed  to
+       If  the  global  modifier  is set, PCRE2_SUBSTITUTE_GLOBAL is passed to
       pcre2_substitute().  After  a  successful  substitution,  the  modified
-       string is output, preceded by the number of replacements. This  may  be
-       zero  if there were no matches. Here is a simple example of a substitu-
+       string  is  output, preceded by the number of replacements. This may be
+       zero if there were no matches. Here is a simple example of a  substitu-
       tion test:

         /abc/replace=xxx
@ -975,11 +987,11 @@ SUBJECT MODIFIERS
             =abc=abc=\=global
          2: =xxx=xxx=

-       Subject and replacement strings should be  kept  relatively  short  for
-       substitution  tests, as fixed-size buffers are used. To make it easy to
-       test for buffer overflow, if the replacement string starts with a  num-
-       ber  in square brackets, that number is passed to pcre2_substitute() as
-       the size of the output buffer, with the replacement string starting  at
+       Subject  and  replacement  strings  should be kept relatively short for
+       substitution tests, as fixed-size buffers are used. To make it easy  to
+       test  for buffer overflow, if the replacement string starts with a num-
+       ber in square brackets, that number is passed to pcre2_substitute()  as
+       the  size of the output buffer, with the replacement string starting at
       the next character. Here is an example that tests the edge case:

         /abc/
@ -989,90 +1001,107 @@ SUBJECT MODIFIERS
         Failed: error -47: no more memory

       A replacement string is ignored with POSIX and DFA matching. Specifying
-       partial matching provokes an error return  ("bad  option  value")  from
+       partial  matching  provokes  an  error return ("bad option value") from
       pcre2_substitute().

   Setting the JIT stack size

-       The  jitstack modifier provides a way of setting the maximum stack size
-       that is used by the just-in-time optimization code. It  is  ignored  if
+       The jitstack modifier provides a way of setting the maximum stack  size
+       that  is  used  by the just-in-time optimization code. It is ignored if
       JIT optimization is not being used. The value is a number of kilobytes.
       Providing a stack that is larger than the default 32K is necessary only
       for very complicated patterns.

   Setting match and recursion limits

-       The  match_limit and recursion_limit modifiers set the appropriate lim-
+       The match_limit and recursion_limit modifiers set the appropriate  lim-
       its in the match context. These values are ignored when the find_limits
       modifier is specified.

   Finding minimum limits

-       If  the  find_limits modifier is present, pcre2test calls pcre2_match()
-       several times, setting  different  values  in  the  match  context  via
-       pcre2_set_match_limit()  and pcre2_set_recursion_limit() until it finds
-       the minimum values for each parameter that allow pcre2_match() to  com-
+       If the find_limits modifier is present, pcre2test  calls  pcre2_match()
+       several  times,  setting  different  values  in  the  match context via
+       pcre2_set_match_limit() and pcre2_set_recursion_limit() until it  finds
+       the  minimum values for each parameter that allow pcre2_match() to com-
       plete without error.

       If JIT is being used, only the match limit is relevant. If DFA matching
-       is being used, neither limit is relevant, and this modifier is  ignored
+       is  being used, neither limit is relevant, and this modifier is ignored
       (with a warning message).

-       The  match_limit number is a measure of the amount of backtracking that
-       takes place, and learning the minimum value  can  be  instructive.  For
-       most  simple  matches, the number is quite small, but for patterns with
-       very large numbers of matching possibilities, it can become large  very
-       quickly    with    increasing    length    of   subject   string.   The
-       match_limit_recursion number is a measure of how  much  stack  (or,  if
-       PCRE2  is  compiled with NO_RECURSE, how much heap) memory is needed to
+       The match_limit number is a measure of the amount of backtracking  that
+       takes  place,  and  learning  the minimum value can be instructive. For
+       most simple matches, the number is quite small, but for  patterns  with
+       very  large numbers of matching possibilities, it can become large very
+       quickly   with   increasing   length    of    subject    string.    The
+       recursion_limit  number  is  a  measure  of  how  much  stack  (or,  if
+       PCRE2 is compiled with NO_RECURSE, how much heap) memory is  needed  to
       complete the match attempt.

   Showing MARK names


       The mark modifier causes the names from backtracking control verbs that
-       are  returned from calls to pcre2_match() to be displayed. If a mark is
-       returned for a match, non-match, or partial match, pcre2test shows  it.
-       For  a  match, it is on a line by itself, tagged with "MK:". Otherwise,
+       are returned from calls to pcre2_match() to be displayed. If a mark  is
+       returned  for a match, non-match, or partial match, pcre2test shows it.
+       For a match, it is on a line by itself, tagged with  "MK:".  Otherwise,
       it is added to the non-match message.

   Showing memory usage

-       The memory modifier causes pcre2test to log all memory  allocation  and
+       The  memory  modifier causes pcre2test to log all memory allocation and
       freeing calls that occur during a match operation.

   Setting a starting offset

-       The  offset  modifier  sets  an  offset  in the subject string at which
+       The offset modifier sets an offset  in  the  subject  string  at  which
       matching starts. Its value is a number of code units, not characters.

+   Setting an offset limit
+
+       The  offset_limit  modifier  sets  a limit for unanchored matches. If a
+       match cannot be found starting at or before this offset in the subject,
+       a "no match" return is given. The data value is a number of code units,
+       not characters. When this modifier is used, the use_offset_limit  modi-
+       fier must have been set for the pattern; if not, an error is generated.
+
   Setting the size of the output vector

-       The ovector modifier applies only to  the  subject  line  in  which  it
-       appears,  though  of  course  it can also be used to set a default in a
-       #subject command. It specifies the number of pairs of offsets that  are
+       The  ovector  modifier  applies  only  to  the subject line in which it
+       appears, though of course it can also be used to set  a  default  in  a
+       #subject  command. It specifies the number of pairs of offsets that are
       available for storing matching information. The default is 15.

-       A  value of zero is useful when testing the POSIX API because it causes
+       A value of zero is useful when testing the POSIX API because it  causes
       regexec() to be called with a NULL capture vector. When not testing the
-       POSIX  API,  a  value  of  zero  is used to cause pcre2_match_data_cre-
-       ate_from_pattern() to be called, in order to create a  match  block  of
+       POSIX API, a value of  zero  is  used  to  cause  pcre2_match_data_cre-
+       ate_from_pattern()  to  be  called, in order to create a match block of
       exactly the right size for the pattern. (It is not possible to create a
-       match block with a zero-length ovector; there is always  at  least  one
+       match  block  with  a zero-length ovector; there is always at least one
       pair of offsets.)

   Passing the subject as zero-terminated

       By default, the subject string is passed to a native API matching func-
       tion with its correct length. In order to test the facility for passing
-       a  zero-terminated  string, the zero_terminate modifier is provided. It
+       a zero-terminated string, the zero_terminate modifier is  provided.  It
       causes the length to be passed as PCRE2_ZERO_TERMINATED. (When matching
-       via  the  POSIX  interface, this modifier has no effect, as there is no
+       via the POSIX interface, this modifier has no effect, as  there  is  no
       facility for passing a length.)

-       When testing pcre2_substitute(), this modifier also has the  effect  of
+       When  testing  pcre2_substitute(), this modifier also has the effect of
       passing the replacement string as zero-terminated.

+   Passing a NULL context
+
+       Normally,  pcre2test  passes  a   context   block   to   pcre2_match(),
+       pcre2_dfa_match() or pcre2_jit_match(). If the null_context modifier is
+       set, however, NULL is passed. This is for  testing  that  the  matching
+       functions behave correctly in this case (they use default values). This
+       modifier cannot be used with the find_limits modifier or  when  testing
+       the substitution function.
+

 THE ALTERNATIVE MATCHING FUNCTION

@ -1398,5 +1427,5 @@ AUTHOR

 REVISION

-       Last updated: 14 September 2015
+       Last updated: 17 October 2015
       Copyright (c) 1997-2015 University of Cambridge.
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "18 August 2015" "PCRE2 10.21"
+.TH PCRE2UNICODE 3 "16 October 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "UNICODE AND UTF SUPPORT"
@ -63,11 +63,12 @@ characters (see the description of \eC in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
-documentation). The use of \eC is not supported in the alternative matching
-function \fBpcre2_dfa_match()\fP, nor is it supported in UTF mode by the JIT
-optimization. If JIT optimization is requested for a UTF pattern that contains
-\eC, it will not succeed, and so the matching will be carried out by the normal
-interpretive function.
+documentation). The use of \eC is not supported by the alternative matching
+function \fBpcre2_dfa_match()\fP when in UTF mode. Its use provokes a
+match-time error. The JIT optimization also does not support \eC in UTF mode.
+If JIT optimization is requested for a UTF pattern that contains \eC, it will
+not succeed, and so the matching will be carried out by the normal interpretive
+function.
 .P
 The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test
 characters of any code value, but, by default, the characters that PCRE2
@ -262,6 +263,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 18 August 2015
+Last updated: 16 October 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/src/config.h.generic
+++ b/src/config.h.generic
@ -182,6 +182,9 @@ sure both macros are undefined; an emulation function will then be used. */
 #define MAX_NAME_SIZE 32
 #endif

+/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
+/* #undef NEVER_BACKSLASH_C */
+
 /* The value of NEWLINE_DEFAULT determines the default newline character
   sequence. PCRE2 client programs can override this by selecting other values
   at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
--- a/src/config.h.in
+++ b/src/config.h.in
@ -169,6 +169,9 @@ sure both macros are undefined; an emulation function will then be used. */
   overflow caused by enormously large patterns. */
 #undef MAX_NAME_SIZE

+/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
+#undef NEVER_BACKSLASH_C
+
 /* The value of NEWLINE_DEFAULT determines the default newline character
   sequence. PCRE2 client programs can override this by selecting other values
   at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -583,7 +583,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
-       ERR81, ERR82, ERR83, ERR84 };
+       ERR81, ERR82, ERR83, ERR84, ERR85 };

 /* This is a table of start-of-pattern options such as (*UTF) and settings such
 as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -7053,11 +7053,19 @@ for (;; ptr++)

      /* The use of \C can be locked out. */
      
+#ifdef NEVER_BACKSLASH_C
+      else if (escape == ESC_C)
+        {
+        *errorcodeptr = ERR85;
+        goto FAILED;
+        }
+#else
      else if (escape == ESC_C && (options & PCRE2_NEVER_BACKSLASH_C) != 0)
        {
        *errorcodeptr = ERR83;
        goto FAILED;
        }
+#endif         

      /* For the rest (including \X when Unicode properties are supported), we
      can obtain the OP value by negating the escape value in the default
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@ -168,6 +168,8 @@ static const char compile_error_texts[] =
  "unrecognized string delimiter follows (?C\0"
  "using \\C is disabled by the application\0"
  "(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
+  /* 85 */
+  "using \\C is disabled in this PCRE2 library\0"
  ;

 /* Match-time and UTF error texts are in the same format. */
--- a/src/pcre2posix.c
+++ b/src/pcre2posix.c
@ -106,7 +106,7 @@ static const int eint1[] = {

 static const int eint2[] = {
  30, REG_ECTYPE,  /* unknown POSIX class name */
-  32, REG_INVARG,  /* this version of PCRE does not have UTF or UCP support */
+  32, REG_INVARG,  /* this version of PCRE2 does not have Unicode support */
  37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
  56, REG_INVARG,  /* internal error: unknown newline setting */
 };
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -667,6 +667,12 @@ table itself easier to read. */
 #define EBCDIC_NL 0
 #endif

+#ifdef NEVER_BACKSLASH_C
+#define BACKSLASH_C 0
+#else
+#define BACKSLASH_C 1
+#endif
+
 typedef struct coptstruct {
  const char *name;
  uint32_t    type;
@ -681,16 +687,17 @@ enum { CONF_BSR,
 };

 static coptstruct coptlist[] = {
-  { "bsr",       CONF_BSR, PCRE2_CONFIG_BSR },
-  { "ebcdic",    CONF_FIX, SUPPORT_EBCDIC },
-  { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
-  { "jit",       CONF_INT, PCRE2_CONFIG_JIT },
-  { "linksize",  CONF_INT, PCRE2_CONFIG_LINKSIZE },
-  { "newline",   CONF_NL,  PCRE2_CONFIG_NEWLINE },
-  { "pcre2-16",  CONF_FIX, SUPPORT_16 },
-  { "pcre2-32",  CONF_FIX, SUPPORT_32 },
-  { "pcre2-8",   CONF_FIX, SUPPORT_8 },
-  { "unicode",   CONF_INT, PCRE2_CONFIG_UNICODE }
+  { "backslash-C", CONF_FIX, BACKSLASH_C },
+  { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
+  { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
+  { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
+  { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
+  { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
+  { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
+  { "pcre2-16",    CONF_FIX, SUPPORT_16 },
+  { "pcre2-32",    CONF_FIX, SUPPORT_32 },
+  { "pcre2-8",     CONF_FIX, SUPPORT_8 },
+  { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
 };

 #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
@ -6467,6 +6474,7 @@ printf("  -b            set default pattern control 'fullbincode'\n");
 printf("  -C            show PCRE2 compile-time options and exit\n");
 printf("  -C arg        show a specific compile-time option and exit with its\n");
 printf("                  value if numeric (else 0). The arg can be:\n");
+printf("     backslash-C    use of \\C is enabled [0, 1]\n");
 printf("     bsr            \\R type [ANYCRLF, ANY]\n");
 printf("     ebcdic         compiled for EBCDIC character code [0,1]\n");
 printf("     ebcdic-nl      NL code if compiled for EBCDIC\n");
@ -6618,6 +6626,11 @@ print_newline_config(optval, FALSE);
 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
 printf("  \\R matches %s\n", optval? "CR, LF, or CRLF only" :
                                 "all Unicode newlines");
+#ifdef NEVER_BACKSLASH_C
+printf("  \\C is not supported\n");
+#else
+printf("  \\C is supported\n");
+#endif
 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
 printf("  Internal link size = %d\n", optval);
 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
--- a/testdata/testinput10
+++ b/testdata/testinput10
@ -1,46 +1,6 @@
 # This set of tests is for UTF-8 support and Unicode property support, with
 # relevance only for the 8-bit library.

-/X(\C{3})/utf
-    X\x{1234}
-
-/X(\C{4})/utf
-    X\x{1234}YZ
-
-/X\C*/utf
-    XYZabcdce
-
-/X\C*?/utf
-    XYZabcde
-
-/X\C{3,5}/utf
-    Xabcdefg
-    X\x{1234}
-    X\x{1234}YZ
-    X\x{1234}\x{512}
-    X\x{1234}\x{512}YZ
-
-/X\C{3,5}?/utf
-    Xabcdefg
-    X\x{1234}
-    X\x{1234}YZ
-    X\x{1234}\x{512}
-
-/a\Cb/utf
-    aXb
-    a\nb
-
-/a\C\Cb/utf
-    a\x{100}b
-
-/ab\Cde/utf
-    abXde
-
-/a\C\Cb/utf
-    a\x{100}b
-\= Expect no match
-    a\x{12257}b
-
 # The next 3 patterns have UTF-8 errors

 /[Ã]/utf
@ -212,21 +172,6 @@

 /\x{212ab}/IB,utf

-# This one is here not because it's different to Perl, but because the way
-# the captured single-byte is displayed. (In Perl it becomes a character, and you
-# can't tell the difference.)
-
-/X(\C)(.*)/utf
-    X\x{1234}
-    X\nabc
-
-# This one is here because Perl gives out a grumbly error message (quite
-# correctly, but that messes up comparisons).
-
-/a\Cb/utf
-\= Expect no match
-    a\x{100}b
-
 /[^ab\xC0-\xF0]/IB,utf
    \x{f1}
    \x{bf}
--- a/testdata/testinput11
+++ b/testdata/testinput11
@ -6,10 +6,6 @@
 #forbid_utf
 #newline_default LF ANY ANYCRLF

-/a\Cb/
-    aXb
-    a\nb
-  
 /[^\x{c4}]/IB
  
 /\x{100}/I
@ -344,7 +340,7 @@

 # Non-UTF characters 

-/\C{2,3}/
+/.{2,3}/
    \x{400000}\x{400001}\x{400002}\x{400003}

 /\x{400000}\x{800000}/IBi
--- a/testdata/testinput12
+++ b/testdata/testinput12
@ -7,49 +7,6 @@
 /abc/utf
    Ã]

-/X(\C{3})/utf
-    X\x{11234}Y
-    X\x{11234}YZ
-
-/X(\C{4})/utf
-    X\x{11234}YZ
-    X\x{11234}YZW
-
-/X\C*/utf
-    XYZabcdce
-
-/X\C*?/utf
-    XYZabcde
-
-/X\C{3,5}/utf
-    Xabcdefg
-    X\x{11234}Y
-    X\x{11234}YZ
-    X\x{11234}\x{512}
-    X\x{11234}\x{512}YZ
-    X\x{11234}\x{512}\x{11234}Z
-
-/X\C{3,5}?/utf
-    Xabcdefg
-    X\x{11234}Y
-    X\x{11234}YZ
-    X\x{11234}\x{512}YZ
-\= Expect no match
-    X\x{11234}
-
-/a\Cb/utf
-    aXb
-    a\nb
-
-/a\C\Cb/utf
-    a\x{12257}b
-\= Expect no match
-    a\x{12257}\x{11234}b
-    a\x{100}b
-
-/ab\Cde/utf
-    abXde
-
 # Check maximum character size 

 /\x{ffff}/IB,utf
@ -90,16 +47,6 @@

 /\x{212ab}/IB,utf

-# These two \C tests, copied from the UTF-8 input file, do not have any
-# problems in 16 or 32 bits.
-
-/X(\C)(.*)/utf
-    X\x{1234}
-    X\nabc
-
-/a\Cb/utf
-    a\x{100}b
-
 /[^ab\xC0-\xF0]/IB,utf
    \x{f1}
    \x{bf}
@ -336,9 +283,6 @@

 /\o{4200000}/utf

-/\C/utf
-    \x{110000}
-
 /\x{100}*A/IB,utf
    A

@ -396,4 +340,7 @@

 /\x{3a3}B/IBi,utf

+/./utf
+    \x{110000}
+
 # End of testinput12
--- a/testdata/testinput2
+++ b/testdata/testinput2
@ -3739,41 +3739,40 @@

 /[bcd]*a/B

-# A complete set of tests for auto-possessification of character types.
+# A complete set of tests for auto-possessification of character types, but
+# omitting \C because it might be disabled (it has its own tests).

-/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\C \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx
+/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx

-/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\C \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx
+/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx

-/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\C \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx
+/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx

-/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\C \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx
+/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx

-/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\C \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx
+/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx

-/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\C \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx
+/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx

-/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\C \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
+/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx

-/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\C \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx
+/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx

-/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\C \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx
+/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx

-/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\C \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx
+/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx

-/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\C \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx
+/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx

-/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\C \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx
+/ a+\D  a+\d  a+\S  a+\s  a+\W  a+\w  a+.  a+\R  a+\H  a+\h  a+\V  a+\v  a+\Z  a+\z  a+$/Bx

-/ a+\D  a+\d  a+\S  a+\s  a+\W  a+\w  a+.  a+\C  a+\R  a+\H  a+\h  a+\V  a+\v  a+\Z  a+\z  a+$/Bx
+/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx

-/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\C \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx
+/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bx

-/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\C  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bx
+/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bsx

-/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\C  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bsx
-
-/\D+$  \d+$  \S+$  \s+$  \W+$  \w+$  \C+$  \R+$  \H+$  \h+$  \V+$  \v+$   a+$  \n+$   .+$  .+$/Bmx
+/ \D+$  \d+$  \S+$  \s+$  \W+$  \w+$  \R+$  \H+$  \h+$  \V+$ \v+$  a+$   \n+$  .+$  .+$/Bmx

 /(?=a+)a(a+)++a/B

@ -4327,8 +4326,6 @@

 /((?2){73}(?2))((?1))/info

-/ab\Cde/never_backslash_c
-
 /abc/
 \= Expect no match
    \[9x!xxx(]{9999}
@ -4446,12 +4443,6 @@
 /\x0{ab}/
    \0{ab} 

-/ab\Cde/
-    abXde
-    
-/(?<=ab\Cde)X/
-    abZdeX
-
 /^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/
    ababababbbabZXXXX

--- a/testdata/testinput21
+++ b/testdata/testinput21
@ -0,0 +1,16 @@
+# These are tests of \C that do not involve UTF. They are not run when \C is
+# disabled by compiling with --enable-never-backslash-C.
+
+/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
+
+/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx
+
+/ab\Cde/never_backslash_c
+
+/ab\Cde/
+    abXde
+    
+/(?<=ab\Cde)X/
+    abZdeX
+
+# End of testinput21
--- a/testdata/testinput22
+++ b/testdata/testinput22
@ -0,0 +1,95 @@
+# Tests of \C when Unicode support is available. Note that \C is not supported
+# for DFA matching in UTF mode, so this test is not run with -dfa. The output
+# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
+# in some widths and not in others.
+
+/ab\Cde/utf
+    abXde
+
+# This should produce an error diagnostic (\C in UTF lookbehind)
+
+/(?<=ab\Cde)X/utf
+
+# Autopossessification tests
+
+/\C+\X \X+\C/Bx
+
+/\C+\X \X+\C/Bx,utf
+
+/\C\X*TӅ;
+{0,6}\v+
F
+/utf
+\= Expect no match
+    Ӆ\x0a
+
+/\C(\W?ſ)'?{{/utf
+\= Expect no match
+    \\C(\\W?ſ)'?{{
+
+/X(\C{3})/utf
+    X\x{1234}
+    X\x{11234}Y
+    X\x{11234}YZ
+
+/X(\C{4})/utf
+    X\x{1234}YZ
+    X\x{11234}YZ
+    X\x{11234}YZW
+
+/X\C*/utf
+    XYZabcdce
+
+/X\C*?/utf
+    XYZabcde
+
+/X\C{3,5}/utf
+    Xabcdefg
+    X\x{1234}
+    X\x{1234}YZ
+    X\x{1234}\x{512}
+    X\x{1234}\x{512}YZ
+    X\x{11234}Y
+    X\x{11234}YZ
+    X\x{11234}\x{512}
+    X\x{11234}\x{512}YZ
+    X\x{11234}\x{512}\x{11234}Z
+
+/X\C{3,5}?/utf
+    Xabcdefg
+    X\x{1234}
+    X\x{1234}YZ
+    X\x{1234}\x{512}
+    X\x{11234}Y
+    X\x{11234}YZ
+    X\x{11234}\x{512}YZ
+    X\x{11234}
+
+/a\Cb/utf
+    aXb
+    a\nb
+    a\x{100}b
+
+/a\C\Cb/utf
+    a\x{100}b
+    a\x{12257}b
+    a\x{12257}\x{11234}b
+
+/ab\Cde/utf
+    abXde
+
+# This one is here not because it's different to Perl, but because the way
+# the captured single code unit is displayed. (In Perl it becomes a character,
+# and you can't tell the difference.)
+
+/X(\C)(.*)/utf
+    X\x{1234}
+    X\nabc
+
+# This one is here because Perl gives out a grumbly error message (quite
+# correctly, but that messes up comparisons).
+
+/a\Cb/utf
+\= Expect no match in 8-bit mode
+    a\x{100}b
+
+# End of testinput22
--- a/testdata/testinput23
+++ b/testdata/testinput23
@ -0,0 +1,7 @@
+# This test is run when PCRE2 has been built with --enable-never-backslash-C,
+# which disables the use of \C. All we can do is check that it gives the 
+# correct error message.
+
+/a\Cb/
+
+# End of testinput23
--- a/testdata/testinput5
+++ b/testdata/testinput5
@ -111,9 +111,6 @@
 /.{3,5}?/IB,utf
    \x{212ab}\x{212ab}\x{212ab}\x{861}

-/(?<=\C)X/utf
-    Should produce an error diagnostic
-    
 /^[ab]/IB,utf
    bar
 \= Expect no match
@ -1367,8 +1364,6 @@
 \= Expect no match
    aAz

-/(?<=ab\Cde)X/utf
-
 /\X/
    a\=ps
    a\=ph
@ -1617,13 +1612,13 @@

 /[\p{L}ab]{2,3}+/B,no_auto_possess

-/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
+/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx

 /.+\X/Bsx

 /\X+$/Bmx

-/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
+/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx

 /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp

@ -1665,16 +1660,6 @@

 "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"

-/\C\X*TӅ;
-{0,6}\v+
F
-/utf
-\= Expect no match
-    Ӆ\x0a
-
-/\C(\W?ſ)'?{{/utf
-\= Expect no match
-    \\C(\\W?ſ)'?{{
-
 /[\pS#moq]/
    =

--- a/testdata/testinput6
+++ b/testdata/testinput6
@ -4645,12 +4645,6 @@
    aaaa\=ovector=3
    aaaa\=ovector=4

-/ab\Cde/
-    abXde
-    
-/(?<=ab\Cde)X/
-    abZdeX
-
 /^\R/
    \r\=ps
    \r\=ph
--- a/testdata/testinput7
+++ b/testdata/testinput7
@ -671,11 +671,6 @@
    the cat\=ps
    the cat\=ph

-/ab\Cde/utf
-    abXde
-
-/(?<=ab\Cde)X/utf
-
 /./newline=crlf,utf
    \r\=ps
    \r\=ph
--- a/testdata/testinput9
+++ b/testdata/testinput9
@ -4,10 +4,8 @@
 #forbid_utf
 #newline_default lf any anycrlf

-/a\Cb/
-    aXb
-    a\nb
-\= Expect no match and error message (too big char)
+/ab/
+\= Expect error message (too big char) and no match
    A\x{123}B
    A\o{443}B
  
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@ -1,67 +1,6 @@
 # This set of tests is for UTF-8 support and Unicode property support, with
 # relevance only for the 8-bit library.

-/X(\C{3})/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{1234}
-
-/X(\C{4})/utf
-    X\x{1234}YZ
- 0: X\x{1234}Y
- 1: \x{1234}Y
-
-/X\C*/utf
-    XYZabcdce
- 0: XYZabcdce
-
-/X\C*?/utf
-    XYZabcde
- 0: X
-
-/X\C{3,5}/utf
-    Xabcdefg
- 0: Xabcde
-    X\x{1234}
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}YZ
-    X\x{1234}\x{512}
- 0: X\x{1234}\x{512}
-    X\x{1234}\x{512}YZ
- 0: X\x{1234}\x{512}
-
-/X\C{3,5}?/utf
-    Xabcdefg
- 0: Xabc
-    X\x{1234}
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}
-    X\x{1234}\x{512}
- 0: X\x{1234}
-
-/a\Cb/utf
-    aXb
- 0: aXb
-    a\nb
- 0: a\x{0a}b
-
-/a\C\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-
-/ab\Cde/utf
-    abXde
- 0: abXde
-
-/a\C\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-\= Expect no match
-    a\x{12257}b
-No match
-
 # The next 3 patterns have UTF-8 errors

 /[Ã]/utf
@ -511,28 +450,6 @@ First code unit = \xf0
 Last code unit = \xab
 Subject length lower bound = 1

-# This one is here not because it's different to Perl, but because the way
-# the captured single-byte is displayed. (In Perl it becomes a character, and you
-# can't tell the difference.)
-
-/X(\C)(.*)/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{e1}
- 2: \x{88}\x{b4}
-    X\nabc
- 0: X\x{0a}abc
- 1: \x{0a}
- 2: abc
-
-# This one is here because Perl gives out a grumbly error message (quite
-# correctly, but that messes up comparisons).
-
-/a\Cb/utf
-\= Expect no match
-    a\x{100}b
-No match
-
 /[^ab\xC0-\xF0]/IB,utf
 ------------------------------------------------------------------
        Bra
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@ -6,12 +6,6 @@
 #forbid_utf
 #newline_default LF ANY ANYCRLF

-/a\Cb/
-    aXb
- 0: aXb
-    a\nb
- 0: a\x0ab
-  
 /[^\x{c4}]/IB
 ------------------------------------------------------------------
        Bra
@ -582,7 +576,7 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to

 # Non-UTF characters 

-/\C{2,3}/
+/.{2,3}/
    \x{400000}\x{400001}\x{400002}\x{400003}
 ** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled.
 ** Truncation will probably give the wrong result.
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@ -6,12 +6,6 @@
 #forbid_utf
 #newline_default LF ANY ANYCRLF

-/a\Cb/
-    aXb
- 0: aXb
-    a\nb
- 0: a\x0ab
-  
 /[^\x{c4}]/IB
 ------------------------------------------------------------------
        Bra
@ -583,7 +577,7 @@ Subject length lower bound = 2

 # Non-UTF characters 

-/\C{2,3}/
+/.{2,3}/
    \x{400000}\x{400001}\x{400002}\x{400003}
 0: \x{400000}\x{400001}\x{400002}

--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@ -9,76 +9,6 @@
    Ã]
 ** Failed: invalid UTF-8 string cannot be used as input in UTF mode

-/X(\C{3})/utf
-    X\x{11234}Y
- 0: X\x{11234}Y
- 1: \x{11234}Y
-    X\x{11234}YZ
- 0: X\x{11234}Y
- 1: \x{11234}Y
-
-/X(\C{4})/utf
-    X\x{11234}YZ
- 0: X\x{11234}YZ
- 1: \x{11234}YZ
-    X\x{11234}YZW
- 0: X\x{11234}YZ
- 1: \x{11234}YZ
-
-/X\C*/utf
-    XYZabcdce
- 0: XYZabcdce
-
-/X\C*?/utf
-    XYZabcde
- 0: X
-
-/X\C{3,5}/utf
-    Xabcdefg
- 0: Xabcde
-    X\x{11234}Y
- 0: X\x{11234}Y
-    X\x{11234}YZ
- 0: X\x{11234}YZ
-    X\x{11234}\x{512}
- 0: X\x{11234}\x{512}
-    X\x{11234}\x{512}YZ
- 0: X\x{11234}\x{512}YZ
-    X\x{11234}\x{512}\x{11234}Z
- 0: X\x{11234}\x{512}\x{11234}
-
-/X\C{3,5}?/utf
-    Xabcdefg
- 0: Xabc
-    X\x{11234}Y
- 0: X\x{11234}Y
-    X\x{11234}YZ
- 0: X\x{11234}Y
-    X\x{11234}\x{512}YZ
- 0: X\x{11234}\x{512}
-\= Expect no match
-    X\x{11234}
-No match
-
-/a\Cb/utf
-    aXb
- 0: aXb
-    a\nb
- 0: a\x{0a}b
-
-/a\C\Cb/utf
-    a\x{12257}b
- 0: a\x{12257}b
-\= Expect no match
-    a\x{12257}\x{11234}b
-No match
-    a\x{100}b
-No match
-
-/ab\Cde/utf
-    abXde
- 0: abXde
-
 # Check maximum character size 

 /\x{ffff}/IB,utf
@ -308,23 +238,6 @@ First code unit = \x{d844}
 Last code unit = \x{deab}
 Subject length lower bound = 1

-# These two \C tests, copied from the UTF-8 input file, do not have any
-# problems in 16 or 32 bits.
-
-/X(\C)(.*)/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{1234}
- 2: 
-    X\nabc
- 0: X\x{0a}abc
- 1: \x{0a}
- 2: abc
-
-/a\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-
 /[^ab\xC0-\xF0]/IB,utf
 ------------------------------------------------------------------
        Bra
@ -1127,10 +1040,6 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too
 /\o{4200000}/utf
 Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large

-/\C/utf
-    \x{110000}
-** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
-
 /\x{100}*A/IB,utf
 ------------------------------------------------------------------
        Bra
@ -1454,4 +1363,8 @@ Starting code units: \xff
 Last code unit = 'B' (caseless)
 Subject length lower bound = 2

+/./utf
+    \x{110000}
+** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
+
 # End of testinput12
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@ -9,74 +9,6 @@
    Ã]
 ** Failed: invalid UTF-8 string cannot be used as input in UTF mode

-/X(\C{3})/utf
-    X\x{11234}Y
-No match
-    X\x{11234}YZ
- 0: X\x{11234}YZ
- 1: \x{11234}YZ
-
-/X(\C{4})/utf
-    X\x{11234}YZ
-No match
-    X\x{11234}YZW
- 0: X\x{11234}YZW
- 1: \x{11234}YZW
-
-/X\C*/utf
-    XYZabcdce
- 0: XYZabcdce
-
-/X\C*?/utf
-    XYZabcde
- 0: X
-
-/X\C{3,5}/utf
-    Xabcdefg
- 0: Xabcde
-    X\x{11234}Y
-No match
-    X\x{11234}YZ
- 0: X\x{11234}YZ
-    X\x{11234}\x{512}
-No match
-    X\x{11234}\x{512}YZ
- 0: X\x{11234}\x{512}YZ
-    X\x{11234}\x{512}\x{11234}Z
- 0: X\x{11234}\x{512}\x{11234}Z
-
-/X\C{3,5}?/utf
-    Xabcdefg
- 0: Xabc
-    X\x{11234}Y
-No match
-    X\x{11234}YZ
- 0: X\x{11234}YZ
-    X\x{11234}\x{512}YZ
- 0: X\x{11234}\x{512}Y
-\= Expect no match
-    X\x{11234}
-No match
-
-/a\Cb/utf
-    aXb
- 0: aXb
-    a\nb
- 0: a\x{0a}b
-
-/a\C\Cb/utf
-    a\x{12257}b
-No match
-\= Expect no match
-    a\x{12257}\x{11234}b
- 0: a\x{12257}\x{11234}b
-    a\x{100}b
-No match
-
-/ab\Cde/utf
-    abXde
- 0: abXde
-
 # Check maximum character size 

 /\x{ffff}/IB,utf
@ -301,23 +233,6 @@ Options: utf
 First code unit = \x{212ab}
 Subject length lower bound = 1

-# These two \C tests, copied from the UTF-8 input file, do not have any
-# problems in 16 or 32 bits.
-
-/X(\C)(.*)/utf
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{1234}
- 2: 
-    X\nabc
- 0: X\x{0a}abc
- 1: \x{0a}
- 2: abc
-
-/a\Cb/utf
-    a\x{100}b
- 0: a\x{100}b
-
 /[^ab\xC0-\xF0]/IB,utf
 ------------------------------------------------------------------
        Bra
@ -1119,10 +1034,6 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too
 /\o{4200000}/utf
 Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large

-/\C/utf
-    \x{110000}
-Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
-
 /\x{100}*A/IB,utf
 ------------------------------------------------------------------
        Bra
@ -1446,4 +1357,8 @@ Starting code units: \xff
 Last code unit = 'B' (caseless)
 Subject length lower bound = 2

+/./utf
+    \x{110000}
+Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
+
 # End of testinput12
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@ -11948,9 +11948,10 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-# A complete set of tests for auto-possessification of character types.
+# A complete set of tests for auto-possessification of character types, but
+# omitting \C because it might be disabled (it has its own tests).

-/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\C \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx
+/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. \D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx
 ------------------------------------------------------------------
        Bra
        \D+
@ -11968,8 +11969,6 @@ Subject length lower bound = 5
        \D+
        Any
        \D+
-        AllAny
-        \D+
        \R
        \D+
        \H
@ -11989,7 +11988,7 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\C \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx
+/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx
 ------------------------------------------------------------------
        Bra
        \d++
@ -12006,8 +12005,6 @@ Subject length lower bound = 5
        \w
        \d+
        Any
-        \d+
-        AllAny
        \d++
        \R
        \d+
@ -12028,7 +12025,7 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\C \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx
+/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx
 ------------------------------------------------------------------
        Bra
        \S+
@ -12045,8 +12042,6 @@ Subject length lower bound = 5
        \w
        \S+
        Any
-        \S+
-        AllAny
        \S++
        \R
        \S+
@ -12067,7 +12062,7 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\C \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx
+/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx
 ------------------------------------------------------------------
        Bra
        \s+
@ -12085,8 +12080,6 @@ Subject length lower bound = 5
        \s+
        Any
        \s+
-        AllAny
-        \s+
        \R
        \s+
        \H
@ -12106,7 +12099,7 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\C \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx
+/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx
 ------------------------------------------------------------------
        Bra
        \W+
@ -12124,8 +12117,6 @@ Subject length lower bound = 5
        \W+
        Any
        \W+
-        AllAny
-        \W+
        \R
        \W+
        \H
@ -12145,7 +12136,7 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\C \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx
+/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx
 ------------------------------------------------------------------
        Bra
        \w+
@ -12162,8 +12153,6 @@ Subject length lower bound = 5
        \w
        \w+
        Any
-        \w+
-        AllAny
        \w++
        \R
        \w+
@ -12184,7 +12173,303 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\C \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
+/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx
+------------------------------------------------------------------
+        Bra
+        \R+
+        \D
+        \R++
+        \d
+        \R+
+        \S
+        \R++
+        \s
+        \R+
+        \W
+        \R++
+        \w
+        \R++
+        Any
+        \R+
+        \R
+        \R+
+        \H
+        \R++
+        \h
+        \R+
+        \V
+        \R+
+        \v
+        \R+
+        \Z
+        \R++
+        \z
+        \R+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx
+------------------------------------------------------------------
+        Bra
+        \H+
+        \D
+        \H+
+        \d
+        \H+
+        \S
+        \H+
+        \s
+        \H+
+        \W
+        \H+
+        \w
+        \H+
+        Any
+        \H+
+        \R
+        \H+
+        \H
+        \H++
+        \h
+        \H+
+        \V
+        \H+
+        \v
+        \H+
+        \Z
+        \H++
+        \z
+        \H+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx
+------------------------------------------------------------------
+        Bra
+        \h+
+        \D
+        \h++
+        \d
+        \h++
+        \S
+        \h+
+        \s
+        \h+
+        \W
+        \h++
+        \w
+        \h+
+        Any
+        \h++
+        \R
+        \h++
+        \H
+        \h+
+        \h
+        \h+
+        \V
+        \h++
+        \v
+        \h+
+        \Z
+        \h++
+        \z
+        \h+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx
+------------------------------------------------------------------
+        Bra
+        \V+
+        \D
+        \V+
+        \d
+        \V+
+        \S
+        \V+
+        \s
+        \V+
+        \W
+        \V+
+        \w
+        \V+
+        Any
+        \V++
+        \R
+        \V+
+        \H
+        \V+
+        \h
+        \V+
+        \V
+        \V++
+        \v
+        \V+
+        \Z
+        \V++
+        \z
+        \V+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx
+------------------------------------------------------------------
+        Bra
+        \v+
+        \D
+        \v++
+        \d
+        \v++
+        \S
+        \v+
+        \s
+        \v+
+        \W
+        \v++
+        \w
+        \v+
+        Any
+        \v+
+        \R
+        \v+
+        \H
+        \v++
+        \h
+        \v++
+        \V
+        \v+
+        \v
+        \v+
+        \Z
+        \v++
+        \z
+        \v+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/ a+\D  a+\d  a+\S  a+\s  a+\W  a+\w  a+.  a+\R  a+\H  a+\h  a+\V  a+\v  a+\Z  a+\z  a+$/Bx
+------------------------------------------------------------------
+        Bra
+        a+
+        \D
+        a++
+        \d
+        a+
+        \S
+        a++
+        \s
+        a++
+        \W
+        a+
+        \w
+        a+
+        Any
+        a++
+        \R
+        a+
+        \H
+        a++
+        \h
+        a+
+        \V
+        a++
+        \v
+        a++
+        \Z
+        a++
+        \z
+        a++
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx
+------------------------------------------------------------------
+        Bra
+        \x0a+
+        \D
+        \x0a++
+        \d
+        \x0a++
+        \S
+        \x0a+
+        \s
+        \x0a+
+        \W
+        \x0a++
+        \w
+        \x0a+
+        Any
+        \x0a+
+        \R
+        \x0a+
+        \H
+        \x0a++
+        \h
+        \x0a++
+        \V
+        \x0a+
+        \v
+        \x0a+
+        \Z
+        \x0a++
+        \z
+        \x0a+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bx
+------------------------------------------------------------------
+        Bra
+        Any+
+        \D
+        Any+
+        \d
+        Any+
+        \S
+        Any+
+        \s
+        Any+
+        \W
+        Any+
+        \w
+        Any+
+        Any
+        Any++
+        \R
+        Any+
+        \H
+        Any+
+        \h
+        Any+
+        \V
+        Any+
+        \v
+        Any+
+        \Z
+        Any++
+        \z
+        Any+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bsx
 ------------------------------------------------------------------
        Bra
        AllAny+
@ -12200,8 +12485,6 @@ Subject length lower bound = 5
        AllAny+
        \w
        AllAny+
-        Any
-        AllAny+
        AllAny
        AllAny+
        \R
@ -12223,358 +12506,7 @@ Subject length lower bound = 5
        End
 ------------------------------------------------------------------

-/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\C \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx
------------------------------------------------------------------
-        Bra
-        \R+
-        \D
-        \R++
-        \d
-        \R+
-        \S
-        \R++
-        \s
-        \R+
-        \W
-        \R++
-        \w
-        \R++
-        Any
-        \R+
-        AllAny
-        \R+
-        \R
-        \R+
-        \H
-        \R++
-        \h
-        \R+
-        \V
-        \R+
-        \v
-        \R+
-        \Z
-        \R++
-        \z
-        \R+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\C \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx
------------------------------------------------------------------
-        Bra
-        \H+
-        \D
-        \H+
-        \d
-        \H+
-        \S
-        \H+
-        \s
-        \H+
-        \W
-        \H+
-        \w
-        \H+
-        Any
-        \H+
-        AllAny
-        \H+
-        \R
-        \H+
-        \H
-        \H++
-        \h
-        \H+
-        \V
-        \H+
-        \v
-        \H+
-        \Z
-        \H++
-        \z
-        \H+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\C \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx
------------------------------------------------------------------
-        Bra
-        \h+
-        \D
-        \h++
-        \d
-        \h++
-        \S
-        \h+
-        \s
-        \h+
-        \W
-        \h++
-        \w
-        \h+
-        Any
-        \h+
-        AllAny
-        \h++
-        \R
-        \h++
-        \H
-        \h+
-        \h
-        \h+
-        \V
-        \h++
-        \v
-        \h+
-        \Z
-        \h++
-        \z
-        \h+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\C \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx
------------------------------------------------------------------
-        Bra
-        \V+
-        \D
-        \V+
-        \d
-        \V+
-        \S
-        \V+
-        \s
-        \V+
-        \W
-        \V+
-        \w
-        \V+
-        Any
-        \V+
-        AllAny
-        \V++
-        \R
-        \V+
-        \H
-        \V+
-        \h
-        \V+
-        \V
-        \V++
-        \v
-        \V+
-        \Z
-        \V++
-        \z
-        \V+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\C \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx
------------------------------------------------------------------
-        Bra
-        \v+
-        \D
-        \v++
-        \d
-        \v++
-        \S
-        \v+
-        \s
-        \v+
-        \W
-        \v++
-        \w
-        \v+
-        Any
-        \v+
-        AllAny
-        \v+
-        \R
-        \v+
-        \H
-        \v++
-        \h
-        \v++
-        \V
-        \v+
-        \v
-        \v+
-        \Z
-        \v++
-        \z
-        \v+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/ a+\D  a+\d  a+\S  a+\s  a+\W  a+\w  a+.  a+\C  a+\R  a+\H  a+\h  a+\V  a+\v  a+\Z  a+\z  a+$/Bx
------------------------------------------------------------------
-        Bra
-        a+
-        \D
-        a++
-        \d
-        a+
-        \S
-        a++
-        \s
-        a++
-        \W
-        a+
-        \w
-        a+
-        Any
-        a+
-        AllAny
-        a++
-        \R
-        a+
-        \H
-        a++
-        \h
-        a+
-        \V
-        a++
-        \v
-        a++
-        \Z
-        a++
-        \z
-        a++
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\C \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx
------------------------------------------------------------------
-        Bra
-        \x0a+
-        \D
-        \x0a++
-        \d
-        \x0a++
-        \S
-        \x0a+
-        \s
-        \x0a+
-        \W
-        \x0a++
-        \w
-        \x0a+
-        Any
-        \x0a+
-        AllAny
-        \x0a+
-        \R
-        \x0a+
-        \H
-        \x0a++
-        \h
-        \x0a++
-        \V
-        \x0a+
-        \v
-        \x0a+
-        \Z
-        \x0a++
-        \z
-        \x0a+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\C  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bx
------------------------------------------------------------------
-        Bra
-        Any+
-        \D
-        Any+
-        \d
-        Any+
-        \S
-        Any+
-        \s
-        Any+
-        \W
-        Any+
-        \w
-        Any+
-        Any
-        Any+
-        AllAny
-        Any++
-        \R
-        Any+
-        \H
-        Any+
-        \h
-        Any+
-        \V
-        Any+
-        \v
-        Any+
-        \Z
-        Any++
-        \z
-        Any+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/ .+\D  .+\d  .+\S  .+\s  .+\W  .+\w  .+.  .+\C  .+\R  .+\H  .+\h  .+\V  .+\v  .+\Z  .+\z  .+$/Bsx
------------------------------------------------------------------
-        Bra
-        AllAny+
-        \D
-        AllAny+
-        \d
-        AllAny+
-        \S
-        AllAny+
-        \s
-        AllAny+
-        \W
-        AllAny+
-        \w
-        AllAny+
-        AllAny
-        AllAny+
-        AllAny
-        AllAny+
-        \R
-        AllAny+
-        \H
-        AllAny+
-        \h
-        AllAny+
-        \V
-        AllAny+
-        \v
-        AllAny+
-        \Z
-        AllAny++
-        \z
-        AllAny+
-        $
-        Ket
-        End
------------------------------------------------------------------
-
-/\D+$  \d+$  \S+$  \s+$  \W+$  \w+$  \C+$  \R+$  \H+$  \h+$  \V+$  \v+$   a+$  \n+$   .+$  .+$/Bmx
+/ \D+$  \d+$  \S+$  \s+$  \W+$  \w+$  \R+$  \H+$  \h+$  \V+$ \v+$  a+$   \n+$  .+$  .+$/Bmx
 ------------------------------------------------------------------
        Bra
        \D+
@ -12588,8 +12520,6 @@ Subject length lower bound = 5
        \W+
     /m $
        \w++
-     /m $
-        AllAny+
     /m $
        \R+
     /m $
@ -14210,9 +14140,6 @@ Capturing subpattern count = 2
 May match empty string
 Subject length lower bound = 0

-/ab\Cde/never_backslash_c
-Failed: error 183 at offset 3: using \C is disabled by the application
-
 /abc/
 \= Expect no match
    \[9x!xxx(]{9999}
@ -14532,14 +14459,6 @@ Subject length lower bound = 0
    \0{ab} 
 0: \x00{ab}

-/ab\Cde/
-    abXde
- 0: abXde
-    
-/(?<=ab\Cde)X/
-    abZdeX
- 0: X
-
 /^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/
    ababababbbabZXXXX
 0: ababababbbabZ
--- a/testdata/testoutput21
+++ b/testdata/testoutput21
@ -0,0 +1,89 @@
+# These are tests of \C that do not involve UTF. They are not run when \C is
+# disabled by compiling with --enable-never-backslash-C.
+
+/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx
+------------------------------------------------------------------
+        Bra
+        AllAny+
+        \D
+        AllAny+
+        \d
+        AllAny+
+        \S
+        AllAny+
+        \s
+        AllAny+
+        \W
+        AllAny+
+        \w
+        AllAny+
+        Any
+        AllAny+
+        \R
+        AllAny+
+        \H
+        AllAny+
+        \h
+        AllAny+
+        \V
+        AllAny+
+        \v
+        AllAny+
+        \Z
+        AllAny++
+        \z
+        AllAny+
+        $
+        Ket
+        End
+------------------------------------------------------------------
+
+/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx
+------------------------------------------------------------------
+        Bra
+        \D+
+        AllAny
+        \d+
+        AllAny
+        \S+
+        AllAny
+        \s+
+        AllAny
+        \W+
+        AllAny
+        \w+
+        AllAny
+        Any+
+        AllAny
+        \R+
+        AllAny
+        \H+
+        AllAny
+        \h+
+        AllAny
+        \V+
+        AllAny
+        \v+
+        AllAny
+        a+
+        AllAny
+        \x0a+
+        AllAny
+        AllAny+
+        AllAny
+        Ket
+        End
+------------------------------------------------------------------
+
+/ab\Cde/never_backslash_c
+Failed: error 183 at offset 3: using \C is disabled by the application
+
+/ab\Cde/
+    abXde
+ 0: abXde
+    
+/(?<=ab\Cde)X/
+    abZdeX
+ 0: X
+
+# End of testinput21
--- a/testdata/testoutput22-16
+++ b/testdata/testoutput22-16
@ -0,0 +1,161 @@
+# Tests of \C when Unicode support is available. Note that \C is not supported
+# for DFA matching in UTF mode, so this test is not run with -dfa. The output
+# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
+# in some widths and not in others.
+
+/ab\Cde/utf
+    abXde
+ 0: abXde
+
+# This should produce an error diagnostic (\C in UTF lookbehind)
+
+/(?<=ab\Cde)X/utf
+Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
+
+# Autopossessification tests
+
+/\C+\X \X+\C/Bx
+------------------------------------------------------------------
+        Bra
+        AllAny+
+        extuni
+        extuni+
+        AllAny
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C+\X \X+\C/Bx,utf
+------------------------------------------------------------------
+        Bra
+        Anybyte+
+        extuni
+        extuni+
+        Anybyte
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C\X*TӅ;
+{0,6}\v+
F
+/utf
+\= Expect no match
+    Ӆ\x0a
+No match
+
+/\C(\W?ſ)'?{{/utf
+\= Expect no match
+    \\C(\\W?ſ)'?{{
+No match
+
+/X(\C{3})/utf
+    X\x{1234}
+No match
+    X\x{11234}Y
+ 0: X\x{11234}Y
+ 1: \x{11234}Y
+    X\x{11234}YZ
+ 0: X\x{11234}Y
+ 1: \x{11234}Y
+
+/X(\C{4})/utf
+    X\x{1234}YZ
+No match
+    X\x{11234}YZ
+ 0: X\x{11234}YZ
+ 1: \x{11234}YZ
+    X\x{11234}YZW
+ 0: X\x{11234}YZ
+ 1: \x{11234}YZ
+
+/X\C*/utf
+    XYZabcdce
+ 0: XYZabcdce
+
+/X\C*?/utf
+    XYZabcde
+ 0: X
+
+/X\C{3,5}/utf
+    Xabcdefg
+ 0: Xabcde
+    X\x{1234}
+No match
+    X\x{1234}YZ
+ 0: X\x{1234}YZ
+    X\x{1234}\x{512}
+No match
+    X\x{1234}\x{512}YZ
+ 0: X\x{1234}\x{512}YZ
+    X\x{11234}Y
+ 0: X\x{11234}Y
+    X\x{11234}YZ
+ 0: X\x{11234}YZ
+    X\x{11234}\x{512}
+ 0: X\x{11234}\x{512}
+    X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{512}YZ
+    X\x{11234}\x{512}\x{11234}Z
+ 0: X\x{11234}\x{512}\x{11234}
+
+/X\C{3,5}?/utf
+    Xabcdefg
+ 0: Xabc
+    X\x{1234}
+No match
+    X\x{1234}YZ
+ 0: X\x{1234}YZ
+    X\x{1234}\x{512}
+No match
+    X\x{11234}Y
+ 0: X\x{11234}Y
+    X\x{11234}YZ
+ 0: X\x{11234}Y
+    X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{512}
+    X\x{11234}
+No match
+
+/a\Cb/utf
+    aXb
+ 0: aXb
+    a\nb
+ 0: a\x{0a}b
+    a\x{100}b
+ 0: a\x{100}b
+
+/a\C\Cb/utf
+    a\x{100}b
+No match
+    a\x{12257}b
+ 0: a\x{12257}b
+    a\x{12257}\x{11234}b
+No match
+
+/ab\Cde/utf
+    abXde
+ 0: abXde
+
+# This one is here not because it's different to Perl, but because the way
+# the captured single code unit is displayed. (In Perl it becomes a character,
+# and you can't tell the difference.)
+
+/X(\C)(.*)/utf
+    X\x{1234}
+ 0: X\x{1234}
+ 1: \x{1234}
+ 2: 
+    X\nabc
+ 0: X\x{0a}abc
+ 1: \x{0a}
+ 2: abc
+
+# This one is here because Perl gives out a grumbly error message (quite
+# correctly, but that messes up comparisons).
+
+/a\Cb/utf
+\= Expect no match in 8-bit mode
+    a\x{100}b
+ 0: a\x{100}b
+
+# End of testinput22
--- a/testdata/testoutput22-32
+++ b/testdata/testoutput22-32
@ -0,0 +1,159 @@
+# Tests of \C when Unicode support is available. Note that \C is not supported
+# for DFA matching in UTF mode, so this test is not run with -dfa. The output
+# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
+# in some widths and not in others.
+
+/ab\Cde/utf
+    abXde
+ 0: abXde
+
+# This should produce an error diagnostic (\C in UTF lookbehind)
+
+/(?<=ab\Cde)X/utf
+Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
+
+# Autopossessification tests
+
+/\C+\X \X+\C/Bx
+------------------------------------------------------------------
+        Bra
+        AllAny+
+        extuni
+        extuni+
+        AllAny
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C+\X \X+\C/Bx,utf
+------------------------------------------------------------------
+        Bra
+        Anybyte+
+        extuni
+        extuni+
+        Anybyte
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C\X*TӅ;
+{0,6}\v+
F
+/utf
+\= Expect no match
+    Ӆ\x0a
+No match
+
+/\C(\W?ſ)'?{{/utf
+\= Expect no match
+    \\C(\\W?ſ)'?{{
+No match
+
+/X(\C{3})/utf
+    X\x{1234}
+No match
+    X\x{11234}Y
+No match
+    X\x{11234}YZ
+ 0: X\x{11234}YZ
+ 1: \x{11234}YZ
+
+/X(\C{4})/utf
+    X\x{1234}YZ
+No match
+    X\x{11234}YZ
+No match
+    X\x{11234}YZW
+ 0: X\x{11234}YZW
+ 1: \x{11234}YZW
+
+/X\C*/utf
+    XYZabcdce
+ 0: XYZabcdce
+
+/X\C*?/utf
+    XYZabcde
+ 0: X
+
+/X\C{3,5}/utf
+    Xabcdefg
+ 0: Xabcde
+    X\x{1234}
+No match
+    X\x{1234}YZ
+ 0: X\x{1234}YZ
+    X\x{1234}\x{512}
+No match
+    X\x{1234}\x{512}YZ
+ 0: X\x{1234}\x{512}YZ
+    X\x{11234}Y
+No match
+    X\x{11234}YZ
+ 0: X\x{11234}YZ
+    X\x{11234}\x{512}
+No match
+    X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{512}YZ
+    X\x{11234}\x{512}\x{11234}Z
+ 0: X\x{11234}\x{512}\x{11234}Z
+
+/X\C{3,5}?/utf
+    Xabcdefg
+ 0: Xabc
+    X\x{1234}
+No match
+    X\x{1234}YZ
+ 0: X\x{1234}YZ
+    X\x{1234}\x{512}
+No match
+    X\x{11234}Y
+No match
+    X\x{11234}YZ
+ 0: X\x{11234}YZ
+    X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{512}Y
+    X\x{11234}
+No match
+
+/a\Cb/utf
+    aXb
+ 0: aXb
+    a\nb
+ 0: a\x{0a}b
+    a\x{100}b
+ 0: a\x{100}b
+
+/a\C\Cb/utf
+    a\x{100}b
+No match
+    a\x{12257}b
+No match
+    a\x{12257}\x{11234}b
+ 0: a\x{12257}\x{11234}b
+
+/ab\Cde/utf
+    abXde
+ 0: abXde
+
+# This one is here not because it's different to Perl, but because the way
+# the captured single code unit is displayed. (In Perl it becomes a character,
+# and you can't tell the difference.)
+
+/X(\C)(.*)/utf
+    X\x{1234}
+ 0: X\x{1234}
+ 1: \x{1234}
+ 2: 
+    X\nabc
+ 0: X\x{0a}abc
+ 1: \x{0a}
+ 2: abc
+
+# This one is here because Perl gives out a grumbly error message (quite
+# correctly, but that messes up comparisons).
+
+/a\Cb/utf
+\= Expect no match in 8-bit mode
+    a\x{100}b
+ 0: a\x{100}b
+
+# End of testinput22
--- a/testdata/testoutput22-8
+++ b/testdata/testoutput22-8
@ -0,0 +1,163 @@
+# Tests of \C when Unicode support is available. Note that \C is not supported
+# for DFA matching in UTF mode, so this test is not run with -dfa. The output
+# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
+# in some widths and not in others.
+
+/ab\Cde/utf
+    abXde
+ 0: abXde
+
+# This should produce an error diagnostic (\C in UTF lookbehind)
+
+/(?<=ab\Cde)X/utf
+Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
+
+# Autopossessification tests
+
+/\C+\X \X+\C/Bx
+------------------------------------------------------------------
+        Bra
+        AllAny+
+        extuni
+        extuni+
+        AllAny
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C+\X \X+\C/Bx,utf
+------------------------------------------------------------------
+        Bra
+        Anybyte+
+        extuni
+        extuni+
+        Anybyte
+        Ket
+        End
+------------------------------------------------------------------
+
+/\C\X*TӅ;
+{0,6}\v+
F
+/utf
+\= Expect no match
+    Ӆ\x0a
+No match
+
+/\C(\W?ſ)'?{{/utf
+\= Expect no match
+    \\C(\\W?ſ)'?{{
+No match
+
+/X(\C{3})/utf
+    X\x{1234}
+ 0: X\x{1234}
+ 1: \x{1234}
+    X\x{11234}Y
+ 0: X\x{f0}\x{91}\x{88}
+ 1: \x{f0}\x{91}\x{88}
+    X\x{11234}YZ
+ 0: X\x{f0}\x{91}\x{88}
+ 1: \x{f0}\x{91}\x{88}
+
+/X(\C{4})/utf
+    X\x{1234}YZ
+ 0: X\x{1234}Y
+ 1: \x{1234}Y
+    X\x{11234}YZ
+ 0: X\x{11234}
+ 1: \x{11234}
+    X\x{11234}YZW
+ 0: X\x{11234}
+ 1: \x{11234}
+
+/X\C*/utf
+    XYZabcdce
+ 0: XYZabcdce
+
+/X\C*?/utf
+    XYZabcde
+ 0: X
+
+/X\C{3,5}/utf
+    Xabcdefg
+ 0: Xabcde
+    X\x{1234}
+ 0: X\x{1234}
+    X\x{1234}YZ
+ 0: X\x{1234}YZ
+    X\x{1234}\x{512}
+ 0: X\x{1234}\x{512}
+    X\x{1234}\x{512}YZ
+ 0: X\x{1234}\x{512}
+    X\x{11234}Y
+ 0: X\x{11234}Y
+    X\x{11234}YZ
+ 0: X\x{11234}Y
+    X\x{11234}\x{512}
+ 0: X\x{11234}\x{d4}
+    X\x{11234}\x{512}YZ
+ 0: X\x{11234}\x{d4}
+    X\x{11234}\x{512}\x{11234}Z
+ 0: X\x{11234}\x{d4}
+
+/X\C{3,5}?/utf
+    Xabcdefg
+ 0: Xabc
+    X\x{1234}
+ 0: X\x{1234}
+    X\x{1234}YZ
+ 0: X\x{1234}
+    X\x{1234}\x{512}
+ 0: X\x{1234}
+    X\x{11234}Y
+ 0: X\x{f0}\x{91}\x{88}
+    X\x{11234}YZ
+ 0: X\x{f0}\x{91}\x{88}
+    X\x{11234}\x{512}YZ
+ 0: X\x{f0}\x{91}\x{88}
+    X\x{11234}
+ 0: X\x{f0}\x{91}\x{88}
+
+/a\Cb/utf
+    aXb
+ 0: aXb
+    a\nb
+ 0: a\x{0a}b
+    a\x{100}b
+No match
+
+/a\C\Cb/utf
+    a\x{100}b
+ 0: a\x{100}b
+    a\x{12257}b
+No match
+    a\x{12257}\x{11234}b
+No match
+
+/ab\Cde/utf
+    abXde
+ 0: abXde
+
+# This one is here not because it's different to Perl, but because the way
+# the captured single code unit is displayed. (In Perl it becomes a character,
+# and you can't tell the difference.)
+
+/X(\C)(.*)/utf
+    X\x{1234}
+ 0: X\x{1234}
+ 1: \x{e1}
+ 2: \x{88}\x{b4}
+    X\nabc
+ 0: X\x{0a}abc
+ 1: \x{0a}
+ 2: abc
+
+# This one is here because Perl gives out a grumbly error message (quite
+# correctly, but that messes up comparisons).
+
+/a\Cb/utf
+\= Expect no match in 8-bit mode
+    a\x{100}b
+No match
+
+# End of testinput22
--- a/testdata/testoutput23
+++ b/testdata/testoutput23
@ -0,0 +1,8 @@
+# This test is run when PCRE2 has been built with --enable-never-backslash-C,
+# which disables the use of \C. All we can do is check that it gives the 
+# correct error message.
+
+/a\Cb/
+Failed: error 185 at offset 2: using \C is disabled in this PCRE2 library
+
+# End of testinput23
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@ -181,10 +181,6 @@ Subject length lower bound = 3
    \x{212ab}\x{212ab}\x{212ab}\x{861}
 0: \x{212ab}\x{212ab}\x{212ab}

-/(?<=\C)X/utf
-Failed: error 136 at offset 6: \C is not allowed in a lookbehind assertion
-    Should produce an error diagnostic
-    
 /^[ab]/IB,utf
 ------------------------------------------------------------------
        Bra
@ -2905,9 +2901,6 @@ No match
    aAz
 No match

-/(?<=ab\Cde)X/utf
-Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
-
 /\X/
    a\=ps
 0: a
@ -3803,7 +3796,7 @@ No match
        End
 ------------------------------------------------------------------

-/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
+/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
 ------------------------------------------------------------------
        Bra
        \D+
@ -3818,8 +3811,6 @@ No match
        extuni
        \w+
        extuni
-        AllAny+
-        extuni
        \R+
        extuni
        \H+
@ -3858,7 +3849,7 @@ No match
        End
 ------------------------------------------------------------------

-/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
+/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
 ------------------------------------------------------------------
        Bra
        extuni+
@ -3876,8 +3867,6 @@ No match
        extuni+
        Any
        extuni+
-        AllAny
-        extuni+
        \R
        extuni+
        \H
@ -4010,18 +3999,6 @@ Failed: error 122 at offset 1227: unmatched closing parenthesis
 "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
 Failed: error 124 at offset 113: letter or underscore expected after (?< or (?'

-/\C\X*TӅ;
-{0,6}\v+
F
-/utf
-\= Expect no match
-    Ӆ\x0a
-No match
-
-/\C(\W?ſ)'?{{/utf
-\= Expect no match
-    \\C(\\W?ſ)'?{{
-No match
-
 /[\pS#moq]/
    =
 0: =
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@ -7174,14 +7174,6 @@ Matched, but offsets vector is too small to show all matches
 2: aa
 3: a

-/ab\Cde/
-    abXde
- 0: abXde
-    
-/(?<=ab\Cde)X/
-    abZdeX
- 0: X
-
 /^\R/
    \r\=ps
 0: \x0d
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@ -1141,13 +1141,6 @@ Partial match: abcde
    the cat\=ph
 Partial match: the cat

-/ab\Cde/utf
-    abXde
-Failed: error -42: pattern contains an item that is not supported for DFA matching
-
-/(?<=ab\Cde)X/utf
-Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion
-
 /./newline=crlf,utf
    \r\=ps
 0: \x{0d}
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@ -4,12 +4,8 @@
 #forbid_utf
 #newline_default lf any anycrlf

-/a\Cb/
-    aXb
- 0: aXb
-    a\nb
- 0: a\x0ab
-\= Expect no match and error message (too big char)
+/ab/
+\= Expect error message (too big char) and no match
    A\x{123}B
 ** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
 ** Truncation will probably give the wrong result.