Add additional compile options and PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.

2017-05-17 17:55:11 +00:00 · 2017-05-17 17:55:11 +00:00 · dfc9712bcd
parent d9c33d0708
commit dfc9712bcd
34 changed files with 2860 additions and 2305 deletions
--- a/7
+++ b/7
@ -109,8 +109,9 @@ while (<STDIN>)
    # Handling .sp is subtle. If it is inside a literal section, do nothing if
    # the next line is a non literal text line; similarly, if not inside a
    # literal section, do nothing if a literal follows, unless we are inside
-    # a .nf/.ne section. The point being that the <pre> and </pre> that delimit
-    # literal sections will do the spacing. Always skip if no previous output.
+    # a .nf/.fi section or about to enter one. The point being that the <pre>
+    # and </pre> that delimit literal sections will do the spacing. Always skip
+    # if no previous output.

    elsif (/^\.sp/)
      {
@ -123,7 +124,7 @@ while (<STDIN>)
          }
        else
          {
-          print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
+          print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
          }
        redo;    # Now process the lookahead line we just read
        }
--- a/3
+++ b/3
@ -166,6 +166,9 @@ pcre2test, a crash could occur.
 32. Make -bigstack in RunTest allocate a 64Mb stack (instead of 16 MB) so that 
 all the tests can run with clang's sanitizing options.

+33. Implement extra compile options in the compile context and add the first 
+one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
+


 Version 10.23 14-February-2017
--- a/Makefile.am
+++ b/Makefile.am
@ -67,6 +67,7 @@ dist_html_DATA = \
  doc/html/pcre2_set_bsr.html \
  doc/html/pcre2_set_callout.html \
  doc/html/pcre2_set_character_tables.html \
+  doc/html/pcre2_set_compile_extra_options.html \
  doc/html/pcre2_set_compile_recursion_guard.html \
  doc/html/pcre2_set_depth_limit.html \
  doc/html/pcre2_set_heap_limit.html \
@ -151,6 +152,7 @@ dist_man_MANS = \
  doc/pcre2_set_bsr.3 \
  doc/pcre2_set_callout.3 \
  doc/pcre2_set_character_tables.3 \
+  doc/pcre2_set_compile_extra_options.3 \
  doc/pcre2_set_compile_recursion_guard.3 \
  doc/pcre2_set_depth_limit.3 \
  doc/pcre2_set_heap_limit.3 \
--- a/2
+++ b/2
@ -500,7 +500,7 @@ for bmode in "$test8" "$test16" "$test32"; do
    for opt in "" $jitopt; do
      $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
      if [ $? = 0 ] ; then
-        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,188,189,190,191 >>testtry
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,192 >>testtry
        checkresult $? 2 "$opt"
      fi
    done
--- a/doc/html/index.html
+++ b/doc/html/index.html
@ -207,6 +207,9 @@ in the library.
 <tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
    <td>&nbsp;&nbsp;Set character tables</td></tr>

+<tr><td><a href="pcre2_set_compile_extra_options.html">pcre2_set_compile_extra_options</a></td>
+    <td>&nbsp;&nbsp;Set compile time extra options</td></tr>
+
 <tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
    <td>&nbsp;&nbsp;Set up a compile recursion guard function</td></tr>

--- a/doc/html/pcre2_compile.html
+++ b/doc/html/pcre2_compile.html
@ -47,6 +47,7 @@ system stack size checking, or to change one or more of these parameters:
  The newline character sequence;
  The compile time nested parentheses limit;
  The maximum pattern length (in code units) that is allowed.
+  The additional options bits
 </pre>
 The option bits are:
 <pre>
--- a/doc/html/pcre2_set_compile_extra_options.html
+++ b/doc/html/pcre2_set_compile_extra_options.html
@ -0,0 +1,42 @@
+<html>
+<head>
+<title>pcre2_set_compile_extra_options specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_set_compile_extra_options man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>extra_options</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function sets additional option bits for <b>pcre2_compile()</b> that are
+housed in a compile context. It completely replaces all the bits. The extra 
+options are:
+<pre>
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
+</pre>
+There is a complete description of the PCRE2 native API in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page and a description of the POSIX API in the
+<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -60,8 +60,8 @@ please consult the man page, in case the conversion went wrong.
 <b>#include &#60;pcre2.h&#62;</b>
 <br>
 <br>
-PCRE2 is a new API for PCRE. This document contains a description of all its
-functions. See the
+PCRE2 is a new API for PCRE, starting at release 10.0. This document contains a
+description of all its native functions. See the
 <a href="pcre2.html"><b>pcre2</b></a>
 document for an overview of all the PCRE2 documentation.
 </P>
@ -145,6 +145,10 @@ document for an overview of all the PCRE2 documentation.
 <b>  const unsigned char *<i>tables</i>);</b>
 <br>
 <br>
+<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>extra_options</i>);</b>
+<br>
+<br>
 <b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  PCRE2_SIZE <i>value</i>);</b>
 <br>
@ -328,7 +332,7 @@ document for an overview of all the PCRE2 documentation.
 These functions became obsolete at release 10.30 and are retained only for 
 backward compatibility. They should not be used in new code. The first is
 replaced by <b>pcre2_set_depth_limit()</b>; the second is no longer needed and
-no longer has any effect (it always returns zero).
+has no effect (it always returns zero).
 </P>
 <br><a name="SEC12" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
 <P>
@ -389,23 +393,23 @@ For example, if you want to run a match using a pattern that was compiled with
 <P>
 In the function summaries above, and in the rest of this document and other
 PCRE2 documents, functions and data types are described using their generic
-names, without the 8, 16, or 32 suffix.
+names, without the _8, _16, or _32 suffix.
 </P>
 <br><a name="SEC13" href="#TOC1">PCRE2 API OVERVIEW</a><br>
 <P>
 PCRE2 has its own native API, which is described in this document. There are
 also some wrapper functions for the 8-bit library that correspond to the
 POSIX regular expression API, but they do not give access to all the
-functionality. They are described in the
+functionality of PCRE2. They are described in the
 <a href="pcre2posix.html"><b>pcre2posix</b></a>
 documentation. Both these APIs define a set of C function calls.
 </P>
 <P>
 The native API C data types, function prototypes, option values, and error
-codes are defined in the header file <b>pcre2.h</b>, which contains definitions
-of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers for the
-library. Applications can use these to include support for different releases
-of PCRE2.
+codes are defined in the header file <b>pcre2.h</b>, which also contains
+definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers
+for the library. Applications can use these to include support for different
+releases of PCRE2.
 </P>
 <P>
 In a Windows environment, if you want to statically link an application program
@ -478,7 +482,7 @@ been matched by <b>pcre2_match()</b>. They are:
  <b>pcre2_substring_number_from_name()</b>
 </pre>
 <b>pcre2_substring_free()</b> and <b>pcre2_substring_list_free()</b> are also
-provided, to free the memory used for extracted strings.
+provided, to free memory used for extracted strings.
 </P>
 <P>
 The function <b>pcre2_substitute()</b> can be called to match a pattern and
@ -595,7 +599,7 @@ required. JIT compilation updates a pointer within the compiled code block, so
 a thread must gain unique write access to the pointer before calling
 <b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
 <b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
-compiled code.
+compiled code before calling the JIT compiler.
 </P>
 <br><b>
 Context blocks
@ -649,6 +653,8 @@ library. The context is named `general' rather than specifically `memory'
 because in future other fields may be added. If you do not want to supply your
 own custom memory management functions, you do not need to bother with a
 general context. A general context is created by:
+<br>
+<br>
 <b>pcre2_general_context *pcre2_general_context_create(</b>
 <b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
 <b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
@ -675,11 +681,15 @@ used. When the time comes to free the block, this function is called.
 </P>
 <P>
 A general context can be copied by calling:
+<br>
+<br>
 <b>pcre2_general_context *pcre2_general_context_copy(</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
 The memory used for a general context should be freed by calling:
+<br>
+<br>
 <b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
 <a name="compilecontext"></a></P>
 <br><b>
@ -695,6 +705,7 @@ following compile-time parameters:
  The newline character sequence
  The compile time nested parentheses limit
  The maximum length of the pattern string
+  The extra options bits (none set by default) 
 </pre>
 A compile context is also required if you are using custom memory management.
 If none of these apply, just pass NULL as the context argument of
@ -702,6 +713,8 @@ If none of these apply, just pass NULL as the context argument of
 </P>
 <P>
 A compile context is created, copied, and freed by the following functions:
+<br>
+<br>
 <b>pcre2_compile_context *pcre2_compile_context_create(</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
@ -716,6 +729,8 @@ A compile context is created, copied, and freed by the following functions:
 A compile context is created with default values for its parameters. These can
 be changed by calling the following functions, which return 0 on success, or
 PCRE2_ERROR_BADDATA if invalid data is detected.
+<br>
+<br>
 <b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -725,6 +740,8 @@ or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
 ending sequence. The value is used by the JIT compiler and by the two
 interpreted matching functions, <i>pcre2_match()</i> and
 <i>pcre2_dfa_match()</i>.
+<br>
+<br>
 <b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  const unsigned char *<i>tables</i>);</b>
 <br>
@ -732,6 +749,22 @@ interpreted matching functions, <i>pcre2_match()</i> and
 The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
 argument is a general context. This function builds a set of character tables
 in the current locale.
+<br>
+<br>
+<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>extra_options</i>);</b>
+<br>
+<br>
+As PCRE2 has developed, almost all the 32 option bits that are available in
+the <i>options</i> argument of <b>pcre2_compile()</b> have been used up. To avoid
+running out, the compile context contains a set of extra option bits which are 
+used for some newer, assumed rarer, options. This function sets those bits. It
+always sets all the bits (either on or off), completely replacing any existing
+setting. The available options are defined in the section entitled "Extra
+compile options"
+<a href="#extracompileoptions">below.</a>
+<br>
+<br>
 <b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  PCRE2_SIZE <i>value</i>);</b>
 <br>
@ -741,6 +774,8 @@ compiled with this context. If the pattern is longer, an error is generated.
 This facility is provided so that applications that accept patterns from
 external sources can limit their size. The default is the largest number that a
 PCRE2_SIZE variable can hold, which is effectively unlimited.
+<br>
+<br>
 <b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -758,11 +793,13 @@ sequence such as (*CRLF). See the
 page for details.
 </P>
 <P>
-When a pattern is compiled with the PCRE2_EXTENDED option, the newline 
-convention affects the recognition of white space and the end of internal
-comments starting with #. The value is saved with the compiled pattern for
-subsequent use by the JIT compiler and by the two interpreted matching
-functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
+When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
+option, the newline convention affects the recognition of white space and the
+end of internal comments starting with #. The value is saved with the compiled
+pattern for subsequent use by the JIT compiler and by the two interpreted
+matching functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
+<br>
+<br>
 <b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -771,6 +808,8 @@ This parameter ajusts the limit, set when PCRE2 is built (default 250), on the
 depth of parenthesis nesting in a pattern. This limit stops rogue patterns
 using up too much system stack when being compiled. The limit applies to
 parentheses of all kinds, not just capturing parentheses.
+<br>
+<br>
 <b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
 <br>
@ -778,10 +817,10 @@ parentheses of all kinds, not just capturing parentheses.
 There is at least one application that runs PCRE2 in threads with very limited
 system stack, where running out of stack is to be avoided at all costs. The
 parenthesis limit above cannot take account of how much stack is actually
-available. For a finer control, you can supply a function that is called
-whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
-pattern. This function can check the actual stack size (or anything else that
-it wants to, of course).
+available during compilation. For a finer control, you can supply a function
+that is called whenever <b>pcre2_compile()</b> starts to compile a parenthesized
+part of a pattern. This function can check the actual stack size (or anything
+else that it wants to, of course).
 </P>
 <P>
 The first argument to the callout function gives the current depth of
@ -807,6 +846,8 @@ If none of these apply, just pass NULL as the context argument of
 </P>
 <P>
 A match context is created, copied, and freed by the following functions:
+<br>
+<br>
 <b>pcre2_match_context *pcre2_match_context_create(</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
@ -821,6 +862,8 @@ A match context is created, copied, and freed by the following functions:
 A match context is created with default values for its parameters. These can
 be changed by calling the following functions, which return 0 on success, or
 PCRE2_ERROR_BADDATA if invalid data is detected.
+<br>
+<br>
 <b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  int (*<i>callout_function</i>)(pcre2_callout_block *, void *),</b>
 <b>  void *<i>callout_data</i>);</b>
@ -830,6 +873,8 @@ This sets up a "callout" function for PCRE2 to call at specified points
 during a matching operation. Details are given in the
 <a href="pcre2callout.html"><b>pcre2callout</b></a>
 documentation.
+<br>
+<br>
 <b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  PCRE2_SIZE <i>value</i>);</b>
 <br>
@ -856,6 +901,8 @@ subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
 start within the first line of the subject. If this is set with an offset
 limit, a match must occur in the first line and also within the offset limit.
 In other words, whichever limit comes first is used.
+<br>
+<br>
 <b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -889,6 +936,8 @@ Heap memory is used only if the initial vector is too small. If the heap limit
 is set to a value less than 21 (in particular, zero) no heap memory will be 
 used. In this case, only patterns that do not have a lot of nested backtracking 
 can be successfully processed. 
+<br>
+<br>
 <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -926,6 +975,8 @@ of the form
 where ddd is a decimal number. However, such a setting is ignored unless ddd is
 less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
+<br>
+<br>
 <b>int pcre2_set_depth_limit(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -1282,8 +1333,9 @@ include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
 option is set, normal backslash processing is applied to verb names and only an
 unescaped closing parenthesis terminates the name. A closing parenthesis can be
 included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED 
-option is set, unescaped whitespace in verb names is skipped and #-comments are
-recognized in this mode, exactly as in the rest of the pattern.
+or PCRE2_EXTENDED_MORE option is set, unescaped whitespace in verb names is
+skipped and #-comments are recognized in this mode, exactly as in the rest of
+the pattern.
 <pre>
  PCRE2_AUTO_CALLOUT
 </pre>
@ -1298,7 +1350,13 @@ documentation.
 </pre>
 If this bit is set, letters in the pattern match both upper and lower case
 letters in the subject. It is equivalent to Perl's /i option, and it can be
-changed within a pattern by a (?i) option setting.
+changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode 
+properties are used for all characters with more than one other case, and for
+all characters whose code points are greater than U+007f. For lower valued 
+characters with only one other case, a lookup table is used for speed. When 
+PCRE2_UTF is not set, a lookup table is used for all code points less than 256, 
+and higher code points (available only in 16-bit or 32-bit mode) are treated as 
+not having another case.
 <pre>
  PCRE2_DOLLAR_ENDONLY
 </pre>
@ -1380,18 +1438,18 @@ built.
 <pre>
  PCRE2_EXTENDED_MORE
 </pre>
-This option has the effect of PCRE2_EXTENDED, but, in addition, space and
-horizontal tab characters are also ignored inside a character class.
+This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space
+and horizontal tab characters are ignored inside a character class.
 PCRE2_EXTENDED_MORE is equivalent to Perl's 5.26 /xx option, and it can be
 changed within a pattern by a (?xx) option setting.
 <pre>
  PCRE2_FIRSTLINE
 </pre>
-If this option is set, an unanchored pattern is required to match before or at
-the first newline in the subject string, though the matched text may continue
-over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
-general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit, a
-match must occur in the first line and also within the offset limit. In other
+If this option is set, the start of an unanchored pattern match must be before
+or at the first newline in the subject string, though the matched text may
+continue over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a
+more general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit,
+a match must occur in the first line and also within the offset limit. In other
 words, whichever limit comes first is used.
 <pre>
  PCRE2_MATCH_UNSET_BACKREF
@ -1457,8 +1515,8 @@ PCRE2_NEVER_UTF causes an error.
 If this option is set, it disables the use of numbered capturing parentheses in
 the pattern. Any opening parenthesis that is not followed by ? behaves as if it
 were followed by ?: but named parentheses can still be used for capturing (and
-they acquire numbers in the usual way). There is no equivalent of this option
-in Perl. Note that, if this option is set, references to capturing groups (back
+they acquire numbers in the usual way). This is the same as Perl's /n option.
+Note that, when this option is set, references to capturing groups (back
 references or recursion/subroutine calls) may only refer to named groups,
 though the reference can be by name or by number.
 <pre>
@ -1494,8 +1552,8 @@ compiler.
 <P>
 There are a number of optimizations that may occur at the start of a match, in
 order to speed up the process. For example, if it is known that an unanchored
-match must start with a specific character, the matching code searches the
-subject for that character, and fails immediately if it cannot find it, without
+match must start with a specific code unit value, the matching code searches
+the subject for that value, and fails immediately if it cannot find it, without
 actually running the main matching function. This means that a special item
 such as (*COMMIT) at the start of a pattern is not considered until after a
 suitable starting point for the match has been found. Also, when callouts or
@ -1524,9 +1582,11 @@ current starting position, which in this case, it does. However, if the same
 match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
 subject string does not happen. The first match attempt is run starting from
 "D" and when this fails, (*COMMIT) prevents any further matches being tried, so
-the overall result is "no match". There are also other start-up optimizations.
-For example, a minimum length for the subject may be recorded. Consider the
-pattern
+the overall result is "no match". 
+</P>
+<P>
+There are also other start-up optimizations. For example, a minimum length for
+the subject may be recorded. Consider the pattern
 <pre>
  (*MARK:A)(X|Y)
 </pre>
@ -1551,12 +1611,26 @@ document. If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a
 negative error code. 
 </P>
 <P>
-If you know that your pattern is valid, and you want to skip this check for
-performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When it is set,
-the effect of passing an invalid UTF string as a pattern is undefined. It may
-cause your program to crash or loop. Note that this option can also be passed
-to <b>pcre2_match()</b> and <b>pcre_dfa_match()</b>, to suppress validity
-checking of the subject string.
+If you know that your pattern is a valid UTF string, and you want to skip this
+check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When
+it is set, the effect of passing an invalid UTF string as a pattern is
+undefined. It may cause your program to crash or loop. 
+</P>
+<P>
+Note that this option can also be passed to <b>pcre2_match()</b> and
+<b>pcre2_dfa_match()</b>, to suppress UTF validity checking of the subject
+string.
+</P>
+<P>
+Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the
+error that is given if an escape sequence for an invalid Unicode code point is
+encountered in the pattern. In particular, the so-called "surrogate" code
+points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences
+such as \x{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra
+option, as described in the section entitled "Extra compile options"
+<a href="#extracompileoptions">below.</a>
+However, this is possible only in UTF-8 and UTF-32 modes, because these values
+are not representable in UTF-16.
 <pre>
  PCRE2_UCP
 </pre>
@ -1594,10 +1668,42 @@ This option causes PCRE2 to regard both the pattern and the subject strings
 that are subsequently processed as strings of UTF characters instead of
 single-code-unit strings. It is available when PCRE2 is built to include
 Unicode support (which is the default). If Unicode support is not available,
-the use of this option provokes an error. Details of how this option changes
-the behaviour of PCRE2 are given in the
+the use of this option provokes an error. Details of how PCRE2_UTF changes the
+behaviour of PCRE2 are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 page.
+<a name="extracompileoptions"></a></P>
+<br><b>
+Extra compile options
+</b><br>
+<P>
+Unlike the main compile-time options, the extra options are not saved with the
+compiled pattern. The option bits that can be set in a compile context by
+calling the <b>pcre2_set_compile_extra_options()</b> function are as follows:
+<pre>
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+</pre>
+This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is 
+forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate"
+code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode
+code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot 
+therefore be represented in UTF-16. They can be represented in UTF-8 and
+UTF-32, but are defined as invalid code points, and cause errors if encountered 
+in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2. 
+</P>
+<P>
+These values also cause errors if encountered in escape sequences such as
+\x{d912} within a pattern. However, it seems that some applications, when
+using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test
+for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does
+not disable the error that occurs, because it applies only to the testing of
+input strings for UTF validity.
+</P>
+<P>
+If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code 
+point values in UTF-8 and UTF-32 patterns no longer provoke errors and are 
+incorporated in the compiled pattern. However, they can only match subject 
+characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
 </P>
 <br><a name="SEC20" href="#TOC1">COMPILATION ERROR CODES</a><br>
 <P>
@ -1806,7 +1912,9 @@ The third argument should point to an <b>uint32_t</b> variable.
 If the pattern set a backtracking depth limit by including an item of the form
 (*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument
 should point to an unsigned 32-bit integer. If no such value has been set, the
-call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
+call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note 
+that this limit will only be used during matching if it is less than the limit
+set or defaulted by the caller of the match function.
 <pre>
  PCRE2_INFO_FIRSTBITMAP
 </pre>
@ -1824,15 +1932,15 @@ returned. Otherwise NULL is returned. The third argument should point to an
 Return information about the first code unit of any matched string, for a
 non-anchored pattern. The third argument should point to an <b>uint32_t</b>
 variable. If there is a fixed first value, for example, the letter "c" from a
-pattern such as (cat|cow|coyote), 1 is returned, and the character value can be
-retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but
-it is known that a match can occur only at the start of the subject or
-following a newline in the subject, 2 is returned. Otherwise, and for anchored
-patterns, 0 is returned.
+pattern such as (cat|cow|coyote), 1 is returned, and the value can be retrieved
+using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is
+known that a match can occur only at the start of the subject or following a
+newline in the subject, 2 is returned. Otherwise, and for anchored patterns, 0
+is returned.
 <pre>
  PCRE2_INFO_FIRSTCODEUNIT
 </pre>
-Return the value of the first code unit of any matched string in the situation
+Return the value of the first code unit of any matched string for a pattern
 where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third
 argument should point to an <b>uint32_t</b> variable. In the 8-bit library, the
 value is always less than 256. In the 16-bit library the value can be up to
@ -1864,7 +1972,9 @@ the equivalent hexadecimal or octal escape sequences.
 If the pattern set a heap memory limit by including an item of the form
 (*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
 should point to an unsigned 32-bit integer. If no such value has been set, the
-call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
+call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
+that this limit will only be used during matching if it is less than the limit
+set or defaulted by the caller of the match function.
 <pre>
  PCRE2_INFO_JCHANGED
 </pre>
@ -1891,10 +2001,10 @@ PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is 0.
 <pre>
  PCRE2_INFO_LASTCODEUNIT
 </pre>
-Return the value of the rightmost literal data unit that must exist in any
-matched string, other than at its start, if such a value has been recorded. The
-third argument should point to an <b>uint32_t</b> variable. If there is no such
-value, 0 is returned.
+Return the value of the rightmost literal code unit that must exist in any
+matched string, other than at its start, for a pattern where
+PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argument
+should point to an <b>uint32_t</b> variable.
 <pre>
  PCRE2_INFO_MATCHEMPTY
 </pre>
@ -1909,7 +2019,9 @@ in such cases.
 If the pattern set a match limit by including an item of the form
 (*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument
 should point to an unsigned 32-bit integer. If no such value has been set, the
-call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
+call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
+that this limit will only be used during matching if it is less than the limit
+set or defaulted by the caller of the match function.
 <pre>
  PCRE2_INFO_MAXLOOKBEHIND
 </pre>
@ -1921,7 +2033,8 @@ require a one-character lookbehind. \A also registers a one-character
 lookbehind, though it does not actually inspect the previous character. This is
 to ensure that at least one character from the old segment is retained when a
 new segment is processed. Otherwise, if there are no lookbehinds in the
-pattern, \A might match incorrectly at the start of a new segment.
+pattern, \A might match incorrectly at the start of a second or subsequent
+segment.
 <pre>
  PCRE2_INFO_MINLENGTH
 </pre>
@ -2216,7 +2329,7 @@ character is CR followed by LF, advance the starting offset by two characters
 instead of one.
 </P>
 <P>
-If a non-zero starting offset is passed when the pattern is anchored, an single
+If a non-zero starting offset is passed when the pattern is anchored, a single
 attempt to match at the given offset is made. This can only succeed if the
 pattern does not require the match to be at the start of the subject. In other 
 words, the anchoring must be the result of setting the PCRE2_ANCHORED option or 
@ -2611,6 +2724,10 @@ documentation for details.
  PCRE2_ERROR_DEPTHLIMIT
 </pre>
 The nested backtracking depth limit was reached.
+<pre>
+  PCRE2_ERROR_HEAPLIMIT
+</pre>
+The heap limit was reached.
 <pre>
  PCRE2_ERROR_INTERNAL
 </pre>
@ -3290,7 +3407,7 @@ NOTE: PCRE2's "auto-possessification" optimization usually applies to character
 repeats at the end of a pattern (as well as internally). For example, the
 pattern "a\d+" is compiled as if it were "a\d++". For DFA matching, this
 means that only one possible match is found. If you really do want multiple
-matches in such cases, either use an ungreedy repeat auch as "a\d+?" or set
+matches in such cases, either use an ungreedy repeat such as "a\d+?" or set
 the PCRE2_NO_AUTO_POSSESS option when compiling.
 </P>
 <br><b>
@ -3351,7 +3468,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC42" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 17 April 2017
+Last updated: 17 May 2017
 <br>
 Copyright &copy; 1997-2017 University of Cambridge.
 <br>
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@ -1545,12 +1545,13 @@ alternative in the subpattern.
 <br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
 <P>
 The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
-PCRE2_EXTENDED, and PCRE2_EXTENDED_MORE options (which are Perl-compatible) can
-be changed from within the pattern by a sequence of Perl option letters
-enclosed between "(?" and ")". The option letters are
+PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options (which
+are Perl-compatible) can be changed from within the pattern by a sequence of
+Perl option letters enclosed between "(?" and ")". The option letters are
 <pre>
  i  for PCRE2_CASELESS
  m  for PCRE2_MULTILINE
+  n  for PCRE2_NO_AUTO_CAPTURE 
  s  for PCRE2_DOTALL
  x  for PCRE2_EXTENDED
  xx for PCRE2_EXTENDED_MORE
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@ -430,6 +430,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
  (?i)            caseless
  (?J)            allow duplicate names
  (?m)            multiline
+  (?n)            no auto capture 
  (?s)            single line (dotall)
  (?U)            default ungreedy (lazy)
  (?x)            extended: ignore white space except in classes
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -559,14 +559,19 @@ by a previous <b>#pattern</b> command.
 Setting compilation options
 </b><br>
 <P>
-The following modifiers set options for <b>pcre2_compile()</b>. The most common
-ones have single-letter abbreviations, with special handling for /x (to make
-it like Perl). If a second x is present, PCRE2_EXTENDED is converted into
-PCRE2_EXTENDED_MORE. A third appearance adds PCRE2_EXTENDED as well. See
+The following modifiers set options for <b>pcre2_compile()</b>. Most of them set
+bits in the options argument of that function, but those whose names start with 
+PCRE2_EXTRA are additional options that are set in the compile context. For the
+main options, there are some single-letter abbreviations that are the same as
+Perl options. There is special handling for /x: if a second x is present,
+PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A third
+appearance adds PCRE2_EXTENDED as well, though this makes no difference to the
+way <b>pcre2_compile()</b> behaves. See
 <a href="pcre2api.html"><b>pcre2api</b></a>
 for a description of the effects of these options.
 <pre>
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 
      alt_bsux                  set PCRE2_ALT_BSUX
      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
      alt_verbnames             set PCRE2_ALT_VERBNAMES
@ -585,7 +590,7 @@ for a description of the effects of these options.
      never_backslash_c         set PCRE2_NEVER_BACKSLASH_C
      never_ucp                 set PCRE2_NEVER_UCP
      never_utf                 set PCRE2_NEVER_UTF
-      no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
+  /n  no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
      no_auto_possess           set PCRE2_NO_AUTO_POSSESS
      no_dotstar_anchor         set PCRE2_NO_DOTSTAR_ANCHOR
      no_start_optimize         set PCRE2_NO_START_OPTIMIZE
@ -607,7 +612,8 @@ Setting compilation controls
 </b><br>
 <P>
 The following modifiers affect the compilation process or request information
-about the pattern:
+about the pattern. There are single-letter abbreviations for some that are
+heavily used in the test files.
 <pre>
      bsr=[anycrlf|unicode]     specify \R handling
  /B  bincode                   show binary code without lengths
@ -1810,7 +1816,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 17 April 2017
+Last updated: 17 May 2017
 <br>
 Copyright &copy; 1997-2017 University of Cambridge.
 <br>
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -47,7 +47,7 @@ and
 documentation. Only the short names for properties are supported. For example,
 \p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
 Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE does not support this.
+compatibility with Perl 5.6. PCRE2 does not support this.
 </P>
 <br><b>
 WIDE CHARACTERS AND UTF MODES
@ -109,10 +109,15 @@ However, the special horizontal and vertical white space matching escapes (\h,
 \H, \v, and \V) do match all the appropriate Unicode characters, whether or
 not PCRE2_UCP is set.
 </P>
+<br><b>
+CASE-EQUIVALENCE IN UTF MODES
+</b><br>
 <P>
-Case-insensitive matching in UTF mode makes use of Unicode properties. A few
-Unicode characters such as Greek sigma have more than two codepoints that are
-case-equivalent, and these are treated as such.
+Case-insensitive matching in a UTF mode makes use of Unicode properties except
+for characters whose code points are less than 128 and that have at most two
+case-equivalent values. For these, a direct table lookup is used for speed. A
+few Unicode characters such as Greek sigma have more than two code points that
+are case-equivalent, and these are treated as such.
 </P>
 <br><b>
 VALIDITY OF UTF STRINGS
@ -173,6 +178,15 @@ or <b>pcre2_dfa_match()</b>.
 <P>
 If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
 is undefined and your program may crash or loop indefinitely.
+</P>
+<P>
+Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
+that is given if an escape sequence for an invalid Unicode code point is
+encountered in the pattern. If you want to allow escape sequences such as
+\x{d800} (a surrogate code point) you can set the
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible
+only in UTF-8 and UTF-32 modes, because these values are not representable in
+UTF-16.
 <a name="utf8strings"></a></P>
 <br><b>
 Errors in UTF-8 strings
@ -280,9 +294,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 03 July 2016
+Last updated: 17 May 2017
 <br>
-Copyright &copy; 1997-2016 University of Cambridge.
+Copyright &copy; 1997-2017 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/index.html.src
+++ b/doc/index.html.src
@ -207,6 +207,9 @@ in the library.
 <tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
    <td>&nbsp;&nbsp;Set character tables</td></tr>

+<tr><td><a href="pcre2_set_compile_extra_options.html">pcre2_set_compile_extra_options</a></td>
+    <td>&nbsp;&nbsp;Set compile-time extra options</td></tr>
+
 <tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
    <td>&nbsp;&nbsp;Set up a compile recursion guard function</td></tr>

--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@ -181,9 +181,9 @@ NAME

       #include <pcre2.h>

-       PCRE2  is  a  new API for PCRE. This document contains a description of
-       all its functions. See the pcre2 document for an overview  of  all  the
-       PCRE2 documentation.
+       PCRE2  is  a  new API for PCRE, starting at release 10.0. This document
+       contains a description of all its native functions. See the pcre2 docu-
+       ment for an overview of all the PCRE2 documentation.


 PCRE2 NATIVE API BASIC FUNCTIONS
@ -253,6 +253,9 @@ PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS
       int pcre2_set_character_tables(pcre2_compile_context *ccontext,
         const unsigned char *tables);

+       int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext,
+         uint32_t extra_options);
+
       int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext,
         PCRE2_SIZE value);

@ -407,7 +410,7 @@ PCRE2 NATIVE API OBSOLETE FUNCTIONS
       These  functions became obsolete at release 10.30 and are retained only
       for backward compatibility. They should not be used in  new  code.  The
       first  is  replaced by pcre2_set_depth_limit(); the second is no longer
-       needed and no longer has any effect (it always returns zero).
+       needed and has no effect (it always returns zero).


 PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
@ -466,7 +469,7 @@ PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES

       In  the  function summaries above, and in the rest of this document and
       other PCRE2 documents, functions and data  types  are  described  using
-       their generic names, without the 8, 16, or 32 suffix.
+       their generic names, without the _8, _16, or _32 suffix.


 PCRE2 API OVERVIEW
@ -474,12 +477,12 @@ PCRE2 API OVERVIEW
       PCRE2  has  its  own  native  API, which is described in this document.
       There are also some wrapper functions for the 8-bit library that corre-
       spond  to the POSIX regular expression API, but they do not give access
-       to all the functionality. They are described in the pcre2posix documen-
-       tation. Both these APIs define a set of C function calls.
+       to all the functionality of PCRE2. They are described in the pcre2posix
+       documentation. Both these APIs define a set of C function calls.

       The  native  API  C data types, function prototypes, option values, and
-       error codes are defined in the header file pcre2.h, which contains def-
-       initions  of  PCRE2_MAJOR  and PCRE2_MINOR, the major and minor release
+       error codes are defined in the header file pcre2.h, which also contains
+       definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release
       numbers for the library. Applications can use these to include  support
       for different releases of PCRE2.

@ -544,7 +547,7 @@ PCRE2 API OVERVIEW
         pcre2_substring_number_from_name()

       pcre2_substring_free()  and  pcre2_substring_list_free()  are also pro-
-       vided, to free the memory used for extracted strings.
+       vided, to free memory used for extracted strings.

       The function pcre2_substitute() can be called to match  a  pattern  and
       return  a  copy of the subject string with substitutions for parts that
@ -652,7 +655,8 @@ MULTITHREADING
       compiled  code  block, so a thread must gain unique write access to the
       pointer    before    calling    pcre2_jit_compile().     Alternatively,
       pcre2_code_copy()  or  pcre2_code_copy_with_tables()  can  be  used  to
-       obtain a private copy of the compiled code.
+       obtain a private copy of the compiled code before calling the JIT  com-
+       piler.

   Context blocks

@ -748,6 +752,7 @@ PCRE2 CONTEXTS
         The newline character sequence
         The compile time nested parentheses limit
         The maximum length of the pattern string
+         The extra option bits (none set by default)

       A compile context is also required if you are using custom memory  man-
       agement.   If  none of these apply, just pass NULL as the context argu-
@ -784,6 +789,17 @@ PCRE2 CONTEXTS
       only argument is a general context. This function builds a set of char-
       acter tables in the current locale.

+       int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext,
+         uint32_t extra_options);
+
+       As  PCRE2  has developed, almost all the 32 option bits that are avail-
+       able in the options argument of pcre2_compile() have been used  up.  To
+       avoid  running  out, the compile context contains a set of extra option
+       bits which are used for some newer, assumed rarer, options. This  func-
+       tion  sets  those bits. It always sets all the bits (either on or off),
+       replacing rather than merging with any existing setting. The available options
+       are defined in the section entitled "Extra compile options" below.
+
       int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext,
         PCRE2_SIZE value);

@ -806,11 +822,12 @@ PCRE2 CONTEXTS
       A pattern can override the value set in the compile context by starting
       with a sequence such as (*CRLF). See the pcre2pattern page for details.

-       When a pattern is compiled with the PCRE2_EXTENDED option, the  newline
-       convention affects the recognition of white space and the end of inter-
-       nal comments starting with #. The value is saved with the compiled pat-
-       tern  for subsequent use by the JIT compiler and by the two interpreted
-       matching functions, pcre2_match() and pcre2_dfa_match().
+       When    a    pattern   is   compiled   with   the   PCRE2_EXTENDED   or
+       PCRE2_EXTENDED_MORE option, the newline convention affects the recogni-
+       tion  of  white space and the end of internal comments starting with #.
+       The value is saved with the compiled pattern for subsequent use by  the
+       JIT   compiler   and   by   the  two  interpreted  matching  functions,
+       pcre2_match() and pcre2_dfa_match().

       int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext,
         uint32_t value);
@ -827,10 +844,11 @@ PCRE2 CONTEXTS
       There is at least one application that runs PCRE2 in threads with  very
       limited  system  stack,  where running out of stack is to be avoided at
       all costs. The parenthesis limit above cannot take account of how  much
-       stack  is  actually  available.  For  a finer control, you can supply a
-       function that is called whenever pcre2_compile() starts  to  compile  a
-       parenthesized  part  of  a  pattern. This function can check the actual
-       stack size (or anything else that it wants to, of course).
+       stack  is  actually  available  during  compilation. For finer control,
+       you can supply a  function  that  is  called  whenever  pcre2_compile()
+       starts  to compile a parenthesized part of a pattern. This function can
+       check the actual stack size (or anything else  that  it  wants  to,  of
+       course).

       The  first  argument to the callout function gives the current depth of
       nesting, and the second is user data that is set up by the  last  argu-
@ -1302,10 +1320,10 @@ COMPILING A PATTERN
       However, if the PCRE2_ALT_VERBNAMES option  is  set,  normal  backslash
       processing  is  applied  to  verb  names  and only an unescaped closing
       parenthesis terminates the name. A closing parenthesis can be  included
-       in a name either as \) or between \Q  and  \E.  If  the  PCRE2_EXTENDED
-       option is set, unescaped whitespace in verb names is skipped and #-com-
-       ments are recognized in this mode, exactly as in the rest of  the  pat-
-       tern.
+       in  a  name either as \) or between \Q and \E. If the PCRE2_EXTENDED or
+       PCRE2_EXTENDED_MORE option is set, unescaped whitespace in  verb  names
+       is  skipped  and  #-comments are recognized in this mode, exactly as in
+       the rest of the pattern.

         PCRE2_AUTO_CALLOUT

@ -1318,7 +1336,14 @@ COMPILING A PATTERN

       If  this  bit is set, letters in the pattern match both upper and lower
       case letters in the subject. It is equivalent to Perl's /i option,  and
-       it can be changed within a pattern by a (?i) option setting.
+       it  can  be  changed  within  a  pattern  by  a (?i) option setting. If
+       PCRE2_UTF is set, Unicode properties are used for all  characters  with
+       more  than one other case, and for all characters whose code points are
+       greater than U+007f. For lower valued characters with  only  one  other
+       case,  a  lookup  table is used for speed. When PCRE2_UTF is not set, a
+       lookup table is used for all code points less than 256, and higher code
+       points  (available  only  in  16-bit or 32-bit mode) are treated as not
+       having another case.

         PCRE2_DOLLAR_ENDONLY

@ -1398,14 +1423,15 @@ COMPILING A PATTERN

         PCRE2_EXTENDED_MORE

-       This  option  has the effect of PCRE2_EXTENDED, but, in addition, space
-       and horizontal tab characters  are  also  ignored  inside  a  character
-       class.   PCRE2_EXTENDED_MORE  is  equivalent to Perl's 5.26 /xx option,
-       and it can be changed within a pattern by a (?xx) option setting.
+       This  option  has  the  effect  of  PCRE2_EXTENDED,  but,  in addition,
+       unescaped space and horizontal tab  characters  are  ignored  inside  a
+       character  class.  PCRE2_EXTENDED_MORE is equivalent to Perl 5.26's /xx
+       option, and it can be changed within a pattern by a (?xx)  option  set-
+       ting.

         PCRE2_FIRSTLINE

-       If this option is set, an  unanchored  pattern  is  required  to  match
+       If this option is set, the start of an unanchored pattern match must be
       before or at the first  newline  in  the  subject  string,  though  the
       matched  text  may  continue  over the newline. See also PCRE2_USE_OFF-
       SET_LIMIT,  which  provides  a  more  general  limiting  facility.   If
@ -1479,11 +1505,11 @@ COMPILING A PATTERN
       If this option is set, it disables the use of numbered capturing paren-
       theses  in the pattern. Any opening parenthesis that is not followed by
       ? behaves as if it were followed by ?: but named parentheses can  still
-       be used for capturing (and they acquire  numbers  in  the  usual  way).
-       There  is  no  equivalent  of  this  option in Perl. Note that, if this
-       option is set, references  to  capturing  groups  (back  references  or
-       recursion/subroutine  calls) may only refer to named groups, though the
-       reference can be by name or by number.
+       be used for capturing (and they acquire numbers in the usual way). This
+       is the same as Perl's /n option.  Note that, when this option  is  set,
+       references to capturing groups (back references or recursion/subroutine
+       calls) may only refer to named groups, though the reference can  be  by
+       name or by number.

         PCRE2_NO_AUTO_POSSESS

@ -1517,8 +1543,8 @@ COMPILING A PATTERN

       There  are  a  number of optimizations that may occur at the start of a
       match, in order to speed up the process. For example, if  it  is  known
-       that an unanchored match must start  with  a  specific  character,  the
-       matching  code searches the subject for that character, and fails imme-
+       that  an  unanchored  match must start with a specific code unit value,
+       the matching code searches the subject for that value, and fails  imme-
       diately  if it cannot find it, without actually running the main match-
       ing function. This means that a special item such as (*COMMIT)  at  the
       start  of  a  pattern is not considered until after a suitable starting
@ -1548,9 +1574,10 @@ COMPILING A PATTERN
       set,  the  initial  scan  along the subject string does not happen. The
       first match attempt is run starting  from  "D"  and  when  this  fails,
       (*COMMIT)  prevents  any  further  matches  being tried, so the overall
-       result is "no match". There are also other start-up optimizations.  For
-       example, a minimum length for the subject may be recorded. Consider the
-       pattern
+       result is "no match".
+
+       There are also other start-up optimizations.  For  example,  a  minimum
+       length for the subject may be recorded. Consider the pattern

         (*MARK:A)(X|Y)

@ -1570,12 +1597,25 @@ COMPILING A PATTERN
       document. If an invalid UTF sequence is found, pcre2_compile()  returns
       a negative error code.

-       If you know that your pattern is valid, and you want to skip this check
-       for performance reasons, you can  set  the  PCRE2_NO_UTF_CHECK  option.
-       When  it  is set, the effect of passing an invalid UTF string as a pat-
-       tern is undefined. It may cause your program to  crash  or  loop.  Note
-       that   this   option   can   also   be   passed  to  pcre2_match()  and
-       pcre_dfa_match(), to suppress validity checking of the subject string.
+       If  you  know  that your pattern is a valid UTF string, and you want to
+       skip  this  check  for   performance   reasons,   you   can   set   the
+       PCRE2_NO_UTF_CHECK  option.  When  it  is set, the effect of passing an
+       invalid UTF string as a pattern is undefined. It may cause your program
+       to crash or loop.
+
+       Note  that  this  option  can  also  be  passed  to  pcre2_match()  and
+       pcre2_dfa_match(), to suppress UTF validity  checking  of  the  subject
+       string.
+
+       Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis-
+       able the error that is given if an escape sequence for an invalid  Uni-
+       code  code  point is encountered in the pattern. In particular, the so-
+       called "surrogate" code points (0xd800 to 0xdfff) are invalid.  If  you
+       want  to  allow  escape  sequences  such  as  \x{d800}  you can set the
+       PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option, as described  in  the
+       section  entitled "Extra compile options" below.  However, this is pos-
+       sible only in UTF-8 and UTF-32 modes, because these values are not rep-
+       resentable in UTF-16.

         PCRE2_UCP

@ -1611,9 +1651,41 @@ COMPILING A PATTERN
       instead of single-code-unit strings. It  is  available  when  PCRE2  is
       built  to  include  Unicode  support (which is the default). If Unicode
       support is not available, the use of this  option  provokes  an  error.
-       Details of how this option changes the behaviour of PCRE2 are given  in
+       Details  of  how  PCRE2_UTF changes the behaviour of PCRE2 are given in
       the pcre2unicode page.

+   Extra compile options
+
+       Unlike the main compile-time options, the extra options are  not  saved
+       with the compiled pattern. The option bits that can be set in a compile
+       context by calling the pcre2_set_compile_extra_options()  function  are
+       as follows:
+
+         PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+
+       This  option  applies when compiling a pattern in UTF-8 or UTF-32 mode.
+       It is forbidden in UTF-16 mode, and ignored in non-UTF  modes.  Unicode
+       "surrogate" code points in the range 0xd800 to 0xdfff are used in pairs
+       in UTF-16 to encode code points with values in  the  range  0x10000  to
+       0x10ffff.  The  surrogates  cannot  therefore be represented in UTF-16.
+       They can be represented in UTF-8 and UTF-32, but are defined as invalid
+       code  points,  and  cause  errors  if  encountered in a UTF-8 or UTF-32
+       string that is being checked for validity by PCRE2.
+
+       These values also cause errors if encountered in escape sequences  such
+       as \x{d912} within a pattern. However, it seems that some applications,
+       when using PCRE2 to check for unwanted  characters  in  UTF-8  strings,
+       explicitly   test  for  the  surrogates  using  escape  sequences.  The
+       PCRE2_NO_UTF_CHECK option does  not  disable  the  error  that  occurs,
+       because  it applies only to the testing of input strings for UTF valid-
+       ity.
+
+       If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set,  surro-
+       gate  code  point values in UTF-8 and UTF-32 patterns no longer provoke
+       errors and are incorporated in the compiled pattern. However, they  can
+       only  match  subject characters if the matching function is called with
+       PCRE2_NO_UTF_CHECK set.
+

 COMPILATION ERROR CODES

@ -1815,7 +1887,9 @@ INFORMATION ABOUT A COMPILED PATTERN
       the form (*LIMIT_DEPTH=nnnn) at the start, the value is  returned.  The
       third  argument  should point to an unsigned 32-bit integer. If no such
       value has been set, the call to pcre2_pattern_info() returns the  error
-       PCRE2_ERROR_UNSET.
+       PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
+       ing if it is less than the limit set or defaulted by the caller of  the
+       match function.

         PCRE2_INFO_FIRSTBITMAP

@ -1833,16 +1907,16 @@ INFORMATION ABOUT A COMPILED PATTERN
       Return information about the first code unit of any matched string, for
       a  non-anchored pattern. The third argument should point to an uint32_t
       variable. If there is a fixed first value, for example, the letter  "c"
-       from a pattern such as (cat|cow|coyote), 1 is returned, and the charac-
-       ter  value can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is
-       no fixed first value, but it is known that a match can  occur  only  at
-       the  start  of  the subject or following a newline in the subject, 2 is
-       returned. Otherwise, and for anchored patterns, 0 is returned.
+       from  a  pattern such as (cat|cow|coyote), 1 is returned, and the value
+       can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is  no  fixed
+       first  value,  but it is known that a match can occur only at the start
+       of the subject or following a newline in the subject,  2  is  returned.
+       Otherwise, and for anchored patterns, 0 is returned.

         PCRE2_INFO_FIRSTCODEUNIT

-       Return the value of the first code unit of any matched  string  in  the
-       situation where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0.
+       Return  the  value  of  the first code unit of any matched string for a
+       pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise  return  0.
       The  third  argument should point to an uint32_t variable. In the 8-bit
       library, the value is always less than 256. In the 16-bit  library  the
       value  can  be  up  to 0xffff. In the 32-bit library in UTF-32 mode the
@ -1877,7 +1951,9 @@ INFORMATION ABOUT A COMPILED PATTERN
       (*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu-
       ment  should  point to an unsigned 32-bit integer. If no such value has
       been  set,  the  call  to  pcre2_pattern_info()   returns   the   error
-       PCRE2_ERROR_UNSET.
+       PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
+       ing if it is less than the limit set or defaulted by the caller of  the
+       match function.

         PCRE2_INFO_JCHANGED

@ -1906,10 +1982,10 @@ INFORMATION ABOUT A COMPILED PATTERN

         PCRE2_INFO_LASTCODEUNIT

-       Return  the value of the rightmost literal data unit that must exist in
-       any matched string, other than at its start, if such a value  has  been
-       recorded.  The  third argument should point to an uint32_t variable. If
-       there is no such value, 0 is returned.
+       Return the value of the rightmost literal code unit that must exist  in
+       any  matched  string,  other  than  at  its  start, for a pattern where
+       PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argu-
+       ment should point to an uint32_t variable.

         PCRE2_INFO_MATCHEMPTY

@ -1925,7 +2001,9 @@ INFORMATION ABOUT A COMPILED PATTERN
       (*LIMIT_MATCH=nnnn) at the start, the  value  is  returned.  The  third
       argument  should  point to an unsigned 32-bit integer. If no such value
       has been set,  the  call  to  pcre2_pattern_info()  returns  the  error
-       PCRE2_ERROR_UNSET.
+       PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
+       ing if it is less than the limit set or defaulted by the caller of  the
+       match function.

         PCRE2_INFO_MAXLOOKBEHIND

@ -1938,7 +2016,7 @@ INFORMATION ABOUT A COMPILED PATTERN
       inspect  the  previous  character.  This is to ensure that at least one
       character from the old segment is retained when a new segment  is  pro-
       cessed. Otherwise, if there are no lookbehinds in the pattern, \A might
-       match incorrectly at the start of a new segment.
+       match incorrectly at the start of a second or subsequent segment.

         PCRE2_INFO_MINLENGTH

@ -2210,9 +2288,9 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
       so, and the current character is CR followed by LF, advance the  start-
       ing offset by two characters instead of one.

-       If a non-zero starting offset is passed when the pattern  is  anchored,
-       an  single  attempt to match at the given offset is made. This can only
-       succeed if the pattern does not require the match to be at the start of
+       If a non-zero starting offset is passed when the pattern is anchored, a
+       single attempt to match at the given offset is made. This can only suc-
+       ceed  if  the  pattern does not require the match to be at the start of
       the subject. In other words, the anchoring must be the result  of  set-
       ting  the PCRE2_ANCHORED option or the use of .* with PCRE2_DOTALL, not
       by starting the pattern with ^ or \A.
@ -2573,6 +2651,10 @@ ERROR RETURNS FROM pcre2_match()

       The nested backtracking depth limit was reached.

+         PCRE2_ERROR_HEAPLIMIT
+
+       The heap limit was reached.
+
         PCRE2_ERROR_INTERNAL

       An  unexpected  internal error has occurred. This error could be caused
@ -3208,7 +3290,7 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
       example,  the pattern "a\d+" is compiled as if it were "a\d++". For DFA
       matching, this means that only one possible  match  is  found.  If  you
       really  do  want multiple matches in such cases, either use an ungreedy
-       repeat  auch  as  "a\d+?"  or set the PCRE2_NO_AUTO_POSSESS option when
+       repeat such as "a\d+?" or set  the  PCRE2_NO_AUTO_POSSESS  option  when
       compiling.

   Error returns from pcre2_dfa_match()
@ -3265,7 +3347,7 @@ AUTHOR

 REVISION

-       Last updated: 17 April 2017
+       Last updated: 17 May 2017
       Copyright (c) 1997-2017 University of Cambridge.
 ------------------------------------------------------------------------------
 
@ -6803,12 +6885,14 @@ VERTICAL BAR
 INTERNAL OPTION SETTING

       The  settings  of  the  PCRE2_CASELESS,  PCRE2_MULTILINE, PCRE2_DOTALL,
-       PCRE2_EXTENDED, and PCRE2_EXTENDED_MORE options (which are Perl-compat-
-       ible)  can  be  changed  from  within the pattern by a sequence of Perl
-       option letters enclosed between "(?" and ")". The option letters are
+       PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE  options
+       (which are Perl-compatible) can be changed from within the pattern by a
+       sequence of Perl option letters enclosed  between  "(?"  and  ")".  The
+       option letters are

         i  for PCRE2_CASELESS
         m  for PCRE2_MULTILINE
+         n  for PCRE2_NO_AUTO_CAPTURE
         s  for PCRE2_DOTALL
         x  for PCRE2_EXTENDED
         xx for PCRE2_EXTENDED_MORE
@ -9649,6 +9733,7 @@ OPTION SETTING
         (?i)            caseless
         (?J)            allow duplicate names
         (?m)            multiline
+         (?n)            no auto capture
         (?s)            single line (dotall)
         (?U)            default ungreedy (lazy)
         (?x)            extended: ignore white space except in classes
@ -9856,7 +9941,7 @@ UNICODE PROPERTY SUPPORT
       names  for  properties are supported. For example, \p{L} matches a let-
       ter. Its Perl synonym, \p{Letter}, is not supported.   Furthermore,  in
       Perl,  many properties may optionally be prefixed by "Is", for compati-
-       bility with Perl 5.6. PCRE does not support this.
+       bility with Perl 5.6. PCRE2 does not support this.


 WIDE CHARACTERS AND UTF MODES
@ -9907,9 +9992,15 @@ WIDE CHARACTERS AND UTF MODES
       escapes (\h, \H, \v, and \V) do match all the appropriate Unicode char-
       acters, whether or not PCRE2_UCP is set.

-       Case-insensitive matching in UTF mode makes use of Unicode  properties.
-       A  few  Unicode characters such as Greek sigma have more than two code-
-       points that are case-equivalent, and these are treated as such.
+
+CASE-EQUIVALENCE IN UTF MODES
+
+       Case-insensitive matching in a UTF mode makes use of Unicode properties
+       except for characters whose code points are less than 128 and that have
+       at most two case-equivalent values. For these, a direct table lookup is
+       used  for speed. A few Unicode characters such as Greek sigma have more
+       than two codepoints that are case-equivalent, and these are treated  as
+       such.


 VALIDITY OF UTF STRINGS
@ -9965,6 +10056,14 @@ VALIDITY OF UTF STRINGS
       If  you  pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the
       result is undefined and your program may crash or loop indefinitely.

+       Note that setting PCRE2_NO_UTF_CHECK at compile time does  not  disable
+       the  error  that  is given if an escape sequence for an invalid Unicode
+       code point is encountered in the pattern. If you want to  allow  escape
+       sequences  such  as  \x{d800}  (a surrogate code point) you can set the
+       PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is pos-
+       sible only in UTF-8 and UTF-32 modes, because these values are not rep-
+       resentable in UTF-16.
+
   Errors in UTF-8 strings

       The following negative error codes are given for invalid UTF-8 strings:
@ -10059,8 +10158,8 @@ AUTHOR

 REVISION

-       Last updated: 03 July 2016
-       Copyright (c) 1997-2016 University of Cambridge.
+       Last updated: 17 May 2017
+       Copyright (c) 1997-2017 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
--- a/doc/pcre2_compile.3
+++ b/doc/pcre2_compile.3
@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE 3 "04 April 2017" "PCRE2 10.30"
+.TH PCRE2_COMPILE 3 "17 May 2017" "PCRE2 10.30"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@ -35,6 +35,7 @@ system stack size checking, or to change one or more of these parameters:
  The newline character sequence;
  The compile time nested parentheses limit;
  The maximum pattern length (in code units) that is allowed.
+  The additional options bits
 .sp
 The option bits are:
 .sp
--- a/doc/pcre2_set_compile_extra_options.3
+++ b/doc/pcre2_set_compile_extra_options.3
@ -0,0 +1,33 @@
+.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "17 May 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP,
+.B "  uint32_t \fIextra_options\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function sets additional option bits for \fBpcre2_compile()\fP that are
+housed in a compile context. It completely replaces all the bits. The extra 
+options are:
+.sp
+.\" JOIN
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \ex{d800} to \ex{dfff}
+                                         in UTF-8 and UTF-32 modes
+.sp
+There is a complete description of the PCRE2 native API in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcre2posix\fP
+.\"
+page.
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "20 April 2017" "PCRE2 10.30"
+.TH PCRE2API 3 "17 May 2017" "PCRE2 10.30"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -90,6 +90,9 @@ document for an overview of all the PCRE2 documentation.
 .B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP,
 .B "  const unsigned char *\fItables\fP);"
 .sp
+.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP,
+.B "  uint32_t \fIextra_options\fP);"
+.sp
 .B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
 .B "  PCRE2_SIZE \fIvalue\fP);"
 .sp
@ -643,6 +646,7 @@ following compile-time parameters:
  The newline character sequence
  The compile time nested parentheses limit
  The maximum length of the pattern string
+  The extra options bits (none set by default) 
 .sp
 A compile context is also required if you are using custom memory management.
 If none of these apply, just pass NULL as the context argument of
@ -685,6 +689,23 @@ argument is a general context. This function builds a set of character tables
 in the current locale.
 .sp
 .nf
+.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP,
+.B "  uint32_t \fIextra_options\fP);"
+.fi
+.sp
+As PCRE2 has developed, almost all the 32 option bits that are available in
+the \fIoptions\fP argument of \fBpcre2_compile()\fP have been used up. To avoid
+running out, the compile context contains a set of extra option bits which are 
+used for some newer, assumed rarer, options. This function sets those bits. It 
+always sets all the bits (either on or off); that is, the value given replaces
+any existing setting rather than being combined with it. The available options
+are defined in the section entitled "Extra compile options"
+.\" HTML <a href="#extracompileoptions">
+.\" </a>
+below.
+.\"
+.sp
+.nf
 .B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
 .B "  PCRE2_SIZE \fIvalue\fP);"
 .fi
@ -1535,12 +1556,27 @@ in the
 document. If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a
 negative error code. 
 .P
-If you know that your pattern is valid, and you want to skip this check for
-performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When it is set,
-the effect of passing an invalid UTF string as a pattern is undefined. It may
-cause your program to crash or loop. Note that this option can also be passed
-to \fBpcre2_match()\fP and \fBpcre_dfa_match()\fP, to suppress validity
-checking of the subject string.
+If you know that your pattern is a valid UTF string, and you want to skip this
+check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When
+it is set, the effect of passing an invalid UTF string as a pattern is
+undefined. It may cause your program to crash or loop. 
+.P
+Note that this option can also be passed to \fBpcre2_match()\fP and
+\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject
+string.
+.P
+Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the
+error that is given if an escape sequence for an invalid Unicode code point is
+encountered in the pattern. In particular, the so-called "surrogate" code
+points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences
+such as \ex{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra
+option, as described in the section entitled "Extra compile options"
+.\" HTML <a href="#extracompileoptions">
+.\" </a>
+below. 
+.\"
+However, this is possible only in UTF-8 and UTF-32 modes, because these values
+are not representable in UTF-16.
 .sp
  PCRE2_UCP
 .sp
@ -1594,6 +1630,37 @@ behaviour of PCRE2 are given in the
 page.
 .
 .
+.\" HTML <a name="extracompileoptions"></a>
+.SS "Extra compile options"
+.rs
+.sp
+Unlike the main compile-time options, the extra options are not saved with the
+compiled pattern. The option bits that can be set in a compile context by
+calling the \fBpcre2_set_compile_extra_options()\fP function are as follows:
+.sp
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+.sp
+This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is 
+forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate"
+code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode
+code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot 
+therefore be represented in UTF-16. They can be represented in UTF-8 and
+UTF-32, but are defined as invalid code points, and cause errors if encountered 
+in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2. 
+.P
+These values also cause errors if encountered in escape sequences such as
+\ex{d912} within a pattern. However, it seems that some applications, when
+using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test
+for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does
+not disable the error that occurs, because it applies only to the testing of
+input strings for UTF validity.
+.P
+If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code 
+point values in UTF-8 and UTF-32 patterns no longer provoke errors and are 
+incorporated in the compiled pattern. However, they can only match subject 
+characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
+.
+.
 .SH "COMPILATION ERROR CODES"
 .rs
 .sp
@ -3421,6 +3488,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 20 April 2017
+Last updated: 17 May 2017
 Copyright (c) 1997-2017 University of Cambridge.
 .fi
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "18 April 2017" "PCRE 10.30"
+.TH PCRE2TEST 1 "17 May 2017" "PCRE2 10.30"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -519,17 +519,21 @@ by a previous \fB#pattern\fP command.
 .SS "Setting compilation options"
 .rs
 .sp
-The following modifiers set options for \fBpcre2_compile()\fP. There are some
-single-letter abbreviations that are the same as Perl options. There is special
-handling for /x: if a second x is present, PCRE2_EXTENDED is converted into
-PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well, 
-though this makes no difference to the way \fBpcre2_compile()\fP behaves. See
+The following modifiers set options for \fBpcre2_compile()\fP. Most of them set
+bits in the options argument of that function, but those whose names start with 
+PCRE2_EXTRA are additional options that are set in the compile context. For the
+main options, there are some single-letter abbreviations that are the same as
+Perl options. There is special handling for /x: if a second x is present,
+PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A third
+appearance adds PCRE2_EXTENDED as well, though this makes no difference to the
+way \fBpcre2_compile()\fP behaves. See
 .\" HREF
 \fBpcre2api\fP
 .\"
 for a description of the effects of these options.
 .sp
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 
      alt_bsux                  set PCRE2_ALT_BSUX
      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
      alt_verbnames             set PCRE2_ALT_VERBNAMES
@ -1788,6 +1792,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 18 April 2017
+Last updated: 17 May 2017
 Copyright (c) 1997-2017 University of Cambridge.
 .fi
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -503,14 +503,19 @@ PATTERN MODIFIERS

   Setting compilation options

-       The  following modifiers set options for pcre2_compile(). The most com-
-       mon ones have single-letter abbreviations, with special handling for /x
-       (to  make  it  like  Perl). If a second x is present, PCRE2_EXTENDED is
-       converted   into   PCRE2_EXTENDED_MORE.   A   third   appearance   adds
-       PCRE2_EXTENDED  as  well. See pcre2api for a description of the effects
+       The  following  modifiers set options for pcre2_compile(). Most of them
+       set bits in the options argument of  that  function,  but  those  whose
+       names start with PCRE2_EXTRA are additional options that are set in the
+       compile context. For the main options,  there  are  some  single-letter
+       abbreviations  that are the same as Perl options. There is special han-
+       dling for /x: if a second x is  present,  PCRE2_EXTENDED  is  converted
+       into   PCRE2_EXTENDED_MORE   as   in  Perl.  A  third  appearance  adds
+       PCRE2_EXTENDED as well, though this makes  no  difference  to  the  way
+       pcre2_compile()  behaves. See pcre2api for a description of the effects
       of these options.

             allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+             allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
             alt_bsux                  set PCRE2_ALT_BSUX
             alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
             alt_verbnames             set PCRE2_ALT_VERBNAMES
@ -529,7 +534,7 @@ PATTERN MODIFIERS
             never_backslash_c         set PCRE2_NEVER_BACKSLASH_C
             never_ucp                 set PCRE2_NEVER_UCP
             never_utf                 set PCRE2_NEVER_UTF
-             no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
+         /n  no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
             no_auto_possess           set PCRE2_NO_AUTO_POSSESS
             no_dotstar_anchor         set PCRE2_NO_DOTSTAR_ANCHOR
             no_start_optimize         set PCRE2_NO_START_OPTIMIZE
@ -549,7 +554,8 @@ PATTERN MODIFIERS
   Setting compilation controls

       The  following  modifiers  affect  the  compilation  process or request
-       information about the pattern:
+       information about the pattern. There  are  single-letter  abbreviations
+       for some that are heavily used in the test files.

             bsr=[anycrlf|unicode]     specify \R handling
         /B  bincode                   show binary code without lengths
@ -1644,5 +1650,5 @@ AUTHOR

 REVISION

-       Last updated: 17 April 2017
+       Last updated: 17 May 2017
       Copyright (c) 1997-2017 University of Cambridge.
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "20 April 2017" "PCRE2 10.30"
+.TH PCRE2UNICODE 3 "17 May 2017" "PCRE2 10.30"
 .SH NAME
 PCRE - Perl-compatible regular expressions (revised API)
 .SH "UNICODE AND UTF SUPPORT"
@ -164,6 +164,14 @@ or \fBpcre2_dfa_match()\fP.
 .P
 If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
 is undefined and your program may crash or loop indefinitely.
+.P
+Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
+that is given if an escape sequence for an invalid Unicode code point is
+encountered in the pattern. If you want to allow escape sequences such as
+\ex{d800} (a surrogate code point) you can set the
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible
+only in UTF-8 and UTF-32 modes, because these values are not representable in
+UTF-16.
 .
 .
 .\" HTML <a name="utf8strings"></a>
@ -272,6 +280,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 20 April 2017
+Last updated: 17 May 2017
 Copyright (c) 1997-2017 University of Cambridge.
 .fi
--- a/src/pcre2.h
+++ b/src/pcre2.h
@ -139,6 +139,10 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_USE_OFFSET_LIMIT    0x00800000u  /*   J M D */
 #define PCRE2_EXTENDED_MORE       0x01000000u  /* C       */

+/* An additional compile options word is available in the compile context. */
+
+#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  0x00000001u  /* C       */
+
 /* These are for pcre2_jit_compile(). */

 #define PCRE2_JIT_COMPLETE        0x00000001u  /* For full matching */
@ -448,6 +452,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@ -721,6 +727,7 @@ pcre2_compile are called by application code. */
 #define pcre2_set_bsr                         PCRE2_SUFFIX(pcre2_set_bsr_)
 #define pcre2_set_callout                     PCRE2_SUFFIX(pcre2_set_callout_)
 #define pcre2_set_character_tables            PCRE2_SUFFIX(pcre2_set_character_tables_)
+#define pcre2_set_compile_extra_options       PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
 #define pcre2_set_compile_recursion_guard     PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
 #define pcre2_set_depth_limit                 PCRE2_SUFFIX(pcre2_set_depth_limit_)
 #define pcre2_set_glob_separator              PCRE2_SUFFIX(pcre2_set_glob_separator_)
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@ -139,6 +139,10 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_USE_OFFSET_LIMIT    0x00800000u  /*   J M D */
 #define PCRE2_EXTENDED_MORE       0x01000000u  /* C       */

+/* An additional compile options word is available in the compile context. */
+
+#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  0x00000001u  /* C       */
+
 /* These are for pcre2_jit_compile(). */

 #define PCRE2_JIT_COMPLETE        0x00000001u  /* For full matching */
@ -448,6 +452,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
  pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@ -721,6 +727,7 @@ pcre2_compile are called by application code. */
 #define pcre2_set_bsr                         PCRE2_SUFFIX(pcre2_set_bsr_)
 #define pcre2_set_callout                     PCRE2_SUFFIX(pcre2_set_callout_)
 #define pcre2_set_character_tables            PCRE2_SUFFIX(pcre2_set_character_tables_)
+#define pcre2_set_compile_extra_options       PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
 #define pcre2_set_compile_recursion_guard     PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
 #define pcre2_set_depth_limit                 PCRE2_SUFFIX(pcre2_set_depth_limit_)
 #define pcre2_set_glob_separator              PCRE2_SUFFIX(pcre2_set_glob_separator_)
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -717,7 +717,8 @@ enum { ERR0 = COMPILE_ERROR_BASE,
       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
-       ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90 };
+       ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
+       ERR91};

 /* This is a table of start-of-pattern options such as (*UTF) and settings such
 as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -1474,7 +1475,10 @@ else
      if (utf)
        {
        if (c > 0x10ffffU) *errorcodeptr = ERR77;
-          else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
+        else
+          if (c >= 0xd800 && c <= 0xdfff &&
+            (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
+              *errorcodeptr = ERR73;
        }
      else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
      }
@ -1663,7 +1667,8 @@ else
        }
      else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
        {
-        if (utf && c >= 0xd800 && c <= 0xdfff)
+        if (utf && c >= 0xd800 && c <= 0xdfff &&
+            (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
          {
          ptr--;
          *errorcodeptr = ERR73;
@ -1732,7 +1737,8 @@ else
          }
        else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
          {
-          if (utf && c >= 0xd800 && c <= 0xdfff)
+          if (utf && c >= 0xd800 && c <= 0xdfff &&
+              (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
            {
            ptr--;
            *errorcodeptr = ERR73;
@ -9100,7 +9106,9 @@ if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
 #endif

 /* Check UTF. We have the original options in 'options', with that value as
-modified by (*UTF) etc in cb->external_options. */
+modified by (*UTF) etc in cb->external_options. The extra option
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not permitted in UTF-16 mode because the
+surrogate code points cannot be represented in UTF-16. */

 utf = (cb.external_options & PCRE2_UTF) != 0;
 if (utf)
@ -9113,6 +9121,14 @@ if (utf)
  if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
       (errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
    goto HAD_ERROR;  /* Offset was set by valid_utf() */
+
+#if PCRE2_CODE_UNIT_WIDTH == 16
+  if ((ccontext->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)
+    {
+    errorcode = ERR91;
+    goto HAD_EARLY_ERROR;
+    }
+#endif
  }

 /* Check UCP lockout. */
--- a/src/pcre2_context.c
+++ b/src/pcre2_context.c
@ -138,7 +138,8 @@ const pcre2_compile_context PRIV(default_compile_context) = {
  PCRE2_UNSET,                               /* Max pattern length */
  BSR_DEFAULT,                               /* Backslash R default */
  NEWLINE_DEFAULT,                           /* Newline convention */
-  PARENS_NEST_LIMIT };                       /* As it says */
+  PARENS_NEST_LIMIT,                         /* As it says */
+  0 };                                       /* Extra options */

 /* The create function copies the default into the new memory, but must
 override the default memory handling functions if a gcontext was provided. */
@ -371,6 +372,13 @@ ccontext->parens_nest_limit = limit;
 return 0;
 }

+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
+{
+ccontext->extra_options = options;
+return 0;
+}
+
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
  int (*guard)(uint32_t, void *), void *user_data)
@ -448,3 +456,4 @@ return 0;


 /* End of pcre2_context.c */
+
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@ -176,6 +176,7 @@ static const unsigned char compile_error_texts[] =
  "internal error: unknown code in parsed pattern\0"
  /* 90 */
  "internal error: bad code value in parsed_skip()\0"
+  "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" 
  ;

 /* Match-time and UTF error texts are in the same format. */
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@ -572,6 +572,7 @@ typedef struct pcre2_real_compile_context {
  uint16_t bsr_convention;
  uint16_t newline_convention;
  uint32_t parens_nest_limit;
+  uint32_t extra_options; 
 } pcre2_real_compile_context;

 /* The real match context structure. */
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -194,6 +194,7 @@ void vms_setsymbol( char *, char *, int );
 #define LOCALESIZE 32             /* Size of locale name */
 #define LOOPREPEAT 500000         /* Default loop count for timing */
 #define MALLOCLISTSIZE 20         /* For remembering mallocs */
+#define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
 #define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
 #define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
 #define VERSION_SIZE 64           /* Size of buffer for the version strings */
@ -577,6 +578,7 @@ static modstruct modlist[] = {
  { "allaftertext",               MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
  { "allcaptures",                MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
  { "allow_empty_class",          MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
+  { "allow_surrogate_escapes",    MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
  { "allusedtext",                MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
  { "alt_bsux",                   MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
  { "alt_circumflex",             MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
@ -686,6 +688,8 @@ static modstruct modlist[] = {
  PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
  PCRE2_UNGREEDY)
  
+#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0) 
+
 #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB)

@ -4025,6 +4029,32 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%
 }


+/*************************************************
+*           Show compile extra options           *
+*************************************************/
+
+/* Called for unsupported POSIX options.
+
+Arguments:
+  options     an options word
+  before      text to print before
+  after       text to print after
+
+Returns:      nothing
+*/
+
+static void
+show_compile_extra_options(uint32_t options, const char *before, 
+  const char *after) 
+{ 
+if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
+else fprintf(outfile, "%s%s%s",   
+  before,
+  ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
+  after);
+}
+
+

 #ifdef SUPPORT_PCRE2_8
 /*************************************************
@ -5161,6 +5191,16 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
      pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
    msg = "";
    }
+
+  if ((FLD(pat_context, extra_options) & 
+       ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
+    {
+    show_compile_extra_options(
+      FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
+        msg, "");
+    msg = "";       
+    }     
+
  if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
      (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
    {
@ -5170,6 +5210,10 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
    }

  if (local_newline_default != 0) prmsg(&msg, "#newline_default");
+  if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
+    prmsg(&msg, "max_pattern_length");
+  if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
+    prmsg(&msg, "parens_nest_limit"); 
    
  if (msg[0] == 0) fprintf(outfile, "\n");

@ -8123,6 +8167,7 @@ max_oveccount = DEFAULT_OVECCOUNT;
  G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))

 #define CONTEXTTESTS \
+  (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
  (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
  (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
  (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL)
@ -8163,7 +8208,7 @@ if (test_mode == PCRE32_MODE)
 /* Set a default parentheses nest limit that is large enough to run the
 standard tests (this also exercises the function). */

-PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, 220);
+PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);

 /* Handle command line modifier settings, sending any error messages to
 stderr. We need to know the mode before modifying the context, and it is tidier
--- a/testdata/testinput10
+++ b/testdata/testinput10
@ -458,4 +458,13 @@

 /[\s[:^ascii:]]/B,ucp

+# A special extra option allows excaped surrogate code points in 8-bit mode,
+# but subjects containing them must not be UTF-checked.
+
+/\x{d800}/utf,allow_surrogate_escapes
+    \x{d800}\=no_utf_check
+
+/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
+    \x{dfff}\x{df01}\=no_utf_check
+
 # End of testinput10
--- a/testdata/testinput12
+++ b/testdata/testinput12
@ -363,4 +363,14 @@
 /\pP/ucp
    \x{7fffffff}

+# A special extra option allows excaped surrogate code points in 32-bit mode,
+# but subjects containing them must not be UTF-checked. These patterns give
+# errors in 16-bit mode.
+
+/\x{d800}/utf,allow_surrogate_escapes
+    \x{d800}\=no_utf_check
+
+/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
+    \x{dfff}\x{df01}\=no_utf_check
+
 # End of testinput12
--- a/testdata/testinput18
+++ b/testdata/testinput18
@ -5,7 +5,7 @@
 #forbid_utf
 #pattern posix

-# Test invalid options
+# Test some invalid options

 /abc/auto_callout

@ -15,6 +15,10 @@
 /abc/
  abc\=partial_hard
  
+/a(())bc/parens_nest_limit=1
+
+/abc/allow_surrogate_escapes,max_pattern_length=2
+
 # Real tests

 /abc/
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@ -1575,4 +1575,15 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
        End
 ------------------------------------------------------------------

+# A special extra option allows excaped surrogate code points in 8-bit mode,
+# but subjects containing them must not be UTF-checked.
+
+/\x{d800}/utf,allow_surrogate_escapes
+    \x{d800}\=no_utf_check
+ 0: \x{d800}
+
+/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
+    \x{dfff}\x{df01}\=no_utf_check
+ 0: \x{dfff}\x{df01}
+
 # End of testinput10
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@ -1421,4 +1421,16 @@ No match
 ** Truncation will probably give the wrong result.
 No match

+# A special extra option allows excaped surrogate code points in 32-bit mode,
+# but subjects containing them must not be UTF-checked. These patterns give
+# errors in 16-bit mode.
+
+/\x{d800}/utf,allow_surrogate_escapes
+Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
+    \x{d800}\=no_utf_check
+
+/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
+Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
+    \x{dfff}\x{df01}\=no_utf_check
+
 # End of testinput12
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@ -1413,4 +1413,16 @@ No match
    \x{7fffffff}
 No match

+# A special extra option allows excaped surrogate code points in 32-bit mode,
+# but subjects containing them must not be UTF-checked. These patterns give
+# errors in 16-bit mode.
+
+/\x{d800}/utf,allow_surrogate_escapes
+    \x{d800}\=no_utf_check
+ 0: \x{d800}
+
+/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
+    \x{dfff}\x{df01}\=no_utf_check
+ 0: \x{dfff}\x{df01}
+
 # End of testinput12
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@ -15970,7 +15970,6 @@ Error -2: partial match
 Error -1: no match
 Error 0: PCRE2_ERROR_BADDATA (unknown error number)
 Error 100: no error
-Error 188: pattern string is longer than the limit set by the application
-Error 189: internal error: unknown code in parsed pattern
-Error 190: internal error: bad code value in parsed_skip()
-Error 191: PCRE2_ERROR_BADDATA (unknown error number)
+Error 101: \ at end of pattern
+Error 191: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
+Error 192: PCRE2_ERROR_BADDATA (unknown error number)