Implemented PCRE2_ALT_VERBNAMES

2015-08-30 17:47:36 +00:00 · 2015-08-30 17:47:36 +00:00 · d2e87a75af
parent fd08e11c1e
commit d2e87a75af
28 changed files with 531 additions and 280 deletions
--- a/2
+++ b/2
@ -148,7 +148,7 @@ while (<STDIN>)
        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
          $ref, $ref);
        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
-          $ref, $ref);
+          $ref);
        $ref++;
        }
      else
--- a/2
+++ b/2
@ -167,6 +167,8 @@ test (there are now 20 in total).
 47. Modifier lists in pcre2test were splitting at spaces without the required 
 commas.
 48. Implemented PCRE2_ALT_VERBNAMES.
 Version 10.20 30-June-2015
 --------------------------
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@ -97,6 +97,7 @@ can skip ahead to the CMake section.
       pcre2_context.c
       pcre2_dfa_match.c
       pcre2_error.c
       pcre2_find_bracket.c 
       pcre2_jit_compile.c
       pcre2_maketables.c
       pcre2_match.c
@ -388,4 +389,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
 recommended download site.
 =============================
-Last Updated: 15 June 2015
+Last Updated: 16 July 2015
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -724,6 +724,7 @@ The distribution should contain the files listed below.
  src/pcre2_context.c      )
  src/pcre2_dfa_match.c    )
  src/pcre2_error.c        )
  src/pcre2_find_bracket.c ) 
  src/pcre2_jit_compile.c  )
  src/pcre2_jit_match.c    ) sources for the functions in the library,
  src/pcre2_jit_misc.c     )   and some internal functions that they use
@ -832,4 +833,4 @@ The distribution should contain the files listed below.
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 24 April 2015
+Last updated: 16 July 2015
--- a/doc/html/pcre2_code_free.html
+++ b/doc/html/pcre2_code_free.html
@ -19,7 +19,7 @@ SYNOPSIS
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
-<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
+<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
 </P>
 <br><b>
 DESCRIPTION
--- a/doc/html/pcre2_match_data_create.html
+++ b/doc/html/pcre2_match_data_create.html
@ -19,7 +19,7 @@ SYNOPSIS
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
-<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
+<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <br><b>
--- a/doc/html/pcre2_match_data_create_from_pattern.html
+++ b/doc/html/pcre2_match_data_create_from_pattern.html
@ -19,8 +19,8 @@ SYNOPSIS
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
-<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
+<b>pcre2_match_data *pcre2_match_data_create_from_pattern(</b>
-<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<b>  const pcre2_code *<i>code</i>, pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <br><b>
 DESCRIPTION
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -70,15 +70,15 @@ document for an overview of all the PCRE2 documentation.
 <b>  pcre2_compile_context *<i>ccontext</i>);</b>
 <br>
 <br>
-<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
+<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
 <br>
 <br>
-<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
+<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
+<b>pcre2_match_data *pcre2_match_data_create_from_pattern(</b>
-<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<b>  const pcre2_code *<i>code</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
 <b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
@ -936,7 +936,7 @@ The <i>where</i> argument should point to a buffer that is at least 24 code
 units long. (The exact length required can be found by calling
 <b>pcre2_config()</b> with <b>where</b> set to NULL.) If PCRE2 has been compiled
 without Unicode support, the buffer is filled with the text "Unicode not
-supported". Otherwise, the Unicode version string (for example, "7.0.0") is
+supported". Otherwise, the Unicode version string (for example, "8.0.0") is
 inserted. The number of code units used is returned. This is the length of the
 string plus one unit for the terminating zero.
 <pre>
@ -961,7 +961,7 @@ zero.
 <b>  pcre2_compile_context *<i>ccontext</i>);</b>
 <br>
 <br>
-<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
+<b>void pcre2_code_free(pcre2_code *<i>code</i>);</b>
 </P>
 <P>
 The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
@ -1083,6 +1083,15 @@ after any internal newline. However, it does not match after a newline at the
 end of the subject, for compatibility with Perl. If you want a multiline
 circumflex also to match after a terminating newline, you must set
 PCRE2_ALT_CIRCUMFLEX.
 <pre>
  PCRE2_ALT_VERBNAMES
 </pre>
 By default, for compatibility with Perl, the name in any verb sequence such as
 (*MARK:NAME) is any sequence of characters that does not include a closing
 parenthesis. The name is not processed in any way, and it is not possible to
 include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
 option is set, normal backslash processing is applied to verb names and only an
 unescaped closing parenthesis terminates the name.
 <pre>
  PCRE2_AUTO_CALLOUT
 </pre>
@ -1778,12 +1787,12 @@ documentation.
 <a name="matchdatablock"></a></P>
 <br><a name="SEC25" href="#TOC1">THE MATCH DATA BLOCK</a><br>
 <P>
-<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
+<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
+<b>pcre2_match_data *pcre2_match_data_create_from_pattern(</b>
-<b>  pcre2_general_context *<i>gcontext</i>);</b>
+<b>  const pcre2_code *<i>code</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
 <b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
@ -2010,12 +2019,20 @@ If the pattern is anchored, such a match can occur only if the pattern contains
 </pre>
 When PCRE2_UTF is set at compile time, the validity of the subject as a UTF
 string is checked by default when <b>pcre2_match()</b> is subsequently called.
-The entire string is checked before any other processing takes place, and a
+If a non-zero starting offset is given, the check is applied only to that part
 of the subject that could be inspected during matching, and there is a check
 that the starting offset points to the first code unit of a character or to the
 end of the subject. If there are no lookbehind assertions in the pattern, the
 check starts at the starting offset. Otherwise, it starts at the length of the
 longest lookbehind before the starting offset, or at the start of the subject
 if there are not that many characters before the starting offset. Note that the
 sequences \b and \B are one-character lookbehinds.
 </P>
 <P>
 The check is carried out before any other processing takes place, and a
 negative error code is returned if the check fails. There are several UTF error
 codes for each code unit width, corresponding to different problems with the
-code unit sequence. The value of <i>startoffset</i> is also checked, to ensure
+code unit sequence. There are discussions about the validity of
 that it points to the start of a character or to the end of the subject. There
 are discussions about the validity of
 <a href="pcre2unicode.html#utf8strings">UTF-8 strings,</a>
 <a href="pcre2unicode.html#utf16strings">UTF-16 strings,</a>
 and
@ -2564,12 +2581,12 @@ be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
 In the replacement string, which is interpreted as a UTF string in UTF mode,
 and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
 dollar character is an escape character that can specify the insertion of
-characters from capturing groups in the pattern. The following forms are
+characters from capturing groups or (*MARK) items in the pattern. The following
-recognized:
+forms are recognized:
 <pre>
  $$                  insert a dollar character
-  $&#60;n&#62;    insert the contents of group &#60;n&#62;
+  $&#60;n&#62; or ${&#60;n&#62;}      insert the contents of group &#60;n&#62;
-  ${&#60;n&#62;}  insert the contents of group &#60;n&#62;
+  $*MARK or ${*MARK}  insert the name of the last (*MARK) encountered 
 </pre>
 Either a group number or a group name can be given for &#60;n&#62;. Curly brackets are
 required only if the following character would be interpreted as part of the
@ -2580,6 +2597,15 @@ calling <b>pcre2_copy_byname()</b> or <b>pcre2_copy_bynumber()</b> as
 appropriate.
 </P>
 <P>
 The facility for inserting a (*MARK) name can be used to perform simple 
 simultaneous substitutions, as this <b>pcre2test</b> example shows:
 <pre>
  /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
      apple lemon
   2: pear orange
 </pre>
 </P>
 <P>
 The first seven arguments of <b>pcre2_substitute()</b> are the same as for
 <b>pcre2_match()</b>, except that the partial matching options are not
 permitted, and <i>match_data</i> may be passed as NULL, in which case a match
@ -2883,7 +2909,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC40" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 22 April 2015
+Last updated: 30 August 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@ -224,8 +224,14 @@ whether a match operation was executed by JIT or by the interpreter.
 </P>
 <P>
 You may safely use the same JIT stack for more than one pattern (either by
-assigning directly or by callback), as long as the patterns are all matched
+assigning directly or by callback), as long as the patterns are matched
-sequentially in the same thread. In a multithread application, if you do not
+sequentially in the same thread. Currently, the only way to set up
 non-sequential matches in one thread is to use callouts: if a callout function
 starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 </P>
 <P>
 In a multithread application, if you do not
 specify a JIT stack, or if you assign or pass back NULL from a callback, that
 is thread-safe, because each thread has its own machine stack. However, if you
 assign or pass back a non-NULL JIT stack, this must be a different stack for
@ -419,9 +425,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC13" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 27 November 2014
+Last updated: 28 July 2015
 <br>
-Copyright &copy; 1997-2014 University of Cambridge.
+Copyright &copy; 1997-2015 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@ -736,6 +736,8 @@ Those that are not part of an identified script are lumped together as
 "Common". The current list of scripts is:
 </P>
 <P>
 Ahom,
 Anatolian_Hieroglyphs,
 Arabic,
 Armenian,
 Avestan,
@ -776,6 +778,7 @@ Gurmukhi,
 Han,
 Hangul,
 Hanunoo,
 Hatran,
 Hebrew,
 Hiragana,
 Imperial_Aramaic,
@ -812,12 +815,14 @@ Miao,
 Modi,
 Mongolian,
 Mro,
 Multani,
 Myanmar,
 Nabataean,
 New_Tai_Lue,
 Nko,
 Ogham,
 Ol_Chiki,
 Old_Hungarian,
 Old_Italic,
 Old_North_Arabian,
 Old_Permic,
@ -839,6 +844,7 @@ Saurashtra,
 Sharada,
 Shavian,
 Siddham,
 SignWriting,
 Sinhala,
 Sora_Sompeng,
 Sundanese,
@ -1322,9 +1328,19 @@ where a range ending character is expected. For example, [z-\xff] is valid,
 but [A-\d] and [A-[:digit:]] are not.
 </P>
 <P>
-Ranges operate in the collating sequence of character values. They can also be
+Ranges normally include all code points between the start and end characters,
-used for characters specified numerically, for example [\000-\037]. Ranges
+inclusive. They can also be used for code points specified numerically, for
-can include any characters that are valid for the current mode.
+example [\000-\037]. Ranges can include any characters that are valid for the
 current mode.
 </P>
 <P>
 There is a special case in EBCDIC environments for ranges whose end points are 
 both specified as literal letters in the same case. For compatibility with 
 Perl, EBCDIC code points within the range that are not letters are omitted. For 
 example, [h-k] matches only four characters, even though the codes for h and k 
 are 0x88 and 0x92, a range of 11 code points. However, if the range is 
 specified numerically, for example, [\x88-\x92] or [h-\x92], all code points
 are included.
 </P>
 <P>
 If a range that includes letters is used when caseless matching is set, it
@ -2899,14 +2915,23 @@ remarks apply to the PCRE2 features described in this section.
 </P>
 <P>
 The new verbs make use of what was previously invalid syntax: an opening
-parenthesis followed by an asterisk. They are generally of the form
+parenthesis followed by an asterisk. They are generally of the form (*VERB) or
-(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving
+(*VERB:NAME). Some verbs take either form, possibly behaving differently
-differently depending on whether or not a name is present. A name is any
+depending on whether or not a name is present.
-sequence of characters that does not include a closing parenthesis. The maximum
+</P>
-length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit
+<P>
-libraries. If the name is empty, that is, if the closing parenthesis
+By default, for compatibility with Perl, a name is any sequence of characters
-immediately follows the colon, the effect is as if the colon were not there.
+that does not include a closing parenthesis. The name is not processed in 
-Any number of these verbs may occur in a pattern.
+any way, and it is not possible to include a closing parenthesis in the name.
 However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing 
 is applied to verb names and only an unescaped closing parenthesis terminates 
 the name.
 </P>
 <P>
 The maximum length of a name is 255 in the 8-bit library and 65535 in the
 16-bit and 32-bit libraries. If the name is empty, that is, if the closing
 parenthesis immediately follows the colon, the effect is as if the colon were
 not there. Any number of these verbs may occur in a pattern.
 </P>
 <P>
 Since these verbs are specifically related to backtracking, most of them can be
@ -3323,7 +3348,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC30" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 13 June 2015
+Last updated: 30 August 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@ -187,6 +187,8 @@ at release 5.18.
 </P>
 <br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
 <P>
 Ahom,
 Anatolian_Hieroglyphs,
 Arabic,
 Armenian,
 Avestan,
@ -227,6 +229,7 @@ Gurmukhi,
 Han,
 Hangul,
 Hanunoo,
 Hatran,
 Hebrew,
 Hiragana,
 Imperial_Aramaic,
@ -263,12 +266,14 @@ Miao,
 Modi,
 Mongolian,
 Mro,
 Multani,
 Myanmar,
 Nabataean,
 New_Tai_Lue,
 Nko,
 Ogham,
 Ol_Chiki,
 Old_Hungarian,
 Old_Italic,
 Old_North_Arabian,
 Old_Permic,
@ -290,6 +295,7 @@ Saurashtra,
 Sharada,
 Shavian,
 Siddham,
 SignWriting,
 Sinhala,
 Sora_Sompeng,
 Sundanese,
@ -582,7 +588,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC27" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 13 June 2015
+Last updated: 17 July 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -340,12 +340,13 @@ subject lines. Modifiers on a subject line can change these settings.
 <br><a name="SEC7" href="#TOC1">MODIFIER SYNTAX</a><br>
 <P>
 Modifier lists are used with both pattern and subject lines. Items in a list
-are separated by commas and optional white space. Some modifiers may be given
+are separated by commas followed by optional white space. Trailing white space
-for both patterns and subject lines, whereas others are valid for one or the
+in a modifier list is ignored. Some modifiers may be given for both patterns
-other only. Each modifier has a long name, for example "anchored", and some of
+and subject lines, whereas others are valid only for one or the other. Each
-them must be followed by an equals sign and a value, for example, "offset=12".
+modifier has a long name, for example "anchored", and some of them must be
-Modifiers that do not take values may be preceded by a minus sign to turn off a
+followed by an equals sign and a value, for example, "offset=12". Values cannot
-previous setting.
+contain comma characters, but may contain spaces. Modifiers that do not take
 values may be preceded by a minus sign to turn off a previous setting.
 </P>
 <P>
 A few of the more common modifiers can also be specified as single letters, for
@ -479,6 +480,7 @@ for a description of their effects.
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
      alt_bsux                  set PCRE2_ALT_BSUX
      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
      alt_verbnames             set PCRE2_ALT_VERBNAMES 
      anchored                  set PCRE2_ANCHORED
      auto_callout              set PCRE2_AUTO_CALLOUT
  /i  caseless                  set PCRE2_CASELESS
@ -1469,7 +1471,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 20 May 2015
+Last updated: 30 August 2015
 <br>
 Copyright &copy; 1997-2015 University of Cambridge.
 <br>
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@ -126,11 +126,22 @@ as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
 strings to be in host byte order.
 </P>
 <P>
-The entire string is checked before any other processing takes place. In
+A UTF string is checked before any other processing takes place. In the case of 
-addition to checking the format of the string, there is a check to ensure that
+<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b> calls with a non-zero starting 
-all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
+offset, the check is applied only to that part of the subject that could be
-The so-called "non-character" code points are not excluded because Unicode
+inspected during matching, and there is a check that the starting offset points
-corrigendum #9 makes it clear that they should not be.
+to the first code unit of a character or to the end of the subject. If there
 are no lookbehind assertions in the pattern, the check starts at the starting
 offset. Otherwise, it starts at the length of the longest lookbehind before the
 starting offset, or at the start of the subject if there are not that many
 characters before the starting offset. Note that the sequences \b and \B are
 one-character lookbehinds.
 </P>
 <P>
 In addition to checking the format of the string, there is a check to ensure
 that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate
 area. The so-called "non-character" code points are not excluded because
 Unicode corrigendum #9 makes it clear that they should not be.
 </P>
 <P>
 Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
@ -264,9 +275,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 23 November 2014
+Last updated: 18 August 2015
 <br>
-Copyright &copy; 1997-2014 University of Cambridge.
+Copyright &copy; 1997-2015 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@ -190,13 +190,13 @@ PCRE2 NATIVE API BASIC FUNCTIONS
         uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
         pcre2_compile_context *ccontext);
-       pcre2_code_free(pcre2_code *code);
+       void pcre2_code_free(pcre2_code *code);
-       pcre2_match_data_create(uint32_t ovecsize,
+       pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
         pcre2_general_context *gcontext);
-       pcre2_match_data_create_from_pattern(const pcre2_code *code,
+       pcre2_match_data *pcre2_match_data_create_from_pattern(
-         pcre2_general_context *gcontext);
+         const pcre2_code *code, pcre2_general_context *gcontext);
       int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
         PCRE2_SIZE length, PCRE2_SIZE startoffset,
@ -989,7 +989,7 @@ CHECKING BUILD-TIME OPTIONS
       pcre2_config()  with  where  set  to  NULL.) If PCRE2 has been compiled
       without Unicode support, the buffer is filled with  the  text  "Unicode
       not  supported".  Otherwise,  the  Unicode version string (for example,
-       "7.0.0") is inserted. The number of code units used is  returned.  This
+       "8.0.0") is inserted. The number of code units used is  returned.  This
       is the length of the string plus one unit for the terminating zero.
         PCRE2_CONFIG_UNICODE
@ -1014,7 +1014,7 @@ COMPILING A PATTERN
         uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
         pcre2_compile_context *ccontext);
-       pcre2_code_free(pcre2_code *code);
+       void pcre2_code_free(pcre2_code *code);
       The pcre2_compile() function compiles a pattern into an internal  form.
       The  pattern  is  defined  by a pointer to a string of code units and a
@ -1128,6 +1128,16 @@ COMPILING A PATTERN
       Perl. If you want a multiline circumflex also to match after  a  termi-
       nating newline, you must set PCRE2_ALT_CIRCUMFLEX.
         PCRE2_ALT_VERBNAMES
       By  default, for compatibility with Perl, the name in any verb sequence
       such as (*MARK:NAME) is  any  sequence  of  characters  that  does  not
       include  a  closing  parenthesis. The name is not processed in any way,
       and it is not possible to include a closing parenthesis  in  the  name.
       However,  if  the  PCRE2_ALT_VERBNAMES  option is set, normal backslash
       processing is applied to verb  names  and  only  an  unescaped  closing
       parenthesis terminates the name.
         PCRE2_AUTO_CALLOUT
       If  this  bit  is  set,  pcre2_compile()  automatically inserts callout
@ -1809,11 +1819,11 @@ SERIALIZATION AND PRECOMPILING
 THE MATCH DATA BLOCK
-       pcre2_match_data_create(uint32_t ovecsize,
+       pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
         pcre2_general_context *gcontext);
-       pcre2_match_data_create_from_pattern(const pcre2_code *code,
+       pcre2_match_data *pcre2_match_data_create_from_pattern(
-         pcre2_general_context *gcontext);
+         const pcre2_code *code, pcre2_general_context *gcontext);
       void pcre2_match_data_free(pcre2_match_data *match_data);
@ -2022,12 +2032,20 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
       When PCRE2_UTF is set at compile time, the validity of the subject as a
       UTF  string  is  checked  by default when pcre2_match() is subsequently
-       called.  The entire string is checked before any other processing takes
+       called.  If a non-zero starting offset is given, the check  is  applied
-       place,  and a negative error code is returned if the check fails. There
+       only  to that part of the subject that could be inspected during match-
-       are several UTF error codes for each code unit width, corresponding  to
+       ing, and there is a check that the starting offset points to the  first
-       different  problems with the code unit sequence. The value of startoff-
+       code  unit of a character or to the end of the subject. If there are no
-       set is also checked, to ensure that it points to the start of a charac-
+       lookbehind assertions in the pattern, the check starts at the  starting
-       ter  or  to  the  end  of  the subject. There are discussions about the
+       offset.  Otherwise,  it  starts at the length of the longest lookbehind
       before the starting offset, or at the start of the subject if there are
       not  that  many  characters  before  the starting offset. Note that the
       sequences \b and \B are one-character lookbehinds.
       The check is carried out before any other processing takes place, and a
       negative  error  code is returned if the check fails. There are several
       UTF error codes for each code unit width,  corresponding  to  different
       problems  with  the code unit sequence. There are discussions about the
       validity of UTF-8 strings, UTF-16 strings, and UTF-32  strings  in  the
       pcre2unicode page.
@ -2525,12 +2543,12 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
       In  the replacement string, which is interpreted as a UTF string in UTF
       mode, and is checked for UTF  validity  unless  the  PCRE2_NO_UTF_CHECK
       option is set, a dollar character is an escape character that can spec-
-       ify the insertion of characters from capturing groups in  the  pattern.
+       ify the insertion of characters from capturing groups or (*MARK)  items
-       The following forms are recognized:
+       in the pattern. The following forms are recognized:
         $$                  insert a dollar character
-         $<n>    insert the contents of group <n>
+         $<n> or ${<n>}      insert the contents of group <n>
-         ${<n>}  insert the contents of group <n>
+         $*MARK or ${*MARK}  insert the name of the last (*MARK) encountered
       Either  a  group  number  or  a  group name can be given for <n>. Curly
       brackets are required only if the following character would  be  inter-
@ -2540,6 +2558,13 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
       is "=+babcb+=". Group insertion is done by calling  pcre2_copy_byname()
       or pcre2_copy_bynumber() as appropriate.
       The facility for inserting a (*MARK) name can be used to perform simple
       simultaneous substitutions, as this pcre2test example shows:
         /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
             apple lemon
          2: pear orange
       The first seven arguments of pcre2_substitute() are  the  same  as  for
       pcre2_match(), except that the partial matching options are not permit-
       ted, and match_data may be passed as NULL, in which case a  match  data
@ -2826,7 +2851,7 @@ AUTHOR
 REVISION
-       Last updated: 22 April 2015
+       Last updated: 30 August 2015
       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
@ -4051,13 +4076,17 @@ CONTROLLING THE JIT STACK
       interpreter.
       You may safely use the same JIT stack for more than one pattern (either
-       by assigning directly or by callback), as long as the patterns are  all
+       by assigning directly or by callback), as  long  as  the  patterns  are
-       matched  sequentially in the same thread. In a multithread application,
+       matched sequentially in the same thread. Currently, the only way to set
-       if you do not specify a JIT stack, or if you assign or pass  back  NULL
+       up non-sequential matches in one thread is to use callouts: if a  call-
-       from  a  callback, that is thread-safe, because each thread has its own
+       out  function starts another match, that match must use a different JIT
-       machine stack. However, if you assign  or  pass  back  a  non-NULL  JIT
+       stack to the one used for currently suspended match(es).
-       stack,  this  must  be  a  different  stack for each thread so that the
+
-       application is thread-safe.
+       In a multithread application, if you do not specify a JIT stack, or  if
       you  assign  or  pass  back  NULL from a callback, that is thread-safe,
       because each thread has its own machine stack. However, if  you  assign
       or  pass  back a non-NULL JIT stack, this must be a different stack for
       each thread so that the application is thread-safe.
       Strictly speaking, even more is allowed. You can assign the  same  non-
       NULL  stack  to a match context that is used by any number of patterns,
@ -4234,8 +4263,8 @@ AUTHOR
 REVISION
-       Last updated: 27 November 2014
+       Last updated: 28 July 2015
-       Copyright (c) 1997-2014 University of Cambridge.
+       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
@ -5069,7 +5098,18 @@ VALIDITY OF UTF STRINGS
       known  as  a  byte-order  mark (BOM). The PCRE2 functions do not handle
       this, expecting strings to be in host byte order.
-       The entire string is checked before any other processing  takes  place.
+       A UTF string is checked before any other processing takes place. In the
       case  of  pcre2_match()  and  pcre2_dfa_match()  calls  with a non-zero
       starting offset, the check is applied only to that part of the  subject
       that  could be inspected during matching, and there is a check that the
       starting offset points to the first code unit of a character or to  the
       end  of  the subject. If there are no lookbehind assertions in the pat-
       tern, the check starts at the starting offset. Otherwise, it starts  at
       the  length of the longest lookbehind before the starting offset, or at
       the start of the subject if there are not that many  characters  before
       the  starting offset. Note that the sequences \b and \B are one-charac-
       ter lookbehinds.
       In addition to checking the format of the string, there is a  check  to
       ensure that all code points lie in the range U+0 to U+10FFFF, excluding
       the surrogate area. The so-called "non-character" code points  are  not
@ -5192,8 +5232,8 @@ AUTHOR
 REVISION
-       Last updated: 23 November 2014
+       Last updated: 18 August 2015
-       Copyright (c) 1997-2014 University of Cambridge.
+       Copyright (c) 1997-2015 University of Cambridge.
 ------------------------------------------------------------------------------
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "29 August 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "30 August 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -1052,6 +1052,15 @@ after any internal newline. However, it does not match after a newline at the
 end of the subject, for compatibility with Perl. If you want a multiline
 circumflex also to match after a terminating newline, you must set
 PCRE2_ALT_CIRCUMFLEX.
 .sp
  PCRE2_ALT_VERBNAMES
 .sp
 By default, for compatibility with Perl, the name in any verb sequence such as
 (*MARK:NAME) is any sequence of characters that does not include a closing
 parenthesis. The name is not processed in any way, and it is not possible to
 include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
 option is set, normal backslash processing is applied to verb names and only an
 unescaped closing parenthesis terminates the name.
 .sp
  PCRE2_AUTO_CALLOUT
 .sp
@ -2953,6 +2962,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 29 August 2015
+Last updated: 30 August 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@ -1,4 +1,4 @@
-.TH PCRE2PATTERN 3 "24 July 2015" "PCRE2 10.21"
+.TH PCRE2PATTERN 3 "30 August 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION DETAILS"
@ -1334,7 +1334,7 @@ both specified as literal letters in the same case. For compatibility with
 Perl, EBCDIC code points within the range that are not letters are omitted. For 
 example, [h-k] matches only four characters, even though the codes for h and k 
 are 0x88 and 0x92, a range of 11 code points. However, if the range is 
-specified numerically, for example, [\ex88-\ex92] or [h-\x92], all code points
+specified numerically, for example, [\ex88-\ex92] or [h-\ex92], all code points
 are included.
 .P
 If a range that includes letters is used when caseless matching is set, it
@ -2944,14 +2944,21 @@ in production code should be noted to avoid problems during upgrades." The same
 remarks apply to the PCRE2 features described in this section.
 .P
 The new verbs make use of what was previously invalid syntax: an opening
-parenthesis followed by an asterisk. They are generally of the form
+parenthesis followed by an asterisk. They are generally of the form (*VERB) or
-(*VERB) or (*VERB:NAME). Some may take either form, possibly behaving
+(*VERB:NAME). Some verbs take either form, possibly behaving differently
-differently depending on whether or not a name is present. A name is any
+depending on whether or not a name is present.
-sequence of characters that does not include a closing parenthesis. The maximum
+.P
-length of name is 255 in the 8-bit library and 65535 in the 16-bit and 32-bit
+By default, for compatibility with Perl, a name is any sequence of characters
-libraries. If the name is empty, that is, if the closing parenthesis
+that does not include a closing parenthesis. The name is not processed in 
-immediately follows the colon, the effect is as if the colon were not there.
+any way, and it is not possible to include a closing parenthesis in the name.
-Any number of these verbs may occur in a pattern.
+However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing 
 is applied to verb names and only an unescaped closing parenthesis terminates 
 the name.
 .P
 The maximum length of a name is 255 in the 8-bit library and 65535 in the
 16-bit and 32-bit libraries. If the name is empty, that is, if the closing
 parenthesis immediately follows the colon, the effect is as if the colon were
 not there. Any number of these verbs may occur in a pattern.
 .P
 Since these verbs are specifically related to backtracking, most of them can be
 used only when the pattern is to be matched using the traditional matching
@ -3376,6 +3383,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 24 July 2015
+Last updated: 30 August 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -445,6 +445,7 @@ for a description of their effects.
      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
      alt_bsux                  set PCRE2_ALT_BSUX
      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
      alt_verbnames             set PCRE2_ALT_VERBNAMES 
      anchored                  set PCRE2_ANCHORED
      auto_callout              set PCRE2_AUTO_CALLOUT
  /i  caseless                  set PCRE2_CASELESS
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -285,12 +285,14 @@ COMMAND LINES
 MODIFIER SYNTAX
       Modifier lists are used with both pattern and subject lines. Items in a
-       list  are  separated by commas and optional white space. Some modifiers
+       list are separated by commas followed by optional white space. Trailing
-       may be given for both patterns and subject lines,  whereas  others  are
+       whitespace in a modifier list is ignored. Some modifiers may  be  given
-       valid  for  one  or  the other only. Each modifier has a long name, for
+       for  both patterns and subject lines, whereas others are valid only for
-       example "anchored", and some of them must be followed by an equals sign
+       one  or  the  other.  Each  modifier  has  a  long  name,  for  example
-       and a value, for example, "offset=12".  Modifiers that do not take val-
+       "anchored",  and  some of them must be followed by an equals sign and a
-       ues may be preceded by a minus sign to turn off a previous setting.
+       value, for example, "offset=12". Values cannot  contain  comma  charac-
       ters,  but may contain spaces. Modifiers that do not take values may be
       preceded by a minus sign to turn off a previous setting.
       A few of the more common modifiers can also be specified as single let-
       ters,  for  example "i" for "caseless". In documentation, following the
@ -424,6 +426,7 @@ PATTERN MODIFIERS
             allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
             alt_bsux                  set PCRE2_ALT_BSUX
             alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
             alt_verbnames             set PCRE2_ALT_VERBNAMES
             anchored                  set PCRE2_ANCHORED
             auto_callout              set PCRE2_AUTO_CALLOUT
         /i  caseless                  set PCRE2_CASELESS
@ -1330,5 +1333,5 @@ AUTHOR
 REVISION
-       Last updated: 20 May 2015
+       Last updated: 30 August 2015
       Copyright (c) 1997-2015 University of Cambridge.
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@ -120,6 +120,7 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_UTF                 0x00080000u  /* C J M D */
 #define PCRE2_NEVER_BACKSLASH_C   0x00100000u  /* C       */
 #define PCRE2_ALT_CIRCUMFLEX      0x00200000u  /*   J M D */
 #define PCRE2_ALT_VERBNAMES       0x00400000u  /* C       */
 /* These are for pcre2_jit_compile(). */
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -561,12 +561,12 @@ static PCRE2_SPTR posix_substitutes[] = {
 #define PUBLIC_COMPILE_OPTIONS \
  (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
-   PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY|PCRE2_DOTALL| \
+   PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
-   PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE|PCRE2_MATCH_UNSET_BACKREF| \
+   PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
-   PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C|PCRE2_NEVER_UCP| \
+   PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
-   PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE|PCRE2_NO_AUTO_POSSESS| \
+   PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \
-   PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE|PCRE2_NO_UTF_CHECK| \
+   PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
-   PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF)
+   PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF)
 /* Compile time error code numbers. They are given names so that they can more
 easily be tracked. When a new number is added, the tables called eint1 and
@ -5382,13 +5382,52 @@ for (;; ptr++)
      /* It appears that Perl allows any characters whatsoever, other than
      a closing parenthesis, to appear in arguments, so we no longer insist on
-      letters, digits, and underscores. */
+      letters, digits, and underscores. Perl does not, however, do any
      interpretation within arguments, and has no means of including a closing
       parenthesis. PCRE2 supports escape processing but only when it is
      requested by an option. Note that check_escape() will not return values
      greater than the code unit maximum when not in UTF mode. */
      if (*ptr == CHAR_COLON)
        {
        arg = ++ptr;
        if ((options & PCRE2_ALT_VERBNAMES) == 0)
          {
          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
          arglen = (int)(ptr - arg);
          }
        else
          {
          arglen = 0;
          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS)
            {
            if (*ptr == '\\')
              {
              uint32_t x;
              *errorcodeptr = 0;
              i = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
              if (*errorcodeptr != 0) goto FAILED;
              if (i != 0)
                {
                *errorcodeptr = ERR40;
                goto FAILED;
                }
 #ifdef SUPPORT_UNICODE
 #if PCRE2_CODE_UNIT_WIDTH == 8
              for (i = 0; i < PRIV(utf8_table1_size); i++)
                if ((int)x <= PRIV(utf8_table1)[i]) break;
              arglen += i;
 #elif PCRE2_CODE_UNIT_WIDTH == 16
              if (x > 0xffff) arglen++;
 #endif
 #endif
              }
            arglen++;
            ptr++;
            }
          }
        if ((unsigned int)arglen > MAX_MARK)
          {
          *errorcodeptr = ERR76;
@ -5456,8 +5495,42 @@ for (;; ptr++)
              }
            setverb = *code++ = verbs[i].op_arg;
            *code++ = arglen;
            /* If we are processing the argument for escapes, we don't need
            to apply checks here because it was all checked above when
            computing the length. */
            if ((options & PCRE2_ALT_VERBNAMES) != 0)
              {
              for (; arg != ptr; arg++)
                {
                if (*arg == '\\')
                  {
                  uint32_t x;
                  *errorcodeptr = 0;
                  (void)check_escape(&arg, &x, errorcodeptr, options, FALSE,
                    cb);
 #ifdef SUPPORT_UNICODE
                  if (utf)
                    {
                    PCRE2_UCHAR cbuff[8];
                    x = PRIV(ord2utf)(x, cbuff);
                    memcpy(code, cbuff, CU2BYTES(x));
                    code += x;
                    }
                  else
 #endif
                  *code++ = x;
                  }
                else *code++ = *arg;
                }
              }
            else   /* No argument processing */
              {
              memcpy(code, arg, CU2BYTES(arglen));
              code += arglen;
              }
            *code++ = 0;
            }
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@ -111,7 +111,7 @@ static const char compile_error_texts[] =
  "number after (?C is greater than 255\0"
  "closing parenthesis for (?C expected\0"
  /* 40 */
-  "SPARE ERROR\0"
+  "invalid escape sequence in (*VERB) name\0"
  "unrecognized character after (?P\0"
  "syntax error in subpattern name (missing terminator)\0"
  "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -496,6 +496,7 @@ static modstruct modlist[] = {
  { "allusedtext",         MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,           PO(control) },
  { "alt_bsux",            MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,            PO(options) },
  { "alt_circumflex",      MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,      PO(options) },
  { "alt_verbnames",       MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,       PO(options) },
  { "altglobal",           MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,             PO(control) },
  { "anchored",            MOD_PD,   MOD_OPT, PCRE2_ANCHORED,            PD(options) },
  { "auto_callout",        MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,        PO(options) },
@ -3467,10 +3468,11 @@ static void
 show_compile_options(uint32_t options, const char *before, const char *after)
 {
 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
-else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
  before,
  ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
  ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
  ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "", 
  ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
  ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
  ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
--- a/testdata/testinput2
+++ b/testdata/testinput2
@ -4442,4 +4442,11 @@ a random value. /Ix
 /((*MARK:A))++a(*SKIP:B)b/
    aacb
 /(*MARK:a\zb)z/alt_verbnames 
 /(*:ab\t(d\)c)xxx/
 /(*:ab\t(d\)c)xxx/alt_verbnames,mark
    cxxxz
 # End of testinput2 
--- a/testdata/testinput5
+++ b/testdata/testinput5
@ -1662,4 +1662,9 @@
 /[\pS#moq]/
    =
 # UTF tests 
 /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
    cxxxz
 # End of testinput5 
--- a/testdata/testinput9
+++ b/testdata/testinput9
@ -251,4 +251,6 @@
 /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
 /(*MARK:a\x{100}b)z/alt_verbnames 
 # End of testinput9
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@ -14713,4 +14713,15 @@ No match
    aacb
 No match
 /(*MARK:a\zb)z/alt_verbnames 
 Failed: error 140 at offset 9: invalid escape sequence in (*VERB) name
 /(*:ab\t(d\)c)xxx/
 Failed: error 122 at offset 12: unmatched closing parenthesis
 /(*:ab\t(d\)c)xxx/alt_verbnames,mark
    cxxxz
 0: xxx
 MK: ab\x09(d)c
 # End of testinput2 
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@ -4064,4 +4064,11 @@ No match
    =
 0: =
 # UTF tests 
 /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
    cxxxz
 0: xxx
 MK: a\x{12345}b\x{09}(d)c
 # End of testinput5 
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@ -356,4 +356,7 @@ Failed: error 177 at offset 6: character code point value in \u.... sequence is
        End
 ------------------------------------------------------------------
 /(*MARK:a\x{100}b)z/alt_verbnames 
 Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
 # End of testinput9