Partial documentation and partial code tweaks.

2014-10-14 16:23:57 +00:00 · 2014-10-14 16:23:57 +00:00 · 26cd0bccb3
parent a6302442f2
commit 26cd0bccb3
13 changed files with 1576 additions and 748 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -34,6 +34,7 @@ dist_html_DATA = \
  doc/html/pcre2jit.html \
  doc/html/pcre2limits.html \
  doc/html/pcre2matching.html \
  doc/html/pcre2partial.html \
  doc/html/pcre2test.html \
  doc/html/pcre2unicode.html
@ -64,7 +65,6 @@ dist_html_DATA = \
 #  doc/html/pcre2_utf16_to_host_byte_order.html \
 #  doc/html/pcre2_utf32_to_host_byte_order.html \
 #  doc/html/pcre2_version.html \
 #  doc/html/pcre2partial.html \
 #  doc/html/pcre2pattern.html \
 #  doc/html/pcre2perform.html \
 #  doc/html/pcre2posix.html \
@ -86,6 +86,7 @@ dist_man_MANS = \
  doc/pcre2jit.3 \
  doc/pcre2limits.3 \
  doc/pcre2matching.3 \
  doc/pcre2partial.3 \
  doc/pcre2test.1 \
  doc/pcre2unicode.3
@ -118,7 +119,6 @@ dist_man_MANS = \
 #  doc/pcre2_utf16_to_host_byte_order.3 \
 #  doc/pcre2_utf32_to_host_byte_order.3 \
 #  doc/pcre2_version.3 \
 #  doc/pcre2partial.3 \
 #  doc/pcre2pattern.3 \
 #  doc/pcre2perform.3 \
 #  doc/pcre2posix.3 \
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -90,9 +90,6 @@ document for an overview of all the PCRE2 documentation.
 </P>
 <br><a name="SEC2" href="#TOC1">PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS</a><br>
 <P>
 <b>PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
 <b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
@ -102,9 +99,6 @@ document for an overview of all the PCRE2 documentation.
 <b>PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
 <b>PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
 <b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
 </P>
 <br><a name="SEC3" href="#TOC1">PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS</a><br>
@ -133,7 +127,7 @@ document for an overview of all the PCRE2 documentation.
 <b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
 <br>
 <br>
-<b>int pcre2_set_bsr_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
@ -141,7 +135,7 @@ document for an overview of all the PCRE2 documentation.
 <b>  const unsigned char *<i>tables</i>);</b>
 <br>
 <br>
-<b>int pcre2_set_newline_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
@ -165,10 +159,6 @@ document for an overview of all the PCRE2 documentation.
 <b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
 <br>
 <br>
 <b>int pcre2_set_bsr_match(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
 <b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
 <b>  void *<i>callout_data</i>);</b>
@ -178,10 +168,6 @@ document for an overview of all the PCRE2 documentation.
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
 <b>int pcre2_set_newline_match(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
 <b>int pcre2_set_recursion_limit(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -596,7 +582,7 @@ A compile context is created, copied, and freed by the following functions:
 A compile context is created with default values for its parameters. These can 
 be changed by calling the following functions, which return 0 on success, or 
 PCRE2_ERROR_BADDATA if invalid data is detected.
-<b>int pcre2_set_bsr_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
@ -605,8 +591,7 @@ or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
 ending sequence. The value of this parameter does not affect what is compiled; 
 it is just saved with the compiled pattern. The value is used by the JIT
 compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and 
-<i>pcre2_dfa_match()</i>. You can change the value when calling these functions, 
+<i>pcre2_dfa_match()</i>.
 but doing so disables the use of JIT.
 <b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  const unsigned char *<i>tables</i>);</b>
 <br>
@ -614,7 +599,7 @@ but doing so disables the use of JIT.
 The value must be the result of a call to <i>pcre2_maketables()</i>, whose only 
 argument is a general context. This function builds a set of character tables
 in the current locale.
-<b>int pcre2_set_newline_compile(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
@ -629,8 +614,7 @@ When a pattern is compiled with the PCRE2_EXTENDED option, the value of this
 parameter affects the recognition of white space and the end of internal
 comments starting with #. The value is saved with the compiled pattern for
 subsequent use by the JIT compiler and by the two interpreted matching
-functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>. You can change the
+functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
 value when calling these functions, but doing so disables the use of JIT.
 <b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@ -685,14 +669,6 @@ A match context is created, copied, and freed by the following functions:
 A match context is created with default values for its parameters. These can 
 be changed by calling the following functions, which return 0 on success, or 
 PCRE2_ERROR_BADDATA if invalid data is detected.
 <b>int pcre2_set_bsr_match(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
 The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, 
 or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line 
 ending sequence. If you want to make use of JIT matching, you should not use 
 this function, but instead set the value in a compile context.
 <b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
 <b>  void *<i>callout_data</i>);</b>
@ -769,17 +745,6 @@ pattern of the form
 where ddd is a decimal number. However, such a setting is ignored unless ddd is
 less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
 <b>int pcre2_set_newline_match(pcre2_match_context *<i>mcontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
 <br>
 This specifies which characters or character sequences are to be recognized as
 newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
 PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
 sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
 PCRE2_NEWLINE_ANY (any Unicode newline sequence). If you want to make use of
 JIT matching, you should not use this function, but instead set the value in a
 compile context.
 <b>int pcre2_set_recursion_memory_management(</b>
 <b>  pcre2_match_context *<i>mcontext</i>,</b>
 <b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
@ -956,9 +921,8 @@ documentation).
 <P>
 For those options that can be different in different parts of the pattern, the
 contents of the <i>options</i> argument specifies their settings at the start of
-compilation. The PCRE2_ANCHORED, PCRE2_NO_UTF_CHECK, and
+compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
-PCRE2_NO_START_OPTIMIZE options can be set at the time of matching as well as
+the time of matching as well as at compile time.
 at compile time.
 </P>
 <P>
 Other, less frequently required compile-time parameters (for example, the 
@ -1176,14 +1140,55 @@ purposes.
 <pre>
  PCRE2_NO_START_OPTIMIZE
 </pre>
-This is an option that acts at matching time; that is, it is really an option
+This is an option whose main effect is at matching time. It does not change
-for <b>pcre2_match()</b> or <b>pcre_dfa_match()</b>. If it is set at compile
+what <b>pcre2_compile()</b> generates, but it does affect the output of the JIT
-time, it is remembered with the compiled pattern and assumed at matching time.
+compiler.
-This is necessary if you want to use JIT execution, because the JIT compiler
+</P>
-needs to know whether or not this option is set. For details, see the
+<P>
-discussion of PCRE2_NO_START_OPTIMIZE in the section on <b>pcre2_match()</b> 
+There are a number of optimizations that may occur at the start of a match, in
-options
+order to speed up the process. For example, if it is known that an unanchored
-<a href="#matchoptions">below.</a>
+match must start with a specific character, the matching code searches the
 subject for that character, and fails immediately if it cannot find it, without
 actually running the main matching function. This means that a special item
 such as (*COMMIT) at the start of a pattern is not considered until after a
 suitable starting point for the match has been found. Also, when callouts or
 (*MARK) items are in use, these "start-up" optimizations can cause them to be
 skipped if the pattern is never actually used. The start-up optimizations are
 in effect a pre-scan of the subject that takes place before the pattern is run.
 </P>
 <P>
 The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
 possibly causing performance to suffer, but ensuring that in cases where the
 result is "no match", the callouts do occur, and that items such as (*COMMIT)
 and (*MARK) are considered at every possible starting position in the subject
 string.
 </P>
 <P>
 Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation.
 Consider the pattern
 <pre>
  (*COMMIT)ABC
 </pre>
 When this is compiled, PCRE2 records the fact that a match must start with the
 character "A". Suppose the subject string is "DEFABC". The start-up
 optimization scans along the subject, finds "A" and runs the first match
 attempt from there. The (*COMMIT) item means that the pattern must match the
 current starting position, which in this case, it does. However, if the same
 match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
 subject string does not happen. The first match attempt is run starting from
 "D" and when this fails, (*COMMIT) prevents any further matches being tried, so
 the overall result is "no match". There are also other start-up optimizations.
 For example, a minimum length for the subject may be recorded. Consider the
 pattern
 <pre>
  (*MARK:A)(X|Y)
 </pre>
 The minimum length for a match is one character. If the subject is "ABC", there
 will be attempts to match "ABC", "BC", and "C". An attempt to match an empty 
 string at the end of the subject does not take place, because PCRE2 knows that
 the subject is now too short, and so the (*MARK) is never encountered. In this
 case, the optimization does not affect the overall match result, which is still
 "no match", but it does affect the auxiliary information that is returned.
 <pre>
  PCRE2_NO_UTF_CHECK
 </pre>
@ -1648,13 +1653,15 @@ string that define the matched part of the subject and any substrings that were
 captured. This is known as the <i>ovector</i>.
 </P>
 <P>
-Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a 
+Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a
 match data block by calling one of the creation functions above. For
 <b>pcre2_match_data_create()</b>, the first argument is the number of pairs of
 offsets in the <i>ovector</i>. One pair of offsets is required to identify the
 string that matched the whole pattern, with another pair for each captured
-substring. For example, a value of 4 creates enough space to record the
+substring. For example, a value of 4 creates enough space to record the matched
-matched portion of the subject plus three captured substrings. 
+portion of the subject plus three captured substrings. A minimum of 1
 pair is imposed by <b>pcre2_match_data_create()</b>, so it is always possible to
 return the overall matched string.
 </P>
 <P>
 For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
@ -1779,10 +1786,9 @@ Option bits for <b>pcre2_match()</b>
 </b><br>
 <P>
 The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
-zero. The only bits that may be set are PCRE2_ANCHORED, 
+zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
-PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
-PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and
+PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
 PCRE2_PARTIAL_SOFT. Their action is described below.
 </P>
 <P>
 If the pattern was successfully processed by the just-in-time (JIT) compiler,
@ -1833,56 +1839,6 @@ valid, so PCRE2 searches further into the string for occurrences of "a" or "b".
 This is like PCRE2_NOTEMPTY, except that an empty string match that is not at
 the start of the subject is permitted. If the pattern is anchored, such a match
 can occur only if the pattern contains \K.
 <pre>
  PCRE2_NO_START_OPTIMIZE
 </pre>
 There are a number of optimizations that <b>pcre2_match()</b> uses at the start
 of a match, in order to speed up the process. For example, if it is known that
 an unanchored match must start with a specific character, it searches the
 subject for that character, and fails immediately if it cannot find it, without
 actually running the main matching function. This means that a special item
 such as (*COMMIT) at the start of a pattern is not considered until after a
 suitable starting point for the match has been found. Also, when callouts or
 (*MARK) items are in use, these "start-up" optimizations can cause them to be
 skipped if the pattern is never actually used. The start-up optimizations are
 in effect a pre-scan of the subject that takes place before the pattern is run.
 </P>
 <P>
 The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
 possibly causing performance to suffer, but ensuring that in cases where the
 result is "no match", the callouts do occur, and that items such as (*COMMIT)
 and (*MARK) are considered at every possible starting position in the subject
 string. If PCRE2_NO_START_OPTIMIZE is set at compile time, it cannot be unset
 at matching time. The use of PCRE2_NO_START_OPTIMIZE at matching time (that is,
 passing it to <b>pcre2_match()</b>) disables JIT execution; in this situation,
 matching is always done interpretively.
 </P>
 <P>
 Setting PCRE2_NO_START_OPTIMIZE can change the outcome of a matching operation.
 Consider the pattern
 <pre>
  (*COMMIT)ABC
 </pre>
 When this is compiled, PCRE2 records the fact that a match must start with the
 character "A". Suppose the subject string is "DEFABC". The start-up
 optimization scans along the subject, finds "A" and runs the first match
 attempt from there. The (*COMMIT) item means that the pattern must match the
 current starting position, which in this case, it does. However, if the same
 match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
 subject string does not happen. The first match attempt is run starting from
 "D" and when this fails, (*COMMIT) prevents any further matches being tried, so
 the overall result is "no match". There are also other start-up optimizations.
 For example, a minimum length for the subject may be recorded. Consider the
 pattern
 <pre>
  (*MARK:A)(X|Y)
 </pre>
 The minimum length for a match is one character. If the subject is "ABC", there
 will be attempts to match "ABC", "BC", and "C". An attempt to match an empty 
 string at the end of the subject does not take place, because PCRE2 knows that
 the subject is now too short, and so the (*MARK) is never encountered. In this
 case, the optimization does not affect the overall match result, which is still
 "no match", but it does affect the auxiliary information that is returned.
 <pre>
  PCRE2_NO_UTF_CHECK
 </pre>
@ -2035,13 +1991,13 @@ returned.
 </P>
 <P>
 If the ovector is too small to hold all the captured substring offsets, as much
-as possible is filled in, and the function returns a value of zero. If neither
+as possible is filled in, and the function returns a value of zero. If captured
-the actual string matched nor any captured substrings are of interest,
+substrings are not of interest, <b>pcre2_match()</b> may be called with a match
-<b>pcre2_match()</b> may be called with a match data block whose ovector is of
+data block whose ovector is of minimum length (that is, one pair). However, if
-zero length. However, if the pattern contains back references and the
+the pattern contains back references and the <i>ovector</i> is not big enough to
-<i>ovector</i> is not big enough to remember the related substrings, PCRE2 has
+remember the related substrings, PCRE2 has to get additional memory for use
-to get additional memory for use during matching. Thus it is usually advisable
+during matching. Thus it is usually advisable to set up a match data block
-to set up a match data block containing an ovector of reasonable size.
+containing an ovector of reasonable size.
 </P>
 <P>
 It is possible for capturing subpattern number <i>n+1</i> to match some part of
@ -2074,12 +2030,6 @@ Other information about the match
 <b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
 <b>PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
 <b>PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
 <b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
 </P>
 <P>
@ -2093,39 +2043,10 @@ Otherwise NULL is returned. A (*MARK) name may be available after a failed
 match or a partial match, as well as after a successful one.
 </P>
 <P>
-The other three functions yield values that give information about the part of 
+The offset of the character at which the successful match started is
-the subject string that was inspected during a successful match or a partial 
+returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
-match. Their results are undefined after a failed match. They return the 
+<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note, 
-following values, respectively:
+however, that \K has no effect for a partial match.
 <br>
 <br>
 (1) The offset of the leftmost character that was inspected during the match.
 This can be earlier than the point at which the match started if the pattern
 contains lookbehind assertions or \b or \B at the start.
 <br>
 <br>
 (2) The offset of the character that follows the rightmost character that was
 inspected during the match. This can be after the end of the match if the 
 pattern contains lookahead assertions.
 <br>
 <br>
 (3) The offset of the character at which the successful or partial match 
 started. This can be different to the value of <i>ovector[0]</i> if the pattern 
 contains the \K escape sequence.
 </P>
 <P>
 For example, if the pattern (?&#60;=abc)xx\Kyy(?=def) is matched against the
 string "123abcxxyydef123", the resulting offsets are:
 <pre>
  ovector[0]   8
  ovector[1]  10
  leftchar     3
  rightchar   13
  startchar    6
 </pre>
 The <b>allusedtext</b> modifier in <b>pcre2test</b> can be used to display a
 longer string that shows the leftmost and rightmost characters in a match
 instead of just the matched string.
 <a name="errorlist"></a></P>
 <br><b>
 Error return values from <b>pcre2_match()</b>
@ -2513,10 +2434,9 @@ Option bits for <b>pcre_dfa_match()</b>
 The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
 be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
 PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
-PCRE2_NO_START_OPTIMIZE, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
+PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and
-PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of these are
+PCRE2_DFA_RESTART. All but the last four of these are exactly the same as for
-exactly the same as for <b>pcre2_match()</b>, so their description is not
+<b>pcre2_match()</b>, so their description is not repeated here.
 repeated here.
 <pre>
  PCRE2_PARTIAL_HARD
  PCRE2_PARTIAL_SOFT
@ -2650,7 +2570,7 @@ Cambridge CB2 3QH, England.
 </P>
 <br><a name="SEC32" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 16 September 2014
+Last updated: 14 October 2014
 <br>
 Copyright &copy; 1997-2014 University of Cambridge.
 <br>
--- a/doc/html/pcre2callout.html
+++ b/doc/html/pcre2callout.html
@ -131,7 +131,7 @@ long enough, or, for unanchored patterns, if it has been scanned far enough.
 </P>
 <P>
 You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE
-option to the matching function, or by starting the pattern with
+option to <b>pcre2_compile()</b>, or by starting the pattern with
 (*NO_START_OPT). This slows down the matching process, but does ensure that
 callouts such as the example above are obeyed.
 </P>
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@ -128,9 +128,8 @@ or the JIT compiler was not able to handle the pattern.
 <P>
 The <b>pcre2_match()</b> options that are supported for JIT matching are
 PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
-PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The options 
+PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
-that are not supported at match time are PCRE2_ANCHORED and
+PCRE2_ANCHORED option is not supported at match time.
 PCRE2_NO_START_OPTIMIZE, though they are supported if given at compile time.
 </P>
 <P>
 The only unsupported pattern items are \C (match a single data unit) when
--- a/doc/html/pcre2partial.html
+++ b/doc/html/pcre2partial.html
@ -0,0 +1,464 @@
 <html>
 <head>
 <title>pcre2partial specification</title>
 </head>
 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 <h1>pcre2partial man page</h1>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
 <p>
 This page is part of the PCRE2 HTML documentation. It was generated
 automatically from the original man page. If there is any nonsense in it,
 please consult the man page, in case the conversion went wrong.
 <br>
 <ul>
 <li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE2</a>
 <li><a name="TOC2" href="#SEC2">PARTIAL MATCHING USING pcre2_match()</a>
 <li><a name="TOC3" href="#SEC3">PARTIAL MATCHING USING pcre2_dfa_match()</a>
 <li><a name="TOC4" href="#SEC4">PARTIAL MATCHING AND WORD BOUNDARIES</a>
 <li><a name="TOC5" href="#SEC5">EXAMPLE OF PARTIAL MATCHING USING PCRE2TEST</a>
 <li><a name="TOC6" href="#SEC6">MULTI-SEGMENT MATCHING WITH pcre2_dfa_match()</a>
 <li><a name="TOC7" href="#SEC7">MULTI-SEGMENT MATCHING WITH pcre2_match()</a>
 <li><a name="TOC8" href="#SEC8">ISSUES WITH MULTI-SEGMENT MATCHING</a>
 <li><a name="TOC9" href="#SEC9">AUTHOR</a>
 <li><a name="TOC10" href="#SEC10">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE2</a><br>
 <P>
 In normal use of PCRE2, if the subject string that is passed to a matching
 function matches as far as it goes, but is too short to match the entire
 pattern, PCRE2_ERROR_NOMATCH is returned. There are circumstances where it
 might be helpful to distinguish this case from other cases in which there is no
 match.
 </P>
 <P>
 Consider, for example, an application where a human is required to type in data
 for a field with specific formatting requirements. An example might be a date
 in the form <i>ddmmmyy</i>, defined by this pattern:
 <pre>
  ^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$
 </pre>
 If the application sees the user's keystrokes one by one, and can check that
 what has been typed so far is potentially valid, it is able to raise an error
 as soon as a mistake is made, by beeping and not reflecting the character that
 has been typed, for example. This immediate feedback is likely to be a better
 user interface than a check that is delayed until the entire string has been
 entered. Partial matching can also be useful when the subject string is very
 long and is not all available at once.
 </P>
 <P>
 PCRE2 supports partial matching by means of the PCRE2_PARTIAL_SOFT and
 PCRE2_PARTIAL_HARD options, which can be set when calling a matching function.
 The difference between the two options is whether or not a partial match is
 preferred to an alternative complete match, though the details differ between
 the two types of matching function. If both options are set, PCRE2_PARTIAL_HARD
 takes precedence.
 </P>
 <P>
 If you want to use partial matching with just-in-time optimized code, you must
 call <b>pcre2_jit_compile()</b> with one or both of these options:
 <pre>
  PCRE2_JIT_PARTIAL_SOFT
  PCRE2_JIT_PARTIAL_HARD
 </pre>
 PCRE2_JIT_COMPLETE should also be set if you are going to run non-partial
 matches on the same pattern. If the appropriate JIT mode has not been compiled,
 interpretive matching code is used.
 </P>
 <P>
 Setting a partial matching option disables two of PCRE2's standard
 optimizations. PCRE2 remembers the last literal code unit in a pattern, and
 abandons matching immediately if it is not present in the subject string. This
 optimization cannot be used for a subject string that might match only
 partially. PCRE2 also knows the minimum length of a matching string, and does
 not bother to run the matching function on shorter strings. This optimization
 is also disabled for partial matching.
 </P>
 <br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre2_match()</a><br>
 <P>
 A partial match occurs during a call to <b>pcre2_match()</b> when the end of the
 subject string is reached successfully, but matching cannot continue because
 more characters are needed. However, at least one character in the subject must
 have been inspected. This character need not form part of the final matched
 string; lookbehind assertions and the \K escape sequence provide ways of
 inspecting characters before the start of a matched string. The requirement for
 inspecting at least one character exists because an empty string can always be
 matched; without such a restriction there would always be a partial match of an
 empty string at the end of the subject.
 </P>
 <P>
 When a partial match is returned, the first two elements in the ovector point
 to the portion of the subject that was matched. The appearance of \K in the 
 pattern has no effect for a partial match. Consider this pattern:
 <pre>
  /abc\K123/
 </pre>
 If it is matched against "456abc123xyz" the result is a complete match, and the
 ovector defines the matched string as "123", because \K resets the "start of 
 match" point. However, if a partial match is requested and the subject string 
 is "456abc12", a partial match is found for the string "abc12", because all 
 these characters are needed for a subsequent re-match with additional
 characters.
 </P>
 <P>
 What happens when a partial match is identified depends on which of the two
 partial matching options is set.
 </P>
 <br><b>
 PCRE2_PARTIAL_SOFT WITH pcre2_match()
 </b><br>
 <P>
 If PCRE2_PARTIAL_SOFT is set when <b>pcre2_match()</b> identifies a partial
 match, the partial match is remembered, but matching continues as normal, and
 other alternatives in the pattern are tried. If no complete match can be found,
 PCRE2_ERROR_PARTIAL is returned instead of PCRE2_ERROR_NOMATCH.
 </P>
 <P>
 This option is "soft" because it prefers a complete match over a partial match.
 All the various matching items in a pattern behave as if the subject string is
 potentially complete. For example, \z, \Z, and $ match at the end of the
 subject, as normal, and for \b and \B the end of the subject is treated as a
 non-alphanumeric.
 </P>
 <P>
 If there is more than one partial match, the first one that was found provides
 the data that is returned. Consider this pattern:
 <pre>
  /123\w+X|dogY/
 </pre>
 If this is matched against the subject string "abc123dog", both
 alternatives fail to match, but the end of the subject is reached during
 matching, so PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9,
 identifying "123dog" as the first partial match that was found. (In this
 example, there are two partial matches, because "dog" on its own partially
 matches the second alternative.)
 </P>
 <br><b>
 PCRE2_PARTIAL_HARD WITH pcre2_match()
 </b><br>
 <P>
 If PCRE2_PARTIAL_HARD is set for <b>pcre2_match()</b>, PCRE2_ERROR_PARTIAL is
 returned as soon as a partial match is found, without continuing to search for
 possible complete matches. This option is "hard" because it prefers an earlier
 partial match over a later complete match. For this reason, the assumption is
 made that the end of the supplied subject string may not be the true end of the
 available data, and so, if \z, \Z, \b, \B, or $ are encountered at the end
 of the subject, the result is PCRE2_ERROR_PARTIAL, provided that at least one
 character in the subject has been inspected.
 </P>
 <br><b>
 Comparing hard and soft partial matching
 </b><br>
 <P>
 The difference between the two partial matching options can be illustrated by a
 pattern such as:
 <pre>
  /dog(sbody)?/
 </pre>
 This matches either "dog" or "dogsbody", greedily (that is, it prefers the
 longer string if possible). If it is matched against the string "dog" with
 PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". However, if
 PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PARTIAL. On the other
 hand, if the pattern is made ungreedy the result is different:
 <pre>
  /dog(sbody)??/
 </pre>
 In this case the result is always a complete match because that is found first,
 and matching never continues after finding a complete match. It might be easier
 to follow this explanation by thinking of the two patterns like this:
 <pre>
  /dog(sbody)?/    is the same as  /dogsbody|dog/
  /dog(sbody)??/   is the same as  /dog|dogsbody/
 </pre>
 The second pattern will never match "dogsbody", because it will always find the
 shorter match first.
 </P>
 <br><a name="SEC3" href="#TOC1">PARTIAL MATCHING USING pcre2_dfa_match()</a><br>
 <P>
 The DFA functions move along the subject string character by character, without
 backtracking, searching for all possible matches simultaneously. If the end of
 the subject is reached before the end of the pattern, there is the possibility
 of a partial match, again provided that at least one character has been
 inspected.
 </P>
 <P>
 When PCRE2_PARTIAL_SOFT is set, PCRE2_ERROR_PARTIAL is returned only if there
 have been no complete matches. Otherwise, the complete matches are returned.
 However, if PCRE2_PARTIAL_HARD is set, a partial match takes precedence over
 any complete matches. The portion of the string that was matched when the
 longest partial match was found is set as the first matching string.
 </P>
 <P>
 Because the DFA functions always search for all possible matches, and there is
 no difference between greedy and ungreedy repetition, their behaviour is
 different from the standard functions when PCRE2_PARTIAL_HARD is set. Consider
 the string "dog" matched against the ungreedy pattern shown above:
 <pre>
  /dog(sbody)??/
 </pre>
 Whereas the standard functions stop as soon as they find the complete match for
 "dog", the DFA functions also find the partial match for "dogsbody", and so
 return that when PCRE2_PARTIAL_HARD is set.
 </P>
 <br><a name="SEC4" href="#TOC1">PARTIAL MATCHING AND WORD BOUNDARIES</a><br>
 <P>
 If a pattern ends with one of the sequences \b or \B, which test for word
 boundaries, partial matching with PCRE2_PARTIAL_SOFT can give counter-intuitive
 results. Consider this pattern:
 <pre>
  /\bcat\b/
 </pre>
 This matches "cat", provided there is a word boundary at either end. If the
 subject string is "the cat", the comparison of the final "t" with a following
 character cannot take place, so a partial match is found. However, normal
 matching carries on, and \b matches at the end of the subject when the last
 character is a letter, so a complete match is found. The result, therefore, is
 <i>not</i> PCRE2_ERROR_PARTIAL. Using PCRE2_PARTIAL_HARD in this case does yield
 PCRE2_ERROR_PARTIAL, because then the partial match takes precedence.
 </P>
 <br><a name="SEC5" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRE2TEST</a><br>
 <P>
 If the <b>partial_soft</b> (or <b>ps</b>) modifier is present on a
 <b>pcre2test</b> data line, the PCRE2_PARTIAL_SOFT option is used for the match.
 Here is a run of <b>pcre2test</b> that uses the date example quoted above:
 <pre>
    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
  data&#62; 25jun04\=ps
   0: 25jun04
   1: jun
  data&#62; 25dec3\=ps
  Partial match: 25dec3
  data&#62; 3ju\=ps
  Partial match: 3ju
  data&#62; 3juj\=ps
  No match
  data&#62; j\=ps
  No match
 </pre>
 The first data string is matched completely, so <b>pcre2test</b> shows the
 matched substrings. The remaining four strings do not match the complete
 pattern, but the first two are partial matches. Similar output is obtained
 if DFA matching is used.
 </P>
 <P>
 If the <b>partial_hard</b> (or <b>ph</b>) modifier is present on a
 <b>pcre2test</b> data line, the PCRE2_PARTIAL_HARD option is set for the match.
 </P>
 <br><a name="SEC6" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre2_dfa_match()</a><br>
 <P>
 When a partial match has been found using a DFA matching function, it is
 possible to continue the match by providing additional subject data and calling
 the function again with the same compiled regular expression, this time setting
 the PCRE2_DFA_RESTART option. You must pass the same working space as before,
 because this is where details of the previous partial match are stored. Here is
 an example using <b>pcre2test</b>:
 <pre>
    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
  data&#62; 23ja\=dfa,ps
  Partial match: 23ja
  data&#62; n05\=dfa,dfa_restart
   0: n05
 </pre>
 The first call has "23ja" as the subject, and requests partial matching; the
 second call has "n05" as the subject for the continued (restarted) match.
 Notice that when the match is complete, only the last part is shown; PCRE2 does
 not retain the previously partially-matched string. It is up to the calling
 program to do that if it needs to.
 </P>
 <P>
 That means that, for an unanchored pattern, if a continued match fails, it is
 not possible to try again at a new starting point. All this facility is capable
 of doing is continuing with the previous match attempt. In the previous
 example, if the second set of data is "ug23" the result is no match, even
 though there would be a match for "aug23" if the entire string were given at
 once. Depending on the application, this may or may not be what you want.
 The only way to allow for starting again at the next character is to retain the
 matched part of the subject and try a new complete match.
 </P>
 <P>
 You can set the PCRE2_PARTIAL_SOFT or PCRE2_PARTIAL_HARD options with
 PCRE2_DFA_RESTART to continue partial matching over multiple segments. This
 facility can be used to pass very long subject strings to the DFA matching
 functions.
 </P>
 <br><a name="SEC7" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre2_match()</a><br>
 <P>
 Unlike the DFA function, it is not possible to restart the previous match with
 a new segment of data when using <b>pcre2_match()</b>. Instead, new data must be
 added to the previous subject string, and the entire match re-run, starting
 from the point where the partial match occurred. Earlier data can be discarded.
 </P>
 <P>
 It is best to use PCRE2_PARTIAL_HARD in this situation, because it does not
 treat the end of a segment as the end of the subject when matching \z, \Z,
 \b, \B, and $. Consider an unanchored pattern that matches dates:
 <pre>
    re&#62; /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
  data&#62; The date is 23ja\=ph
  Partial match: 23ja
 </pre>
 At this stage, an application could discard the text preceding "23ja", add on
 text from the next segment, and call the matching function again. Unlike the
 DFA matching function, the entire matching string must always be available,
 and the complete matching process occurs for each call, so more memory and more
 processing time is needed.
 </P>
 <br><a name="SEC8" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
 <P>
 Certain types of pattern may give problems with multi-segment matching,
 whichever matching function is used.
 </P>
 <P>
 1. If the pattern contains a test for the beginning of a line, you need to pass
 the PCRE2_NOTBOL option when the subject string for any call does not start at
 the beginning of a line. There is also a PCRE2_NOTEOL option, but in practice
 when doing multi-segment matching you should be using PCRE2_PARTIAL_HARD, which
 includes the effect of PCRE2_NOTEOL.
 </P>
 <P>
 2. If a pattern contains a lookbehind assertion, characters that precede the
 start of the partial match may have been inspected during the matching process.
 When using <b>pcre2_match()</b>, sufficient characters must be retained for the
 next match attempt. You can ensure that enough characters are retained by doing
 the following:
 </P>
 <P>
 Before doing any matching, find the length of the longest lookbehind in the
 pattern by calling <b>pcre2_pattern_info()</b> with the PCRE2_INFO_MAXLOOKBEHIND
 option. Note that the resulting count is in characters, not code units. After a
 partial match, moving back from the ovector[0] offset in the subject by the
 number of characters given for the maximum lookbehind gets you to the earliest
 character that must be retained. In a non-UTF or a 32-bit situation, moving
 back is just a subtraction, but in UTF-8 or UTF-16 you have to count characters
 while moving back through the code units.
 </P>
 <P>
 Characters before the point you have now reached can be discarded, and after
 the next segment has been added to what is retained, you should run the next
 match with the <b>startoffset</b> argument set so that the match begins at the
 same point as before.
 </P>
 <P>
 For example, if the pattern "(?&#60;=123)abc" is partially matched against the
 string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
 lookbehind count is 3, so all characters before offset 2 can be discarded. The
 value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b> 
 displays a partial match, it indicates the lookbehind characters with '&#60;' 
 characters:
 <pre>
    re&#62; "(?&#60;=123)abc"
  data&#62; xx123ab\=ph
  Partial match: 123ab
                 &#60;&#60;&#60; 
 </PRE>
 </P>
 <P>
 3. Because a partial match must always contain at least one character, what
 might be considered a partial match of an empty string actually gives a "no
 match" result. For example:
 <pre>
    re&#62; /c(?&#60;=abc)x/
  data&#62; ab\=ps
  No match
 </pre>
 If the next segment begins "cx", a match should be found, but this will only
 happen if characters from the previous segment are retained. For this reason, a
 "no match" result should be interpreted as "partial match of an empty string"
 when the pattern contains lookbehinds.
 </P>
 <P>
 4. Matching a subject string that is split into multiple segments may not
 always produce exactly the same result as matching over one single long string,
 especially when PCRE2_PARTIAL_SOFT is used. The section "Partial Matching and
 Word Boundaries" above describes an issue that arises if the pattern ends with
 \b or \B. Another kind of difference may occur when there are multiple
 matching possibilities, because (for PCRE2_PARTIAL_SOFT) a partial match result
 is given only when there are no completed matches. This means that as soon as
 the shortest match has been found, continuation to a new subject segment is no
 longer possible. Consider this <b>pcre2test</b> example:
 <pre>
    re&#62; /dog(sbody)?/
  data&#62; dogsb\=ps
   0: dog
  data&#62; do\=ps,dfa
  Partial match: do
  data&#62; gsb\=ps,dfa,dfa_restart
   0: g
  data&#62; dogsbody\=dfa
   0: dogsbody
   1: dog
 </pre>
 The first data line passes the string "dogsb" to a standard matching function,
 setting the PCRE2_PARTIAL_SOFT option. Although the string is a partial match
 for "dogsbody", the result is not PCRE2_ERROR_PARTIAL, because the shorter
 string "dog" is a complete match. Similarly, when the subject is presented to
 a DFA matching function in several parts ("do" and "gsb" being the first two)
 the match stops when "dog" has been found, and it is not possible to continue.
 On the other hand, if "dogsbody" is presented as a single string, a DFA
 matching function finds both matches.
 </P>
 <P>
 Because of these problems, it is best to use PCRE2_PARTIAL_HARD when matching
 multi-segment data. The example above then behaves differently:
 <pre>
    re&#62; /dog(sbody)?/
  data&#62; dogsb\=ph
  Partial match: dogsb
  data&#62; do\=ps,dfa
  Partial match: do
  data&#62; gsb\=ph,dfa,dfa_restart
  Partial match: gsb
 </pre>
 </P>
 <P>
 5. Patterns that contain alternatives at the top level which do not all start
 with the same pattern item may not work as expected when PCRE2_DFA_RESTART is
 used. For example, consider this pattern:
 <pre>
  1234|3789
 </pre>
 If the first part of the subject is "ABC123", a partial match of the first
 alternative is found at offset 3. There is no partial match for the second
 alternative, because such a match does not start at the same point in the
 subject string. Attempting to continue with the string "7890" does not yield a
 match because only those alternatives that match at one point in the subject
 are remembered. The problem arises because the start of the second alternative
 matches within the first alternative. There is no problem with anchored
 patterns or patterns such as:
 <pre>
  1234|ABCD
 </pre>
 where no string can be a partial match for both alternatives. This is not a
 problem if a standard matching function is used, because the entire match has
 to be rerun each time:
 <pre>
    re&#62; /1234|3789/
  data&#62; ABC123\=ph
  Partial match: 123
  data&#62; 1237890
   0: 3789
 </pre>
 Of course, instead of using PCRE2_DFA_RESTART, the same technique of re-running
 the entire match can also be used with the DFA matching function. Another
 possibility is to work with two buffers. If a partial match at offset <i>n</i>
 in the first buffer is followed by "no match" when PCRE2_DFA_RESTART is used on
 the second buffer, you can then try a new match starting at offset <i>n+1</i> in
 the first buffer.
 </P>
 <br><a name="SEC9" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
 University Computing Service
 <br>
 Cambridge CB2 3QH, England.
 <br>
 </P>
 <br><a name="SEC10" href="#TOC1">REVISION</a><br>
 <P>
 Last updated: 14 October 2014
 <br>
 Copyright &copy; 1997-2014 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
 </p>
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -476,6 +476,7 @@ about the pattern:
  /I  info                      show info about compiled pattern
      hex                       pattern is coded in hexadecimal
      jit[=&#60;number&#62;]            use JIT
      jitverify                 verify JIT use 
      locale=&#60;name&#62;             use this locale
      memory                    show memory used
      newline=&#60;type&#62;            set newline type
@ -503,10 +504,6 @@ The <b>newline</b> modifier specifies which characters are to be interpreted as
 newlines, both in the pattern and (by default) in subject lines. The type must
 be one of CR, LF, CRLF, ANYCRLF, or ANY.
 </P>
 <P>
 Both the \R and newline settings can be changed at match time, but if this is
 done, JIT matching is disabled.
 </P>
 <br><b>
 Information about a pattern
 </b><br>
@ -556,29 +553,32 @@ length of the pattern is passed. This is implied if <b>hex</b> is set.
 JIT compilation
 </b><br>
 <P>
-The <b>/jit</b> modifier may optionally be followed by a number in the range 0
+The <b>/jit</b> modifier may optionally be followed by an equals sign and a
-to 7:
+number in the range 0 to 7:
 <pre>
  0  disable JIT
-  1  normal match only
+  1  use JIT for normal match only
-  2  soft partial match only
+  2  use JIT for soft partial match only
-  3  normal match and soft partial match
+  3  use JIT for normal match and soft partial match
-  4  hard partial match only
+  4  use JIT for hard partial match only
-  6  soft and hard partial match
+  6  use JIT for soft and hard partial match
  7  all three modes
 </pre>
 If no number is given, 7 is assumed. If JIT compilation is successful, the
-compiled JIT code will automatically be used when <b>pcre2_match()</b> is run,
+compiled JIT code will automatically be used when <b>pcre2_match()</b> is run 
-except when incompatible run-time options are specified. For more details, see
+for the appropriate type of match, except when incompatible run-time options
-the
+are specified. For more details, see the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 documentation. See also the <b>jitstack</b> modifier below for a way of
 setting the size of the JIT stack.
 </P>
 <P>
-If the <b>jitverify</b> modifier is specified, the text "(JIT)" is added to the
+If the <b>jitverify</b> modifier is specified, information about the compiled
-first output line after a match or non match when JIT-compiled code was
+pattern shows whether JIT compilation was or was not successful. If
-actually used. This modifier can also be set on a subject line.
+<b>jitverify</b> is specified without <b>jit</b>, jit=7 is assumed. If JIT
 compilation is successful when <b>jitverify</b> is set, the text "(JIT)" is
 added to the first output line after a match or non match when JIT-compiled
 code was actually used.
 </P>
 <br><b>
 Setting a locale
@ -678,9 +678,8 @@ not affect the compilation process.
      aftertext                 show text after match
      allaftertext              show text after captures
      allcaptures               show all captures
-      allusedtext               show all consulted text 
+      allusedtext               show all consulted text
  /g  global                    global matching
      jitverify                 verify JIT usage
      mark                      show mark values
 </pre>
 These modifiers may not appear in a <b>#pattern</b> command. If you want them as
@ -703,7 +702,6 @@ for a description of their effects.
      anchored                  set PCRE2_ANCHORED
      dfa_restart               set PCRE2_DFA_RESTART
      dfa_shortest              set PCRE2_DFA_SHORTEST
      no_start_optimize         set PCRE2_NO_START_OPTIMIZE
      no_utf_check              set PCRE2_NO_UTF_CHECK
      notbol                    set PCRE2_NOTBOL
      notempty                  set PCRE2_NOTEMPTY
@ -734,9 +732,8 @@ pattern.
      aftertext                 show text after match
      allaftertext              show text after captures
      allcaptures               show all captures
-      allusedtext               show all consulted text 
+      allusedtext               show all consulted text (non-JIT only)
      altglobal                 alternative global matching
      bsr=[anycrlf|unicode]     specify \R handling
      callout_capture           show captures at callout time
      callout_data=&#60;n&#62;          set a value to pass via callouts
      callout_fail=&#60;n&#62;[:&#60;m&#62;]    control callout failure
@ -748,11 +745,9 @@ pattern.
      getall                    extract all captured substrings
  /g  global                    global matching
      jitstack=&#60;n&#62;              set size of JIT stack
      jitverify                 verify JIT usage
      mark                      show mark values
      match_limit=&#60;n&#62;           set a match limit
      memory                    show memory usage
      newline=&#60;type&#62;            set newline type
      offset=&#60;n&#62;                set starting offset
      ovector=&#60;n&#62;               set size of output vector
      recursion_limit=&#60;n&#62;       set a recursion limit
@ -761,14 +756,6 @@ The effects of these modifiers are described in the following sections.
 FIXME: Give more examples.
 </P>
 <br><b>
 Newline and \R handling
 </b><br>
 <P>
 These modifiers set the newline and \R processing conventions for the subject
 line, overriding any values that were set at compile time (as described above).
 JIT matching is disabled if these settings are changed at match time.
 </P>
 <br><b>
 Showing more text
 </b><br>
 <P>
@ -781,11 +768,13 @@ substring. In each case the remainder is output on the following line with a
 plus character following the capture number.
 </P>
 <P>
-The <b>allusedtext</b> modifier requests that all the text that was consulted 
+The <b>allusedtext</b> modifier requests that all the text that was consulted
-during a successful pattern match be shown. This affects the output if there 
+during a successful pattern match by the interpreter should be shown. This
-is a lookbehind at the start of a match, or a lookahead at the end, or if \K 
+feature is not supported for JIT matching, and if requested with JIT it is
-is used in the pattern. Characters that precede or follow the start and end of 
+ignored (with a warning message). Setting this modifier affects the output if
-the actual match are indicated in the output by '&#60;' or '&#62;' characters 
+there is a lookbehind at the start of a match, or a lookahead at the end, or if
 \K is used in the pattern. Characters that precede or follow the start and end
 of the actual match are indicated in the output by '&#60;' or '&#62;' characters
 underneath them. Here is an example:
 <pre>
  /(?&#60;=pqr)abc(?=xyz)/
@ -903,6 +892,11 @@ until it finds the minimum values for each parameter that allow
 <b>pcre2_match()</b> to complete without error.
 </P>
 <P>
 If JIT is being used, only the match limit is relevant. If DFA matching is 
 being used, neither limit is relevant, and this modifier is ignored (with a 
 warning message).
 </P>
 <P>
 The <i>match_limit</i> number is a measure of the amount of backtracking
 that takes place, and learning the minimum value can be instructive. For most
 simple matches, the number is quite small, but for patterns with very large
@ -944,6 +938,13 @@ appears, though of course it can also be used to set a default in a
 <b>#subject</b> command. It specifies the number of pairs of offsets that are
 available for storing matching information. The default is 15.
 </P>
 <P>
 At least one pair of offsets is always created by 
 <b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a 
 value of 0 is the same as 1. However a value of 0 is useful when testing the 
 POSIX API because it causes <b>regexec()</b> to be called with a NULL capture 
 vector.
 </P>
 <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
 <P>
 By default, <b>pcre2test</b> uses the standard PCRE2 matching function,
@ -1190,7 +1191,7 @@ Cambridge CB2 3QH, England.
 </P>
 <br><a name="SEC20" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 19 August 2014
+Last updated: 11 October 2014
 <br>
 Copyright &copy; 1997-2014 University of Cambridge.
 <br>
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "10 October 2014" "PCRE2 10.00"
+.TH PCRE2API 3 "14 October 2014" "PCRE2 10.00"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -2061,15 +2061,10 @@ pointer to the zero-terminated name, which is within the compiled pattern.
 Otherwise NULL is returned. A (*MARK) name may be available after a failed 
 match or a partial match, as well as after a successful one.
 .P
-The offset of the character at which the successful or partial match started is
+The offset of the character at which the successful match started is
 returned by \fBpcre2_get_startchar()\fP. This can be different to the value of
-\fIovector[0]\fP if the pattern contains the \eK escape sequence. This 
+\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note, 
-information is needed when doing partial matching over multiple data segments 
+however, that \eK has no effect for a partial match.
 (see the
 .\" HREF
 \fBpcre2partial\fP
 .\"
 documentation).
 .
 .
 .\" HTML <a name="errorlist"></a>
@ -2626,6 +2621,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 10 October 2014
+Last updated: 14 October 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi
--- a/doc/pcre2partial.3
+++ b/doc/pcre2partial.3
@ -0,0 +1,433 @@
 .TH PCRE2PARTIAL 3 "14 October 2014" "PCRE2 10.00"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions
 .SH "PARTIAL MATCHING IN PCRE2"
 .rs
 .sp
 In normal use of PCRE2, if the subject string that is passed to a matching
 function matches as far as it goes, but is too short to match the entire
 pattern, PCRE2_ERROR_NOMATCH is returned. There are circumstances where it
 might be helpful to distinguish this case from other cases in which there is no
 match.
 .P
 Consider, for example, an application where a human is required to type in data
 for a field with specific formatting requirements. An example might be a date
 in the form \fIddmmmyy\fP, defined by this pattern:
 .sp
  ^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$
 .sp
 If the application sees the user's keystrokes one by one, and can check that
 what has been typed so far is potentially valid, it is able to raise an error
 as soon as a mistake is made, by beeping and not reflecting the character that
 has been typed, for example. This immediate feedback is likely to be a better
 user interface than a check that is delayed until the entire string has been
 entered. Partial matching can also be useful when the subject string is very
 long and is not all available at once.
 .P
 PCRE2 supports partial matching by means of the PCRE2_PARTIAL_SOFT and
 PCRE2_PARTIAL_HARD options, which can be set when calling a matching function.
 The difference between the two options is whether or not a partial match is
 preferred to an alternative complete match, though the details differ between
 the two types of matching function. If both options are set, PCRE2_PARTIAL_HARD
 takes precedence.
 .P
 If you want to use partial matching with just-in-time optimized code, you must
 call \fBpcre2_jit_compile()\fP with one or both of these options:
 .sp
  PCRE2_JIT_PARTIAL_SOFT
  PCRE2_JIT_PARTIAL_HARD
 .sp
 PCRE2_JIT_COMPLETE should also be set if you are going to run non-partial
 matches on the same pattern. If the appropriate JIT mode has not been compiled,
 interpretive matching code is used.
 .P
 Setting a partial matching option disables two of PCRE2's standard
 optimizations. PCRE2 remembers the last literal code unit in a pattern, and
 abandons matching immediately if it is not present in the subject string. This
 optimization cannot be used for a subject string that might match only
 partially. PCRE2 also knows the minimum length of a matching string, and does
 not bother to run the matching function on shorter strings. This optimization
 is also disabled for partial matching.
 .
 .
 .SH "PARTIAL MATCHING USING pcre2_match()"
 .rs
 .sp
 A partial match occurs during a call to \fBpcre2_match()\fP when the end of the
 subject string is reached successfully, but matching cannot continue because
 more characters are needed. However, at least one character in the subject must
 have been inspected. This character need not form part of the final matched
 string; lookbehind assertions and the \eK escape sequence provide ways of
 inspecting characters before the start of a matched string. The requirement for
 inspecting at least one character exists because an empty string can always be
 matched; without such a restriction there would always be a partial match of an
 empty string at the end of the subject.
 .P
 When a partial match is returned, the first two elements in the ovector point
 to the portion of the subject that was matched. The appearance of \eK in the 
 pattern has no effect for a partial match. Consider this pattern:
 .sp
  /abc\eK123/
 .sp
 If it is matched against "456abc123xyz" the result is a complete match, and the
 ovector defines the matched string as "123", because \eK resets the "start of 
 match" point. However, if a partial match is requested and the subject string 
 is "456abc12", a partial match is found for the string "abc12", because all 
 these characters are needed for a subsequent re-match with additional
 characters.
 .P
 What happens when a partial match is identified depends on which of the two
 partial matching options is set.
 .
 .
 .SS "PCRE2_PARTIAL_SOFT WITH pcre2_match()"
 .rs
 .sp
 If PCRE2_PARTIAL_SOFT is set when \fBpcre2_match()\fP identifies a partial
 match, the partial match is remembered, but matching continues as normal, and
 other alternatives in the pattern are tried. If no complete match can be found,
 PCRE2_ERROR_PARTIAL is returned instead of PCRE2_ERROR_NOMATCH.
 .P
 This option is "soft" because it prefers a complete match over a partial match.
 All the various matching items in a pattern behave as if the subject string is
 potentially complete. For example, \ez, \eZ, and $ match at the end of the
 subject, as normal, and for \eb and \eB the end of the subject is treated as a
 non-alphanumeric.
 .P
 If there is more than one partial match, the first one that was found provides
 the data that is returned. Consider this pattern:
 .sp
  /123\ew+X|dogY/
 .sp
 If this is matched against the subject string "abc123dog", both
 alternatives fail to match, but the end of the subject is reached during
 matching, so PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9,
 identifying "123dog" as the first partial match that was found. (In this
 example, there are two partial matches, because "dog" on its own partially
 matches the second alternative.)
 .
 .
 .SS "PCRE2_PARTIAL_HARD WITH pcre2_match()"
 .rs
 .sp
 If PCRE2_PARTIAL_HARD is set for \fBpcre2_match()\fP, PCRE2_ERROR_PARTIAL is
 returned as soon as a partial match is found, without continuing to search for
 possible complete matches. This option is "hard" because it prefers an earlier
 partial match over a later complete match. For this reason, the assumption is
 made that the end of the supplied subject string may not be the true end of the
 available data, and so, if \ez, \eZ, \eb, \eB, or $ are encountered at the end
 of the subject, the result is PCRE2_ERROR_PARTIAL, provided that at least one
 character in the subject has been inspected.
 .
 .
 .SS "Comparing hard and soft partial matching"
 .rs
 .sp
 The difference between the two partial matching options can be illustrated by a
 pattern such as:
 .sp
  /dog(sbody)?/
 .sp
 This matches either "dog" or "dogsbody", greedily (that is, it prefers the
 longer string if possible). If it is matched against the string "dog" with
 PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". However, if
 PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PARTIAL. On the other
 hand, if the pattern is made ungreedy the result is different:
 .sp
  /dog(sbody)??/
 .sp
 In this case the result is always a complete match because that is found first,
 and matching never continues after finding a complete match. It might be easier
 to follow this explanation by thinking of the two patterns like this:
 .sp
  /dog(sbody)?/    is the same as  /dogsbody|dog/
  /dog(sbody)??/   is the same as  /dog|dogsbody/
 .sp
 The second pattern will never match "dogsbody", because it will always find the
 shorter match first.
 .
 .
 .SH "PARTIAL MATCHING USING pcre2_dfa_match()"
 .rs
 .sp
 The DFA functions move along the subject string character by character, without
 backtracking, searching for all possible matches simultaneously. If the end of
 the subject is reached before the end of the pattern, there is the possibility
 of a partial match, again provided that at least one character has been
 inspected.
 .P
 When PCRE2_PARTIAL_SOFT is set, PCRE2_ERROR_PARTIAL is returned only if there
 have been no complete matches. Otherwise, the complete matches are returned.
 However, if PCRE2_PARTIAL_HARD is set, a partial match takes precedence over
 any complete matches. The portion of the string that was matched when the
 longest partial match was found is set as the first matching string.
 .P
 Because the DFA functions always search for all possible matches, and there is
 no difference between greedy and ungreedy repetition, their behaviour is
 different from the standard functions when PCRE2_PARTIAL_HARD is set. Consider
 the string "dog" matched against the ungreedy pattern shown above:
 .sp
  /dog(sbody)??/
 .sp
 Whereas the standard functions stop as soon as they find the complete match for
 "dog", the DFA functions also find the partial match for "dogsbody", and so
 return that when PCRE2_PARTIAL_HARD is set.
 .
 .
 .SH "PARTIAL MATCHING AND WORD BOUNDARIES"
 .rs
 .sp
 If a pattern ends with one of the sequences \eb or \eB, which test for word
 boundaries, partial matching with PCRE2_PARTIAL_SOFT can give counter-intuitive
 results. Consider this pattern:
 .sp
  /\ebcat\eb/
 .sp
 This matches "cat", provided there is a word boundary at either end. If the
 subject string is "the cat", the comparison of the final "t" with a following
 character cannot take place, so a partial match is found. However, normal
 matching carries on, and \eb matches at the end of the subject when the last
 character is a letter, so a complete match is found. The result, therefore, is
 \fInot\fP PCRE2_ERROR_PARTIAL. Using PCRE2_PARTIAL_HARD in this case does yield
 PCRE2_ERROR_PARTIAL, because then the partial match takes precedence.
 .
 .
 .SH "EXAMPLE OF PARTIAL MATCHING USING PCRE2TEST"
 .rs
 .sp
 If the \fBpartial_soft\fP (or \fBps\fP) modifier is present on a
 \fBpcre2test\fP data line, the PCRE2_PARTIAL_SOFT option is used for the match.
 Here is a run of \fBpcre2test\fP that uses the date example quoted above:
 .sp
    re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/
  data> 25jun04\e=ps
   0: 25jun04
   1: jun
  data> 25dec3\e=ps
  Partial match: 25dec3
  data> 3ju\e=ps
  Partial match: 3ju
  data> 3juj\e=ps
  No match
  data> j\e=ps
  No match
 .sp
 The first data string is matched completely, so \fBpcre2test\fP shows the
 matched substrings. The remaining four strings do not match the complete
 pattern, but the first two are partial matches. Similar output is obtained
 if DFA matching is used.
 .P
 If the \fBpartial_hard\fP (or \fBph\fP) modifier is present on a
 \fBpcre2test\fP data line, the PCRE2_PARTIAL_HARD option is set for the match.
 .
 .
 .SH "MULTI-SEGMENT MATCHING WITH pcre2_dfa_match()"
 .rs
 .sp
 When a partial match has been found using a DFA matching function, it is
 possible to continue the match by providing additional subject data and calling
 the function again with the same compiled regular expression, this time setting
 the PCRE2_DFA_RESTART option. You must pass the same working space as before,
 because this is where details of the previous partial match are stored. Here is
 an example using \fBpcre2test\fP:
 .sp
    re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/
  data> 23ja\e=dfa,ps
  Partial match: 23ja
  data> n05\e=dfa,dfa_restart
   0: n05
 .sp
 The first call has "23ja" as the subject, and requests partial matching; the
 second call has "n05" as the subject for the continued (restarted) match.
 Notice that when the match is complete, only the last part is shown; PCRE2 does
 not retain the previously partially-matched string. It is up to the calling
 program to do that if it needs to.
 .P
 That means that, for an unanchored pattern, if a continued match fails, it is
 not possible to try again at a new starting point. All this facility is capable
 of doing is continuing with the previous match attempt. In the previous
 example, if the second set of data is "ug23" the result is no match, even
 though there would be a match for "aug23" if the entire string were given at
 once. Depending on the application, this may or may not be what you want.
 The only way to allow for starting again at the next character is to retain the
 matched part of the subject and try a new complete match.
 .P
 You can set the PCRE2_PARTIAL_SOFT or PCRE2_PARTIAL_HARD options with
 PCRE2_DFA_RESTART to continue partial matching over multiple segments. This
 facility can be used to pass very long subject strings to the DFA matching
 functions.
 .
 .
 .SH "MULTI-SEGMENT MATCHING WITH pcre2_match()"
 .rs
 .sp
 Unlike the DFA function, it is not possible to restart the previous match with
 a new segment of data when using \fBpcre2_match()\fP. Instead, new data must be
 added to the previous subject string, and the entire match re-run, starting
 from the point where the partial match occurred. Earlier data can be discarded.
 .P
 It is best to use PCRE2_PARTIAL_HARD in this situation, because it does not
 treat the end of a segment as the end of the subject when matching \ez, \eZ,
 \eb, \eB, and $. Consider an unanchored pattern that matches dates:
 .sp
    re> /\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed/
  data> The date is 23ja\e=ph
  Partial match: 23ja
 .sp
 At this stage, an application could discard the text preceding "23ja", add on
 text from the next segment, and call the matching function again. Unlike the
 DFA matching function, the entire matching string must always be available,
 and the complete matching process occurs for each call, so more memory and more
 processing time is needed.
 .
 .
 .SH "ISSUES WITH MULTI-SEGMENT MATCHING"
 .rs
 .sp
 Certain types of pattern may give problems with multi-segment matching,
 whichever matching function is used.
 .P
 1. If the pattern contains a test for the beginning of a line, you need to pass
 the PCRE2_NOTBOL option when the subject string for any call does not start at the
 beginning of a line. There is also a PCRE2_NOTEOL option, but in practice when
 doing multi-segment matching you should be using PCRE2_PARTIAL_HARD, which
 includes the effect of PCRE2_NOTEOL.
 .P
 2. If a pattern contains a lookbehind assertion, characters that precede the
 start of the partial match may have been inspected during the matching process.
 When using \fBpcre2_match()\fP, sufficient characters must be retained for the
 next match attempt. You can ensure that enough characters are retained by doing
 the following:
 .P
 Before doing any matching, find the length of the longest lookbehind in the
 pattern by calling \fBpcre2_pattern_info()\fP with the PCRE2_INFO_MAXLOOKBEHIND
 option. Note that the resulting count is in characters, not code units. After a
 partial match, moving back from the ovector[0] offset in the subject by the
 number of characters given for the maximum lookbehind gets you to the earliest
 character that must be retained. In a non-UTF or a 32-bit situation, moving
 back is just a subtraction, but in UTF-8 or UTF-16 you have to count characters
 while moving back through the code units.
 .P
 Characters before the point you have now reached can be discarded, and after
 the next segment has been added to what is retained, you should run the next
 match with the \fBstartoffset\fP argument set so that the match begins at the
 same point as before.
 .P
 For example, if the pattern "(?<=123)abc" is partially matched against the
 string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
 lookbehind count is 3, so all characters before offset 2 can be discarded. The
 value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP 
 displays a partial match, it indicates the lookbehind characters with '<' 
 characters:
 .sp
    re> "(?<=123)abc"
  data> xx123ab\e=ph
  Partial match: 123ab
                 <<< 
 .P
 3. Because a partial match must always contain at least one character, what
 might be considered a partial match of an empty string actually gives a "no
 match" result. For example:
 .sp
    re> /c(?<=abc)x/
  data> ab\e=ps
  No match
 .sp
 If the next segment begins "cx", a match should be found, but this will only
 happen if characters from the previous segment are retained. For this reason, a
 "no match" result should be interpreted as "partial match of an empty string"
 when the pattern contains lookbehinds.
 .P
 4. Matching a subject string that is split into multiple segments may not
 always produce exactly the same result as matching over one single long string,
 especially when PCRE2_PARTIAL_SOFT is used. The section "Partial Matching and
 Word Boundaries" above describes an issue that arises if the pattern ends with
 \eb or \eB. Another kind of difference may occur when there are multiple
 matching possibilities, because (for PCRE2_PARTIAL_SOFT) a partial match result
 is given only when there are no completed matches. This means that as soon as
 the shortest match has been found, continuation to a new subject segment is no
 longer possible. Consider this \fBpcre2test\fP example:
 .sp
    re> /dog(sbody)?/
  data> dogsb\e=ps
   0: dog
  data> do\e=ps,dfa
  Partial match: do
  data> gsb\e=ps,dfa,dfa_restart
   0: g
  data> dogsbody\e=dfa
   0: dogsbody
   1: dog
 .sp
 The first data line passes the string "dogsb" to a standard matching function,
 setting the PCRE2_PARTIAL_SOFT option. Although the string is a partial match
 for "dogsbody", the result is not PCRE2_ERROR_PARTIAL, because the shorter
 string "dog" is a complete match. Similarly, when the subject is presented to
 a DFA matching function in several parts ("do" and "gsb" being the first two)
 the match stops when "dog" has been found, and it is not possible to continue.
 On the other hand, if "dogsbody" is presented as a single string, a DFA
 matching function finds both matches.
 .P
 Because of these problems, it is best to use PCRE2_PARTIAL_HARD when matching
 multi-segment data. The example above then behaves differently:
 .sp
    re> /dog(sbody)?/
  data> dogsb\e=ph
  Partial match: dogsb
  data> do\e=ps,dfa
  Partial match: do
  data> gsb\e=ph,dfa,dfa_restart
  Partial match: gsb
 .sp
 5. Patterns that contain alternatives at the top level which do not all start
 with the same pattern item may not work as expected when PCRE2_DFA_RESTART is
 used. For example, consider this pattern:
 .sp
  1234|3789
 .sp
 If the first part of the subject is "ABC123", a partial match of the first
 alternative is found at offset 3. There is no partial match for the second
 alternative, because such a match does not start at the same point in the
 subject string. Attempting to continue with the string "7890" does not yield a
 match because only those alternatives that match at one point in the subject
 are remembered. The problem arises because the start of the second alternative
 matches within the first alternative. There is no problem with anchored
 patterns or patterns such as:
 .sp
  1234|ABCD
 .sp
 where no string can be a partial match for both alternatives. This is not a
 problem if a standard matching function is used, because the entire match has
 to be rerun each time:
 .sp
    re> /1234|3789/
  data> ABC123\e=ph
  Partial match: 123
  data> 1237890
   0: 3789
 .sp
 Of course, instead of using PCRE2_DFA_RESTART, the same technique of re-running
 the entire match can also be used with the DFA matching function. Another
 possibility is to work with two buffers. If a partial match at offset \fIn\fP
 in the first buffer is followed by "no match" when PCRE2_DFA_RESTART is used on
 the second buffer, you can then try a new match starting at offset \fIn+1\fP in
 the first buffer.
 .
 .
 .SH AUTHOR
 .rs
 .sp
 .nf
 Philip Hazel
 University Computing Service
 Cambridge CB2 3QH, England.
 .fi
 .
 .
 .SH REVISION
 .rs
 .sp
 .nf
 Last updated: 14 October 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -424,6 +424,7 @@ PATTERN MODIFIERS
         /I  info                      show info about compiled pattern
             hex                       pattern is coded in hexadecimal
             jit[=<number>]            use JIT
             jitverify                 verify JIT use
             locale=<name>             use this locale
             memory                    show memory used
             newline=<type>            set newline type
@ -448,68 +449,69 @@ PATTERN MODIFIERS
       as newlines, both in the pattern and (by default) in subject lines. The
       type must be one of CR, LF, CRLF, ANYCRLF, or ANY.
       Both the \R and newline settings can be changed at match time,  but  if
       this is done, JIT matching is disabled.
   Information about a pattern
-       The  debug modifier is a shorthand for info,fullbincode, requesting all
+       The debug modifier is a shorthand for info,fullbincode, requesting  all
       available information.
       The bincode modifier causes a representation of the compiled code to be
-       output  after compilation. This information does not contain length and
+       output after compilation. This information does not contain length  and
       offset values, which ensures that the same output is generated for dif-
-       ferent  internal  link  sizes  and different code unit widths. By using
+       ferent internal link sizes and different code  unit  widths.  By  using
-       bincode, the same regression tests can be used  in  different  environ-
+       bincode,  the  same  regression tests can be used in different environ-
       ments.
-       The  fullbincode  modifier, by contrast, does include length and offset
+       The fullbincode modifier, by contrast, does include length  and  offset
       values. This is used in a few special tests and is also useful for one-
       off tests.
-       The  info  modifier  requests  information  about  the compiled pattern
+       The info modifier  requests  information  about  the  compiled  pattern
-       (whether it is anchored, has a fixed first character, and so  on).  The
+       (whether  it  is anchored, has a fixed first character, and so on). The
       information is obtained from the pcre2_pattern_info() function.
   Specifying a pattern in hex
       The hex modifier specifies that the characters of the pattern are to be
-       interpreted as pairs of hexadecimal digits. White  space  is  permitted
+       interpreted  as  pairs  of hexadecimal digits. White space is permitted
       between pairs. For example:
         /ab 32 59/hex
-       This  feature  is  provided  as a way of creating patterns that contain
+       This feature is provided as a way of  creating  patterns  that  contain
       binary zero characters. When hex is set, it implies use_length.
   Using the pattern's length
-       By default, pcre2test passes patterns  as  zero-terminated  strings  to
+       By  default,  pcre2test  passes  patterns as zero-terminated strings to
-       pcre2_compile(),  giving  the  length  as -1. If use_length is set, the
+       pcre2_compile(), giving the length as -1. If  use_length  is  set,  the
       length of the pattern is passed. This is implied if hex is set.
   JIT compilation
-       The /jit modifier may optionally be followed by a number in the range 0
+       The  /jit  modifier may optionally be followed by an equals sign  and a
-       to 7:
+       number in the range 0 to 7:
         0  disable JIT
-         1  normal match only
+         1  use JIT for normal match only
-         2  soft partial match only
+         2  use JIT for soft partial match only
-         3  normal match and soft partial match
+         3  use JIT for normal match and soft partial match
-         4  hard partial match only
+         4  use JIT for hard partial match only
-         6  soft and hard partial match
+         6  use JIT for soft and hard partial match
         7  all three modes
-       If  no number is given, 7 is assumed. If JIT compilation is successful,
+       If no number is given, 7 is assumed. If JIT compilation is  successful,
-       the compiled JIT code will automatically be used when pcre2_match()  is
+       the  compiled JIT code will automatically be used when pcre2_match() is
-       run,  except when incompatible run-time options are specified. For more
+       run for the appropriate type of match, except  when  incompatible  run-
-       details, see the pcre2jit documentation. See also the jitstack modifier
+       time options are specified. For more details, see the pcre2jit documen-
-       below for a way of setting the size of the JIT stack.
+       tation. See also the jitstack modifier below for a way of  setting  the
       size of the JIT stack.
-       If  the  jitverify  modifier is specified, the text "(JIT)" is added to
+       If  the jitverify modifier is specified, information about the compiled
       pattern shows whether JIT compilation was or  was  not  successful.  If
       jitverify  is  specified without jit, jit=7 is assumed. If JIT compila-
       tion is successful when jitverify is set, the text "(JIT)" is added  to
       the first output line after a match or non match when JIT-compiled code
-       was actually used. This modifier can also be set on a subject line.
+       was actually used.
   Setting a locale
@ -518,31 +520,31 @@ PATTERN MODIFIERS
         /pattern/locale=fr_FR
       The given locale is set, pcre2_maketables() is called to build a set of
-       character tables for the locale, and this is then passed to  pcre2_com-
+       character  tables for the locale, and this is then passed to pcre2_com-
-       pile()  when compiling the regular expression. The same tables are used
+       pile() when compiling the regular expression. The same tables are  used
       when matching the following subject lines. The /locale modifier applies
       only to the pattern on which it appears, but can be given in a #pattern
-       command if a default is needed. Setting a locale and alternate  charac-
+       command  if a default is needed. Setting a locale and alternate charac-
       ter tables are mutually exclusive.
   Showing pattern memory
-       The  /memory modifier causes the size in bytes of the memory block used
+       The /memory modifier causes the size in bytes of the memory block  used
-       to hold the compiled pattern to be output. This does  not  include  the
+       to  hold  the  compiled pattern to be output. This does not include the
-       size  of  the pcre2_code block; it is just the actual compiled data. If
+       size of the pcre2_code block; it is just the actual compiled  data.  If
       the pattern is subsequently passed to the JIT compiler, the size of the
       JIT compiled code is also output.
   Limiting nested parentheses
-       The  parens_nest_limit  modifier  sets  a  limit on the depth of nested
+       The parens_nest_limit modifier sets a limit  on  the  depth  of  nested
-       parentheses in a pattern. Breaching  the  limit  causes  a  compilation
+       parentheses  in  a  pattern.  Breaching  the limit causes a compilation
       error.
   Using the POSIX wrapper API
-       The  /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
+       The /posix modifier causes pcre2test to call PCRE2 via the POSIX  wrap-
-       per API rather than its  native  API.  This  supports  only  the  8-bit
+       per  API  rather  than  its  native  API.  This supports only the 8-bit
       library.  When the POSIX API is being used, the following pattern modi-
       fiers set options for the regcomp() function:
@ -554,25 +556,25 @@ PATTERN MODIFIERS
         ucp                REG_UCP        )   the POSIX standard
         utf                REG_UTF8       )
-       The aftertext and allaftertext  subject  modifiers  work  as  described
+       The  aftertext  and  allaftertext  subject  modifiers work as described
       below. All other modifiers cause an error.
   Testing the stack guard feature
-       The  /stackguard  modifier  is  used  to test the use of pcre2_set_com-
+       The /stackguard modifier is used to  test  the  use  of  pcre2_set_com-
-       pile_recursion_guard(), a function that is  provided  to  enable  stack
+       pile_recursion_guard(),  a  function  that  is provided to enable stack
-       availability  to  be checked during compilation (see the pcre2api docu-
+       availability to be checked during compilation (see the  pcre2api  docu-
-       mentation for details). If the number  specified  by  the  modifier  is
+       mentation  for  details).  If  the  number specified by the modifier is
       greater than zero, pcre2_set_compile_recursion_guard() is called to set
-       up callback from pcre2_compile() to a local function. The  argument  it
+       up  callback  from pcre2_compile() to a local function. The argument it
-       is  passed is the current nesting parenthesis depth; if this is greater
+       is passed is the current nesting parenthesis depth; if this is  greater
       than the value given by the modifier, non-zero is returned, causing the
       compilation to be aborted.
   Using alternative character tables
-       The  /tables  modifier  must be followed by a single digit. It causes a
+       The /tables modifier must be followed by a single digit.  It  causes  a
-       specific set of built-in character tables to be  passed  to  pcre2_com-
+       specific  set  of  built-in character tables to be passed to pcre2_com-
       pile(). This is used in the PCRE2 tests to check behaviour with differ-
       ent character tables. The digit specifies the tables as follows:
@ -581,15 +583,15 @@ PATTERN MODIFIERS
               pcre2_chartables.c.dist
         2   a set of tables defining ISO 8859 characters
-       In table 2, some characters whose codes are greater than 128 are  iden-
+       In  table 2, some characters whose codes are greater than 128 are iden-
-       tified  as  letters,  digits,  spaces, etc. Setting alternate character
+       tified as letters, digits, spaces,  etc.  Setting  alternate  character
       tables and a locale are mutually exclusive.
   Setting certain match controls
       The following modifiers are really subject modifiers, and are described
-       below.   However, they may be included in a pattern's modifier list, in
+       below.  However, they may be included in a pattern's modifier list,  in
-       which case they are applied to every subject  line  that  is  processed
+       which  case  they  are  applied to every subject line that is processed
       with that pattern. They do not affect the compilation process.
             aftertext                 show text after match
@ -597,10 +599,9 @@ PATTERN MODIFIERS
             allcaptures               show all captures
             allusedtext               show all consulted text
         /g  global                    global matching
             jitverify                 verify JIT usage
             mark                      show mark values
-       These  modifiers may not appear in a #pattern command. If you want them
+       These modifiers may not appear in a #pattern command. If you want  them
       as defaults, set them in a #subject command.
@ -611,13 +612,12 @@ SUBJECT MODIFIERS
   Setting match options
-       The    following   modifiers   set   options   for   pcre2_match()   or
+       The   following   modifiers   set   options   for   pcre2_match()    or
       pcre2_dfa_match(). See pcre2api for a description of their effects.
             anchored                  set PCRE2_ANCHORED
             dfa_restart               set PCRE2_DFA_RESTART
             dfa_shortest              set PCRE2_DFA_SHORTEST
             no_start_optimize         set PCRE2_NO_START_OPTIMIZE
             no_utf_check              set PCRE2_NO_UTF_CHECK
             notbol                    set PCRE2_NOTBOL
             notempty                  set PCRE2_NOTEMPTY
@ -626,28 +626,27 @@ SUBJECT MODIFIERS
             partial_hard (or ph)      set PCRE2_PARTIAL_HARD
             partial_soft (or ps)      set PCRE2_PARTIAL_SOFT
-       The partial matching modifiers are provided with abbreviations  because
+       The  partial matching modifiers are provided with abbreviations because
       they appear frequently in tests.
-       If  the  /posix  modifier was present on the pattern, causing the POSIX
+       If the /posix modifier was present on the pattern,  causing  the  POSIX
       wrapper API to be used, the only option-setting modifiers that have any
-       effect   are   notbol,   notempty,   and  noteol,  causing  REG_NOTBOL,
+       effect  are  notbol,  notempty,   and   noteol,   causing   REG_NOTBOL,
-       REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to  regexec().
+       REG_NOTEMPTY,  and REG_NOTEOL, respectively, to be passed to regexec().
       Any other modifiers cause an error.
   Setting match controls
-       The  following  modifiers  affect the matching process or request addi-
+       The following modifiers affect the matching process  or  request  addi-
-       tional information. Some of them may also be  specified  on  a  pattern
+       tional  information.  Some  of  them may also be specified on a pattern
-       line  (see  above), in which case they apply to every subject line that
+       line (see above), in which case they apply to every subject  line  that
       is matched against that pattern.
             aftertext                 show text after match
             allaftertext              show text after captures
             allcaptures               show all captures
-             allusedtext               show all consulted text
+             allusedtext               show all consulted text (non-JIT only)
             altglobal                 alternative global matching
             bsr=[anycrlf|unicode]     specify \R handling
             callout_capture           show captures at callout time
             callout_data=<n>          set a value to pass via callouts
             callout_fail=<n>[:<m>]    control callout failure
@ -659,11 +658,9 @@ SUBJECT MODIFIERS
             getall                    extract all captured substrings
         /g  global                    global matching
             jitstack=<n>              set size of JIT stack
             jitverify                 verify JIT usage
             mark                      show mark values
             match_limit=<n>           set a match limit
             memory                    show memory usage
             newline=<type>            set newline type
             offset=<n>                set starting offset
             ovector=<n>               set size of output vector
             recursion_limit=<n>       set a recursion limit
@ -671,13 +668,6 @@ SUBJECT MODIFIERS
       The effects of these modifiers are described in the following sections.
       FIXME: Give more examples.
   Newline and \R handling
       These  modifiers  set the newline and \R processing conventions for the
       subject line, overriding any values that were set at compile  time  (as
       described  above).   JIT  matching  is  disabled  if these settings are
       changed at match time.
   Showing more text
       The aftertext modifier requests that as well  as  outputting  the  sub-
@ -690,18 +680,21 @@ SUBJECT MODIFIERS
       ture number.
       The  allusedtext modifier requests that all the text that was consulted
-       during a successful pattern match be shown. This affects the output  if
+       during a successful pattern match by the interpreter should  be  shown.
-       there  is  a  lookbehind at the start of a match, or a lookahead at the
+       This  feature  is not supported for JIT matching, and if requested with
-       end, or if \K is used in the pattern. Characters that precede or follow
+       JIT it is ignored (with  a  warning  message).  Setting  this  modifier
-       the  start  and  end of the actual match are indicated in the output by
+       affects the output if there is a lookbehind at the start of a match, or
-       '<' or '>' characters underneath them. Here is an example:
+       a lookahead at the end, or if \K is used  in  the  pattern.  Characters
       that  precede or follow the start and end of the actual match are indi-
       cated in the output by '<' or '>' characters underneath them.  Here  is
       an example:
         /(?<=pqr)abc(?=xyz)/
             123pqrabcxyz456\=allusedtext
          0: pqrabcxyz
             <<<   >>>
-       This shows that the matched string is "abc",  with  the  preceding  and
+       This  shows  that  the  matched string is "abc", with the preceding and
       following strings "pqr" and "xyz" also consulted during the match.
   Showing the value of all capture groups
@ -709,124 +702,133 @@ SUBJECT MODIFIERS
       The allcaptures modifier requests that the values of all potential cap-
       tured parentheses be output after a match. By default, only those up to
       the highest one actually used in the match are output (corresponding to
-       the return code from pcre2_match()). Groups that did not take  part  in
+       the  return  code from pcre2_match()). Groups that did not take part in
       the match are output as "<unset>".
   Testing callouts
-       A  callout function is supplied when pcre2test calls the library match-
+       A callout function is supplied when pcre2test calls the library  match-
-       ing functions, unless callout_none is specified. If callout_capture  is
+       ing  functions, unless callout_none is specified. If callout_capture is
       set, the current captured groups are output when a callout occurs.
-       The  callout_fail modifier can be given one or two numbers. If there is
+       The callout_fail modifier can be given one or two numbers. If there  is
       only one number, 1 is returned instead of 0 when a callout of that num-
-       ber  is  reached.  If two numbers are given, 1 is returned when callout
+       ber is reached. If two numbers are given, 1 is  returned  when  callout
       <n> is reached for the <m>th time.
-       The callout_data modifier can be given an unsigned or a  negative  num-
+       The  callout_data  modifier can be given an unsigned or a negative num-
-       ber.   Any  value  other than zero is used as a return from pcre2test's
+       ber.  Any value other than zero is used as a  return  from  pcre2test's
       callout function.
   Testing substring extraction functions
-       The copy  and  get  modifiers  can  be  used  to  test  the  pcre2_sub-
+       The  copy  and  get  modifiers  can  be  used  to  test  the pcre2_sub-
       string_copy_xxx() and pcre2_substring_get_xxx() functions.  They can be
-       given more than once, and each can specify a group name or number,  for
+       given  more than once, and each can specify a group name or number, for
       example:
          abcd\=copy=1,copy=3,get=G1
-       If  the  #subject  command  is  used to set default copy and get lists,
+       If the #subject command is used to set  default  copy  and  get  lists,
-       these can be unset by specifying a negative number for numbered  groups
+       these  can be unset by specifying a negative number for numbered groups
       and an empty name for named groups.
-       The  getall  modifier  tests pcre2_substring_list_get(), which extracts
+       The getall modifier tests  pcre2_substring_list_get(),  which  extracts
       all captured substrings.
-       If the subject line is successfully matched, the  substrings  extracted
+       If  the  subject line is successfully matched, the substrings extracted
-       by  the  convenience  functions  are  output  with C, G, or L after the
+       by the convenience functions are output with  C,  G,  or  L  after  the
-       string number instead of a colon. This is in  addition  to  the  normal
+       string  number  instead  of  a colon. This is in addition to the normal
-       full  list.  The string length (that is, the return from the extraction
+       full list. The string length (that is, the return from  the  extraction
       function) is given in parentheses after each substring.
   Finding all matches in a string
       Searching for all possible matches within a subject can be requested by
-       the  global or /altglobal modifier. After finding a match, the matching
+       the global or /altglobal modifier. After finding a match, the  matching
-       function is called again to search the remainder of  the  subject.  The
+       function  is  called  again to search the remainder of the subject. The
-       difference  between  global  and  altglobal is that the former uses the
+       difference between global and altglobal is that  the  former  uses  the
-       start_offset argument to pcre2_match() or  pcre2_dfa_match()  to  start
+       start_offset  argument  to  pcre2_match() or pcre2_dfa_match() to start
-       searching  at  a new point within the entire string (which is what Perl
+       searching at a new point within the entire string (which is  what  Perl
       does), whereas the latter passes over a shortened substring. This makes
       a difference to the matching process if the pattern begins with a look-
       behind assertion (including \b or \B).
-       If an empty string  is  matched,  the  next  match  is  done  with  the
+       If  an  empty  string  is  matched,  the  next  match  is done with the
       PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
       for another, non-empty, match at the same point in the subject. If this
-       match  fails,  the  start  offset  is advanced, and the normal match is
+       match fails, the start offset is advanced,  and  the  normal  match  is
-       retried. This imitates the way Perl handles such cases when  using  the
+       retried.  This  imitates the way Perl handles such cases when using the
-       /g  modifier  or  the  split()  function. Normally, the start offset is
+       /g modifier or the split() function.  Normally,  the  start  offset  is
-       advanced by one character, but if  the  newline  convention  recognizes
+       advanced  by  one  character,  but if the newline convention recognizes
-       CRLF  as  a newline, and the current character is CR followed by LF, an
+       CRLF as a newline, and the current character is CR followed by  LF,  an
       advance of two is used.
   Setting the JIT stack size
-       The jitstack modifier provides a way of setting the maximum stack  size
+       The  jitstack modifier provides a way of setting the maximum stack size
-       that  is  used  by the just-in-time optimization code. It is ignored if
+       that is used by the just-in-time optimization code. It  is  ignored  if
-       JIT optimization is not being used. Providing a stack  that  is  larger
+       JIT  optimization  is  not being used. Providing a stack that is larger
       than the default 32K is necessary only for very complicated patterns.
   Setting match and recursion limits
-       The  match_limit and recursion_limit modifiers set the appropriate lim-
+       The match_limit and recursion_limit modifiers set the appropriate  lim-
       its in the match context. These values are ignored when the find_limits
       modifier is specified.
   Finding minimum limits
-       If  the  find_limits modifier is present, pcre2test calls pcre2_match()
+       If the find_limits modifier is present, pcre2test  calls  pcre2_match()
-       several times, setting  different  values  in  the  match  context  via
+       several  times,  setting  different  values  in  the  match context via
-       pcre2_set_match_limit()  and pcre2_set_recursion_limit() until it finds
+       pcre2_set_match_limit() and pcre2_set_recursion_limit() until it  finds
-       the minimum values for each parameter that allow pcre2_match() to  com-
+       the  minimum values for each parameter that allow pcre2_match() to com-
       plete without error.
-       The  match_limit number is a measure of the amount of backtracking that
+       If JIT is being used, only the match limit is relevant. If DFA matching
-       takes place, and learning the minimum value  can  be  instructive.  For
+       is  being used, neither limit is relevant, and this modifier is ignored
-       most  simple  matches, the number is quite small, but for patterns with
+       (with a warning message).
-       very large numbers of matching possibilities, it can become large  very
+
-       quickly    with    increasing    length    of   subject   string.   The
+       The match_limit number is a measure of the amount of backtracking  that
-       match_limit_recursion number is a measure of how  much  stack  (or,  if
+       takes  place,  and  learning  the minimum value can be instructive. For
-       PCRE2  is  compiled with NO_RECURSE, how much heap) memory is needed to
+       most simple matches, the number is quite small, but for  patterns  with
       very  large numbers of matching possibilities, it can become large very
       quickly   with   increasing   length    of    subject    string.    The
       recursion_limit number is a measure of how much stack (or, if
       PCRE2 is compiled with NO_RECURSE, how much heap) memory is  needed  to
       complete the match attempt.
   Showing MARK names
       The mark modifier causes the names from backtracking control verbs that
-       are  returned from calls to pcre2_match() to be displayed. If a mark is
+       are returned from calls to pcre2_match() to be displayed. If a mark  is
-       returned for a match, non-match, or partial match, pcre2test shows  it.
+       returned  for a match, non-match, or partial match, pcre2test shows it.
-       For  a  match, it is on a line by itself, tagged with "MK:". Otherwise,
+       For a match, it is on a line by itself, tagged with  "MK:".  Otherwise,
       it is added to the non-match message.
   Showing memory usage
-       The memory modifier causes pcre2test to log all memory  allocation  and
+       The  memory  modifier causes pcre2test to log all memory allocation and
       freeing calls that occur during a match operation.
   Setting a starting offset
-       The  offset  modifier  sets  an  offset  in the subject string at which
+       The offset modifier sets an offset  in  the  subject  string  at  which
       matching starts. Its value is a number of code units, not characters.
   Setting the size of the output vector
-       The ovector modifier applies only to  the  subject  line  in  which  it
+       The  ovector  modifier  applies  only  to  the subject line in which it
-       appears,  though  of  course  it can also be used to set a default in a
+       appears, though of course it can also be used to set  a  default  in  a
-       #subject command. It specifies the number of pairs of offsets that  are
+       #subject  command. It specifies the number of pairs of offsets that are
       available for storing matching information. The default is 15.
       At least one pair of offsets is always created by pcre2_match_data_cre-
       ate(),  for  matching  with  PCRE2's native API, so a value of 0 is the
       same as 1. However, a value of 0 is useful when testing the  POSIX  API
       because it causes regexec() to be called with a NULL capture vector.
 THE ALTERNATIVE MATCHING FUNCTION
@ -1069,5 +1071,5 @@ AUTHOR
 REVISION
-       Last updated: 19 August 2014
+       Last updated: 11 October 2014
       Copyright (c) 1997-2014 University of Cambridge.
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -612,6 +612,7 @@ clock_t total_match_time = 0;
 static uint32_t dfa_matched;
 static uint32_t forbid_utf = 0;
 static uint32_t maxlookbehind;
 static uint32_t max_oveccount;
 static uint32_t callout_count;
@ -2293,6 +2294,55 @@ return 0;
 /*************************************************
 *         Move back by so many characters        *
 *************************************************/
 /* Given a code unit offset in a subject string, move backwards by a number of
 characters, and return the resulting offset. The scan never moves in front of
 the start of the subject: if fewer than "count" characters precede the offset,
 the yield is zero.
 Arguments:
  subject   pointer to the string
  offset    start offset, in code units
  count     count of characters to move back by
  utf       TRUE if in UTF mode
 Returns:   a possibly changed offset, never greater than the one passed in
 */
 static PCRE2_SIZE
 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
 {
 /* Outside UTF mode, and in 32-bit mode, moving back one character is moving
 back one code unit. Clamp before subtracting so that the unsigned arithmetic
 cannot wrap when count exceeds offset. */
 if (!utf || test_mode == PCRE32_MODE)
  return (count >= offset)? 0 : (offset - count);
 if (test_mode == PCRE8_MODE)
  {
  PCRE2_SPTR8 start = (PCRE2_SPTR8)subject;
  PCRE2_SPTR8 pp = start + offset;
  /* Stop as soon as the start of the subject is reached, so that nothing in
  front of the buffer is ever read. */
  for (; count > 0 && pp > start; count--)
    {
    pp--;
    /* Skip back over UTF-8 continuation bytes (10xxxxxx) to the first byte of
    the character. */
    while (pp > start && (*pp & 0xc0) == 0x80) pp--;
    }
  return (PCRE2_SIZE)(pp - start);
  }
 /* 16-bit mode */
  {
  PCRE2_SPTR16 start = (PCRE2_SPTR16)subject;
  PCRE2_SPTR16 pp = start + offset;
  for (; count > 0 && pp > start; count--)
    {
    pp--;
    /* A code unit in the range 0xdc00-0xdfff is the second half of a surrogate
    pair; step back once more to the first half. */
    if (pp > start && (*pp & 0xfc00) == 0xdc00) pp--;
    }
  return (PCRE2_SIZE)(pp - start);
  }
 }
 /*************************************************
 *        Read or extend an input line            *
 *************************************************/
@ -3099,8 +3149,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
  BOOL match_limit_set, recursion_limit_set;
  uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
    hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
-    maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
+    minlength, nameentrysize, namecount, newline_convention, recursion_limit;
    recursion_limit;
  /* These info requests may return PCRE2_ERROR_UNSET. */
@ -3145,7 +3194,6 @@ if ((pat_patctl.control & CTL_INFO) != 0)
      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
      pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
      pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) +
      pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
@ -3700,6 +3748,11 @@ if (TEST(compiled_code, ==, NULL))
  fprintf(outfile, "\n");
  return PR_SKIP;
  }
 /* Remember the maximum lookbehind, for partial matching. */ 
 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
  return PR_ABEND;
 /* Call the JIT compiler if requested. */
@ -4875,22 +4928,41 @@ for (gmatched = 0;; gmatched++)
    }    /* End of handling a successful match */
  /* There was a partial match. The value of ovector[0] is the bumpalong point, 
-  not any \K point that might exist. */ 
+  that is, startchar, not any \K point that might have been passed. */ 
  else if (capcount == PCRE2_ERROR_PARTIAL)
    {
    PCRE2_SIZE poffset; 
    int backlength; 
    int rubriclength = 0;
    fprintf(outfile, "Partial match");
    if ((dat_datctl.control & CTL_MARK) != 0 &&
         TESTFLD(match_data, mark, !=, NULL))
      {
      fprintf(outfile, ", mark=");
-      PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
+      PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
      rubriclength += 7;
      }
    fprintf(outfile, ": ");
    rubriclength += 15; 
    poffset = backchars(pp, ovector[0], maxlookbehind, utf);
    PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile); 
    PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
    if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
      fprintf(outfile, " (JIT)");
    fprintf(outfile, "\n");
    if (backlength != 0)
      {
      int i; 
      for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
      for (i = 0; i < backlength; i++) fprintf(outfile, "<");
      fprintf(outfile, "\n"); 
      }  
    break;  /* Out of the /g loop */
    }       /* End of handling partial match */
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@ -9286,17 +9286,21 @@ Partial match: abc12
    xyzabc123pqr 
 0: 123
    xyzabc12\=ps
-Partial match: 12
+Partial match: abc12
               <<<
    xyzabc12\=ph
-Partial match: 12
+Partial match: abc12
               <<<
 /\babc\b/
    +++abc+++
 0: abc
    +++ab\=ps
-Partial match: ab
+Partial match: +ab
               <
    +++ab\=ph
-Partial match: ab
+Partial match: +ab
               <
 /(?&word)(?&element)(?(DEFINE)(?<element><[^m][^>]>[^<])(?<word>\w*+))/B
 ------------------------------------------------------------------
@ -10324,7 +10328,8 @@ No match
 /(?<=abc)def/
    abc\=ph
-Partial match: 
+Partial match: abc
               <<<
 /abc$/
    abc
@ -11877,9 +11882,11 @@ Callout 2: last capture = 0
 /(?<=123)(*MARK:xx)abc/mark
    xxxx123a\=ph
-Partial match, mark=xx: a
+Partial match, mark=xx: 123a
                        <<<
    xxxx123a\=ps
-Partial match, mark=xx: a
+Partial match, mark=xx: 123a
                        <<<
 /123\Kabc/
    xxxx123a\=ph
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@ -947,7 +947,8 @@ Partial match: abc
   xyzfo\=ps
 No match
   foob\=ps,offset=2
-Partial match: b
+Partial match: foob
               <<<
   foobar...\=ps,dfa_restart,offset=4
 0: ar
   xyzfo\=ps
@ -7092,17 +7093,21 @@ Failed: error -40: item unsupported for DFA matching
    xyzabc123pqr 
 0: 123
    xyzabc12\=ps
-Partial match: 12
+Partial match: abc12
               <<<
    xyzabc12\=ph
-Partial match: 12
+Partial match: abc12
               <<<
 /\babc\b/
    +++abc+++
 0: abc
    +++ab\=ps
-Partial match: ab
+Partial match: +ab
               <
    +++ab\=ph
-Partial match: ab
+Partial match: +ab
               <
 /(?=C)/g,aftertext
    ABCDECBA
@ -7226,7 +7231,8 @@ Failed: error -40: item unsupported for DFA matching
 /(?<=abc)def/
    abc\=ph
-Partial match: 
+Partial match: abc
               <<<
 /abc$/
    abc