pcre2grep update: -m and $x{..}, $o{..} escapes. Also some doc updates.

2020-10-04 16:34:31 +00:00 · 2020-10-04 16:34:31 +00:00 · 81da2b97e3
parent 3bdc76e4f3
commit 81da2b97e3
15 changed files with 1522 additions and 1270 deletions
--- a/10
+++ b/10
@ -76,6 +76,16 @@ the subject \xe5A. Fixes Bugzilla #2642.
 14. Fixed a bug in character set matching when JIT is enabled and both unicode
 scripts and unicode classes are present at the same time.
 15. Added GNU grep's -m (aka --max-count) option to pcre2grep.
 16. Refactored substitution processing in pcre2grep strings, both for the -O 
 option and when dealing with callouts. There is now a single function that 
 handles $ expansion in all cases (instead of multiple copies of almost 
 identical code). This means that the same escape sequences are available 
 everywhere, which was not previously the case. At the same time, the escape 
 sequences $x{...} and $o{...} have been introduced, to allow for characters 
 whose code points are greater than 255 in Unicode mode.
 Version 10.35 09-May-2020
 ---------------------------
--- a/6
+++ b/6
@ -892,6 +892,6 @@ The distribution should contain the files listed below.
                          )   environments
 Philip Hazel
-Email local part: ph10
+Email local part: Philip.Hazel
-Email domain: cam.ac.uk
+Email domain: gmail.com
-Last updated: 20 March 2020
+Last updated: 22 September 2020
--- a/25
+++ b/25
@ -661,6 +661,26 @@ echo "---------------------------- Test 128 -----------------------------" >>tes
 (cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 129 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -m 2 'fox' testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 130 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -o -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 131 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -oc -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <testdata/grepinput >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 # Now compare the results.
 $cf $srcdir/testdata/grepoutput testtrygrep
@ -694,6 +714,10 @@ if [ $utf8 -ne 0 ] ; then
  (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep
  echo "RC=$?" >>testtrygrep
  echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
  (cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
  echo "RC=$?" >>testtrygrep
  $cf $srcdir/testdata/grepoutput8 testtrygrep
  if [ $? != 0 ] ; then exit 1; fi
@ -764,6 +788,7 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
  $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep
  if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
    $cf $srcdir/testdata/grepoutputCN testtrygrep
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -892,6 +892,6 @@ The distribution should contain the files listed below.
                          )   environments
 Philip Hazel
-Email local part: ph10
+Email local part: Philip.Hazel
-Email domain: cam.ac.uk
+Email domain: gmail.com
-Last updated: 20 March 2020
+Last updated: 22 September 2020
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -111,8 +111,8 @@ matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
 (either shown literally, or as an offset), scanning resumes immediately
 following the match, so that further matches on the same line can be found. If
 there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier part
+but patterns that follow the one that matched are not tried on the earlier 
-of the line.
+matched part of the line.
 </P>
 <P>
 This behaviour means that the order in which multiple patterns are specified
@ -146,11 +146,10 @@ ignored.
 <br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
 <P>
 By default, a file that contains a binary zero byte within the first 1024 bytes
-is identified as a binary file, and is processed specially. (GNU grep
+is identified as a binary file, and is processed specially. However, if the
-identifies binary files in this manner.) However, if the newline type is
+newline type is specified as NUL, that is, the line terminator is a binary
-specified as NUL, that is, the line terminator is a binary zero, the test for
+zero, the test for a binary file is not applied. See the <b>--binary-files</b>
-a binary file is not applied. See the <b>--binary-files</b> option for a means
+option for a means of changing the way binary files are handled.
-of changing the way binary files are handled.
 </P>
 <br><a name="SEC5" href="#TOC1">BINARY ZEROS IN PATTERNS</a><br>
 <P>
@ -443,8 +442,8 @@ Ignore upper/lower case distinctions during comparisons.
 <P>
 <b>--include</b>=<i>pattern</i>
 If any <b>--include</b> patterns are specified, the only files that are
-processed are those that match one of the patterns (and do not match an
+processed are those whose names match one of the patterns and do not match an
-<b>--exclude</b> pattern). This option does not affect directories, but it
+<b>--exclude</b> pattern. This option does not affect directories, but it
 applies to all files, whether listed on the command line, obtained from
 <b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
 expression, and is matched against the final component of the file name, not
@ -463,8 +462,8 @@ may be given any number of times; all the files are read.
 <P>
 <b>--include-dir</b>=<i>pattern</i>
 If any <b>--include-dir</b> patterns are specified, the only directories that
-are processed are those that match one of the patterns (and do not match an
+are processed are those whose names match one of the patterns and do not match
-<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
+an <b>--exclude-dir</b> pattern. This applies to all directories, whether listed
 on the command line, obtained from <b>--file-list</b>, or by scanning a parent
 directory. The pattern is a PCRE2 regular expression, and is matched against
 the final component of the directory name, not the entire path. The <b>-F</b>,
@ -487,8 +486,9 @@ a separate line. Searching normally stops as soon as a matching line is found
 in a file. However, if the <b>-c</b> (count) option is also used, matching
 continues in order to obtain the correct count, and those files that have at
 least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches. This
+<b>-c</b> is a way of suppressing the listing of files with no matches that 
-opeion overrides any previous <b>-H</b>, <b>-h</b>, or <b>-L</b> options.
+occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
 <b>-h</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>--label</b>=<i>name</i>
@ -501,8 +501,8 @@ short form for this option.
 When this option is given, non-compressed input is read and processed line by
 line, and the output is flushed after each write. By default, input is read in
 large chunks, unless <b>pcre2grep</b> can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments or
+terminal, which is currently possible only in Unix-like environments or
-Windows). Output to terminal is normally automatically flushed by the operating
+Windows. Output to terminal is normally automatically flushed by the operating
 system. This option can be useful when the input or output is attached to a
 pipe and you do not want <b>pcre2grep</b> to buffer up large amounts of data.
 However, its use will affect performance, and the <b>-M</b> (multiline) option
@ -528,6 +528,49 @@ locale is specified, the PCRE2 library's default (usually the "C" locale) is
 used. There is no short form for this option.
 </P>
 <P>
 <b>-M</b>, <b>--multiline</b>
 Allow patterns to match more than one line. When this option is set, the PCRE2
 library is called in "multiline" mode. This allows a matched string to extend
 past the end of a line and continue on one or more subsequent lines. Patterns
 used with <b>-M</b> may usefully contain literal newline characters and internal
 occurrences of ^ and $ characters. The output for a successful match may
 consist of more than one line. The first line is the line in which the match
 started, and the last line is the line in which the match ended. If the matched
 string ends with a newline sequence, the output ends at the end of that line.
 If <b>-v</b> is set, none of the lines in a multi-line match are output. Once a
 match has been handled, scanning restarts at the beginning of the line after
 the one in which the match ended.
 <br>
 <br>
 The newline sequence that separates multiple lines must be matched as part of
 the pattern. For example, to find the phrase "regular expression" in a file
 where "regular" might be at the end of a line and "expression" at the start of
 the next line, you could use this command:
 <pre>
  pcre2grep -M 'regular\s+expression' &#60;file&#62;
 </pre>
 The \s escape sequence matches any white space character, including newlines,
 and is followed by + so as to match trailing white space on the first line as
 well as possibly handling a two-character newline sequence.
 <br>
 <br>
 There is a limit to the number of lines that can be matched, imposed by the way
 that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
 large processing buffer, this should not be a problem, but the <b>-M</b> option
 does not work when input is read line by line (see <b>--line-buffered</b>).
 </P>
 <P>
 <b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
 Stop processing after finding <i>number</i> matching lines, or non-matching 
 lines if <b>-v</b> is also set. Any trailing context lines are output after the
 final match. In multiline mode, each multiline match counts as just one line
 for this purpose. If this limit is reached when reading the standard input from
 a regular file, the file is left positioned just after the last matching line.
 If <b>-c</b> is also set, the count that is output is never greater than 
 <i>number</i>. This option has no effect if used with <b>-L</b>, <b>-l</b>, or
 <b>-q</b>, or when just checking for a match in a binary file.
 </P>
 <P>
 <b>--match-limit</b>=<i>number</i>
 Processing some regular expression patterns may take a very long time to search
 for all possible matching strings. Others may require a very large amount of
@ -568,38 +611,6 @@ set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
 smaller than the starting buffer size.
 </P>
 <P>
 <b>-M</b>, <b>--multiline</b>
 Allow patterns to match more than one line. When this option is set, the PCRE2
 library is called in "multiline" mode. This allows a matched string to extend
 past the end of a line and continue on one or more subsequent lines. Patterns
 used with <b>-M</b> may usefully contain literal newline characters and internal
 occurrences of ^ and $ characters. The output for a successful match may
 consist of more than one line. The first line is the line in which the match
 started, and the last line is the line in which the match ended. If the matched
 string ends with a newline sequence, the output ends at the end of that line.
 If <b>-v</b> is set, none of the lines in a multi-line match are output. Once a
 match has been handled, scanning restarts at the beginning of the line after
 the one in which the match ended.
 <br>
 <br>
 The newline sequence that separates multiple lines must be matched as part of
 the pattern. For example, to find the phrase "regular expression" in a file
 where "regular" might be at the end of a line and "expression" at the start of
 the next line, you could use this command:
 <pre>
  pcre2grep -M 'regular\s+expression' &#60;file&#62;
 </pre>
 The \s escape sequence matches any white space character, including newlines,
 and is followed by + so as to match trailing white space on the first line as
 well as possibly handling a two-character newline sequence.
 <br>
 <br>
 There is a limit to the number of lines that can be matched, imposed by the way
 that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
 large processing buffer, this should not be a problem, but the <b>-M</b> option
 does not work when input is read line by line (see <b>--line-buffered</b>.)
 </P>
 <P>
 <b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
 Six different conventions for indicating the ends of lines in scanned files are
 supported. For example:
@ -648,31 +659,41 @@ It should never be needed in normal use.
 </P>
 <P>
 <b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
-When there is a match, instead of outputting the whole line that matched,
+When there is a match, instead of outputting the line that matched, output just
-output just the given text, followed by an operating-system standard newline.
+the text specified in this option, followed by an operating-system standard
-The <b>--newline</b> option has no effect on this option, which is mutually
+newline. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>,
-exclusive with <b>--only-matching</b>, <b>--file-offsets</b>, and
+and <b>-C</b> options are ignored. The <b>--newline</b> option has no effect on
-<b>--line-offsets</b>. Escape sequences starting with a dollar character may be
+this option, which is mutually exclusive with <b>--only-matching</b>,
-used to insert the contents of the matched part of the line and/or captured
+<b>--file-offsets</b>, and <b>--line-offsets</b>. However, like
-substrings into the text.
+<b>--only-matching</b>, if there is more than one match in a line, each of them
 causes a line of output.
 <br>
 <br>
-$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
+Escape sequences starting with a dollar character may be used to insert the
-substring of the given decimal number; zero substitutes the whole match. If
+contents of the matched part of the line and/or captured substrings into the
-the number is greater than the number of capturing substrings, or if the
+text.
-capture is unset, the replacement is empty.
+<br>
 <br>
 $&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured substring of the given
 decimal number; zero substitutes the whole match. If the number is greater than
 the number of capturing substrings, or if the capture is unset, the replacement
 is empty.
 <br>
 <br>
 $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
 newline; $r by carriage return; $t by tab; $v by vertical tab.
 <br>
 <br>
-$o&#60;digits&#62; is replaced by the character represented by the given octal
+$o&#60;digits&#62; or $o{&#60;digits&#62;} is replaced by the character whose code point is the
-number; up to three digits are processed.
+given octal number. In the first form, up to three octal digits are processed.
 When more digits are needed in Unicode mode to specify a wide character, the 
 second form must be used.
 <br>
 <br>
-$x&#60;digits&#62; is replaced by the character represented by the given hexadecimal
+$x&#60;digits&#62; or $x{&#60;digits&#62;} is replaced by the character represented by the
-number; up to two digits are processed.
+given hexadecimal number. In the first form, up to two hexadecimal digits are
 processed. When more digits are needed in Unicode mode to specify a wide
 character, the second form must be used.
 <br>
 <br>
 Any other character is substituted by itself. In particular, $$ is replaced by
@ -741,7 +762,8 @@ option to "recurse".
 </P>
 <P>
 <b>--recursion-limit</b>=<i>number</i>
-See <b>--match-limit</b> above.
+This is an obsolete synonym for <b>--depth-limit</b>. See <b>--match-limit</b>
 above for details.
 </P>
 <P>
 <b>-s</b>, <b>--no-messages</b>
@ -765,15 +787,18 @@ total would always be zero.
 <b>-u</b>, <b>--utf</b>
 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
 with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
-<b>--include</b> options) and all subject lines that are scanned must be valid
+<b>--include</b> options) and all lines that are scanned must be valid strings
-strings of UTF-8 characters.
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error 
 occurs.
 </P>
 <P>
 <b>-U</b>, <b>--utf-allow-invalid</b>
 As <b>--utf</b>, but in addition subject lines may contain invalid UTF-8 code
-unit sequences. These can never form part of any pattern match. This facility
+unit sequences. These can never form part of any pattern match. Patterns 
-allows valid UTF-8 strings to be sought in executable or other binary files.
+themselves, however, must still be valid UTF-8 strings. This facility allows
-For more details about matching in non-valid UTF-8 strings, see the
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
 or other binary files. For more details about matching in non-valid UTF-8
 strings, see the
 <a href="pcre2unicode.html"><b>pcre2unicode</b>(3)</a>
 documentation.
 </P>
@ -786,7 +811,9 @@ ignored.
 <P>
 <b>-v</b>, <b>--invert-match</b>
 Invert the sense of the match, so that lines which do <i>not</i> match any of
-the patterns are the ones that are found.
+the patterns are the ones that are found. When this option is set, options such 
 as <b>--only-matching</b> and <b>--output</b>, which specify parts of a match
 that are to be output, are ignored.
 </P>
 <P>
 <b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
@ -909,12 +936,36 @@ documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
 only callouts with string arguments are useful.
 </P>
 <br><b>
 Echoing a specific string
 </b><br>
 <P>
 Starting the callout string with a pipe character invokes an echoing facility
 that avoids calling an external program or script. This facility is always
 available, provided that callouts were not completely disabled when
 <b>pcre2grep</b> was built. The rest of the callout string is processed as a
 zero-terminated string, which means it should not contain any internal binary
 zeros. It is written to the output, having first been passed through the same
 escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
 above). However, $0 cannot be used to insert a matched substring because the
 match is still in progress. Instead, the single character '0' is inserted. Any
 syntax errors in the string (for example, a dollar not followed by another
 character) cause the callout to be ignored. No terminator is added to the
 output string, so if you want a newline, you must include it explicitly using
 the escape $n. For example:
 <pre>
  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' &#60;some file&#62;
 </pre>
 Matching continues normally after the string is output. If you want to see only
 the callout output but not any output from an actual match, you should end the
 pattern with (*FAIL).
 </P>
 <br><b>
 Calling external programs or scripts
 </b><br>
 <P>
 This facility can be independently disabled when <b>pcre2grep</b> is built. It
 is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
-where <b>lib$spawn()</b> is used, and for any other Unix-like environment where
+where <b>lib$spawn()</b> is used, and for any Unix-like environment where
 <b>fork()</b> and <b>execv()</b> are available.
 </P>
 <P>
@ -926,14 +977,11 @@ arguments:
  executable_name|arg1|arg2|...
 </pre>
 Any substring (including the executable name) may contain escape sequences
-started by a dollar character: $&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the
+started by a dollar character. These are the same as for the <b>--output</b>
-captured substring of the given decimal number, which must be greater than
+(<b>-O</b>) option documented above, except that $0 cannot insert the matched
-zero. If the number is greater than the number of capturing substrings, or if
+string because the match is still in progress. Instead, the character '0'
-the capture is unset, the replacement is empty.
+is inserted. If you need a literal dollar or pipe character in any
-</P>
+substring, use $$ or $| respectively. Here is an example:
-<P>
-Any other character is substituted by itself. In particular, $$ is replaced by
-a single dollar and $| is replaced by a pipe character. Here is an example:
 <pre>
  echo -e "abcde\n12345" | pcre2grep \
    '(?x)(.)(..(.))
@ -946,28 +994,14 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345
 </pre>
-The parameters for the system call that is used to run the
+The parameters for the system call that is used to run the program or script
-program or script are zero-terminated strings. This means that binary zero
+are zero-terminated strings. This means that binary zero characters in the
-characters in the callout argument will cause premature termination of their
+callout argument will cause premature termination of their substrings, and
-substrings, and therefore should not be present. Any syntax errors in the
+therefore should not be present. Any syntax errors in the string (for example,
-string (for example, a dollar not followed by another character) cause the
+a dollar not followed by another character) cause the callout to be ignored.
-callout to be ignored. If running the program fails for any reason (including
+If running the program fails for any reason (including the non-existence of the
-the non-existence of the executable), a local matching failure occurs and the
+executable), a local matching failure occurs and the matcher backtracks in the
-matcher backtracks in the normal way.
+normal way.
-</P>
-<br><b>
-Echoing a specific string
-</b><br>
-<P>
-This facility is always available, provided that callouts were not completely
-disabled when <b>pcre2grep</b> was built. If the callout string starts with a
-pipe (vertical bar) character, the rest of the string is written to the output,
-having been passed through the same escape processing as text from the --output
-option. This provides a simple echoing facility that avoids calling an external
-program or script. No terminator is added to the string, so if you want a
-newline, you must include it explicitly. Matching continues normally after the
-string is output. If you want to see only the callout output but not any output
-from an actual match, you should end the relevant pattern with (*FAIL).
 </P>
 <br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br>
 <P>
@ -999,7 +1033,8 @@ because VMS does not distinguish between exit(0) and exit(1).
 </P>
 <br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
 <P>
-<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3).
+<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3),
 <b>pcre2unicode</b>(3).
 </P>
 <br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
 <P>
@ -1012,7 +1047,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 25 January 2020
+Last updated: 04 October 2020
 <br>
 Copyright &copy; 1997-2020 University of Cambridge.
 <br>
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -323,7 +323,7 @@ test data, command lines that begin with # may appear. This file format, with
 some restrictions, can also be processed by the <b>perltest.sh</b> script that
 is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
 and Perl is the same. For a specification of <b>perltest.sh</b>, see the
-comments near its beginning.
+comments near its beginning. See also the #perltest command below.
 </P>
 <P>
 When the input is a terminal, <b>pcre2test</b> prompts for each line of input,
@ -420,14 +420,20 @@ patterns. Modifiers on a pattern can change these settings.
 <pre>
  #perltest
 </pre>
-The appearance of this line causes all subsequent modifier settings to be
+This line is used in test files that can also be processed by <b>perltest.sh</b>
-checked for compatibility with the <b>perltest.sh</b> script, which is used to
+to confirm that Perl gives the same results as PCRE2. Subsequent tests are
-confirm that Perl gives the same results as PCRE2. Also, apart from comment
+checked for the use of <b>pcre2test</b> features that are incompatible with the
-lines, #pattern commands, and #subject commands that set or unset "mark", no
+<b>perltest.sh</b> script. 
-command lines are permitted, because they and many of the modifiers are
+</P>
-specific to <b>pcre2test</b>, and should not be used in test files that are also
+<P>
-processed by <b>perltest.sh</b>. The <b>#perltest</b> command helps detect tests
+Patterns must use '/' as their delimiter, and only certain modifiers are
-that are accidentally put in the wrong file.
+supported. Comment lines, #pattern commands, and #subject commands that set or
 unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and
 #newline_default commands, which are needed in the relevant pcre2test files,
 are silently ignored. All other command lines are ignored, but give a warning
 message. The <b>#perltest</b> command helps detect tests that are accidentally
 put in the wrong file or use the wrong delimiter. For more details of the
 <b>perltest.sh</b> script see the comments it contains.
 <pre>
  #pop [&#60;modifiers&#62;]
  #popcopy [&#60;modifiers&#62;]
@ -2113,7 +2119,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 20 March 2020
+Last updated: 14 September 2020
 <br>
 Copyright &copy; 1997-2020 University of Cambridge.
 <br>
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "25 January 2020" "PCRE2 10.35"
+.TH PCRE2GREP 1 "04 October 2020" "PCRE2 10.36"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -79,8 +79,8 @@ matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
 (either shown literally, or as an offset), scanning resumes immediately
 following the match, so that further matches on the same line can be found. If
 there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier part
+but patterns that follow the one that matched are not tried on the earlier 
-of the line.
+matched part of the line.
 .P
 This behaviour means that the order in which multiple patterns are specified
 can affect the output when one of the above options is used. This is no longer
@ -115,11 +115,10 @@ ignored.
 .rs
 .sp
 By default, a file that contains a binary zero byte within the first 1024 bytes
-is identified as a binary file, and is processed specially. (GNU grep
+is identified as a binary file, and is processed specially. However, if the
-identifies binary files in this manner.) However, if the newline type is
+newline type is specified as NUL, that is, the line terminator is a binary
-specified as NUL, that is, the line terminator is a binary zero, the test for
+zero, the test for a binary file is not applied. See the \fB--binary-files\fP
-a binary file is not applied. See the \fB--binary-files\fP option for a means
+option for a means of changing the way binary files are handled.
-of changing the way binary files are handled.
 .
 .
 .SH "BINARY ZEROS IN PATTERNS"
@ -383,8 +382,8 @@ Ignore upper/lower case distinctions during comparisons.
 .TP
 \fB--include\fP=\fIpattern\fP
 If any \fB--include\fP patterns are specified, the only files that are
-processed are those that match one of the patterns (and do not match an
+processed are those whose names match one of the patterns and do not match an
-\fB--exclude\fP pattern). This option does not affect directories, but it
+\fB--exclude\fP pattern. This option does not affect directories, but it
 applies to all files, whether listed on the command line, obtained from
 \fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular
 expression, and is matched against the final component of the file name, not
@ -401,8 +400,8 @@ may be given any number of times; all the files are read.
 .TP
 \fB--include-dir\fP=\fIpattern\fP
 If any \fB--include-dir\fP patterns are specified, the only directories that
-are processed are those that match one of the patterns (and do not match an
+are processed are those whose names match one of the patterns and do not match
-\fB--exclude-dir\fP pattern). This applies to all directories, whether listed
+an \fB--exclude-dir\fP pattern. This applies to all directories, whether listed
 on the command line, obtained from \fB--file-list\fP, or by scanning a parent
 directory. The pattern is a PCRE2 regular expression, and is matched against
 the final component of the directory name, not the entire path. The \fB-F\fP,
@ -423,8 +422,9 @@ a separate line. Searching normally stops as soon as a matching line is found
 in a file. However, if the \fB-c\fP (count) option is also used, matching
 continues in order to obtain the correct count, and those files that have at
 least one match are listed along with their counts. Using this option with
-\fB-c\fP is a way of suppressing the listing of files with no matches. This
+\fB-c\fP is a way of suppressing the listing of files with no matches that 
-opeion overrides any previous \fB-H\fP, \fB-h\fP, or \fB-L\fP options.
+occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
 \fB-h\fP, or \fB-L\fP options.
 .TP
 \fB--label\fP=\fIname\fP
 This option supplies a name to be used for the standard input when file names
@ -435,8 +435,8 @@ short form for this option.
 When this option is given, non-compressed input is read and processed line by
 line, and the output is flushed after each write. By default, input is read in
 large chunks, unless \fBpcre2grep\fP can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments or
+terminal, which is currently possible only in Unix-like environments or
-Windows). Output to terminal is normally automatically flushed by the operating
+Windows. Output to terminal is normally automatically flushed by the operating
 system. This option can be useful when the input or output is attached to a
 pipe and you do not want \fBpcre2grep\fP to buffer up large amounts of data.
 However, its use will affect performance, and the \fB-M\fP (multiline) option
@ -459,6 +459,45 @@ the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no
 locale is specified, the PCRE2 library's default (usually the "C" locale) is
 used. There is no short form for this option.
 .TP
 \fB-M\fP, \fB--multiline\fP
 Allow patterns to match more than one line. When this option is set, the PCRE2
 library is called in "multiline" mode. This allows a matched string to extend
 past the end of a line and continue on one or more subsequent lines. Patterns
 used with \fB-M\fP may usefully contain literal newline characters and internal
 occurrences of ^ and $ characters. The output for a successful match may
 consist of more than one line. The first line is the line in which the match
 started, and the last line is the line in which the match ended. If the matched
 string ends with a newline sequence, the output ends at the end of that line.
 If \fB-v\fP is set, none of the lines in a multi-line match are output. Once a
 match has been handled, scanning restarts at the beginning of the line after
 the one in which the match ended.
 .sp
 The newline sequence that separates multiple lines must be matched as part of
 the pattern. For example, to find the phrase "regular expression" in a file
 where "regular" might be at the end of a line and "expression" at the start of
 the next line, you could use this command:
 .sp
  pcre2grep -M 'regular\es+expression' <file>
 .sp
 The \es escape sequence matches any white space character, including newlines,
 and is followed by + so as to match trailing white space on the first line as
 well as possibly handling a two-character newline sequence.
 .sp
 There is a limit to the number of lines that can be matched, imposed by the way
 that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
 large processing buffer, this should not be a problem, but the \fB-M\fP option
 does not work when input is read line by line (see \fB--line-buffered\fP).
 .TP
 \fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
 Stop processing after finding \fInumber\fP matching lines, or non-matching 
 lines if \fB-v\fP is also set. Any trailing context lines are output after the
 final match. In multiline mode, each multiline match counts as just one line
 for this purpose. If this limit is reached when reading the standard input from
 a regular file, the file is left positioned just after the last matching line.
 If \fB-c\fP is also set, the count that is output is never greater than 
 \fInumber\fP. This option has no effect if used with \fB-L\fP, \fB-l\fP, or
 \fB-q\fP, or when just checking for a match in a binary file.
 .TP
 \fB--match-limit\fP=\fInumber\fP
 Processing some regular expression patterns may take a very long time to search
 for all possible matching strings. Others may require a very large amount of
@ -493,35 +532,6 @@ This limits the expansion of the processing buffer, whose initial size can be
 set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
 smaller than the starting buffer size.
 .TP
 \fB-M\fP, \fB--multiline\fP
 Allow patterns to match more than one line. When this option is set, the PCRE2
 library is called in "multiline" mode. This allows a matched string to extend
 past the end of a line and continue on one or more subsequent lines. Patterns
 used with \fB-M\fP may usefully contain literal newline characters and internal
 occurrences of ^ and $ characters. The output for a successful match may
 consist of more than one line. The first line is the line in which the match
 started, and the last line is the line in which the match ended. If the matched
 string ends with a newline sequence, the output ends at the end of that line.
 If \fB-v\fP is set, none of the lines in a multi-line match are output. Once a
 match has been handled, scanning restarts at the beginning of the line after
 the one in which the match ended.
 .sp
 The newline sequence that separates multiple lines must be matched as part of
 the pattern. For example, to find the phrase "regular expression" in a file
 where "regular" might be at the end of a line and "expression" at the start of
 the next line, you could use this command:
 .sp
  pcre2grep -M 'regular\es+expression' <file>
 .sp
 The \es escape sequence matches any white space character, including newlines,
 and is followed by + so as to match trailing white space on the first line as
 well as possibly handling a two-character newline sequence.
 .sp
 There is a limit to the number of lines that can be matched, imposed by the way
 that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
 large processing buffer, this should not be a problem, but the \fB-M\fP option
 does not work when input is read line by line (see \fB--line-buffered\fP.)
 .TP
 \fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
 Six different conventions for indicating the ends of lines in scanned files are
 supported. For example:
@ -565,27 +575,36 @@ use of JIT at run time. It is provided for testing and working round problems.
 It should never be needed in normal use.
 .TP
 \fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
-When there is a match, instead of outputting the whole line that matched,
+When there is a match, instead of outputting the line that matched, output just
-output just the given text, followed by an operating-system standard newline.
+the text specified in this option, followed by an operating-system standard
-The \fB--newline\fP option has no effect on this option, which is mutually
+newline. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP,
-exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and
+and \fB-C\fP options are ignored. The \fB--newline\fP option has no effect on
-\fB--line-offsets\fP. Escape sequences starting with a dollar character may be
+this option, which is mutually exclusive with \fB--only-matching\fP,
-used to insert the contents of the matched part of the line and/or captured
+\fB--file-offsets\fP, and \fB--line-offsets\fP. However, like
-substrings into the text.
+\fB--only-matching\fP, if there is more than one match in a line, each of them
 causes a line of output.
 .sp
-$<digits> or ${<digits>} is replaced by the captured
+Escape sequences starting with a dollar character may be used to insert the
-substring of the given decimal number; zero substitutes the whole match. If
+contents of the matched part of the line and/or captured substrings into the
-the number is greater than the number of capturing substrings, or if the
+text.
-capture is unset, the replacement is empty.
+.sp
 $<digits> or ${<digits>} is replaced by the captured substring of the given
 decimal number; zero substitutes the whole match. If the number is greater than
 the number of capturing substrings, or if the capture is unset, the replacement
 is empty.
 .sp
 $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
 newline; $r by carriage return; $t by tab; $v by vertical tab.
 .sp
-$o<digits> is replaced by the character represented by the given octal
+$o<digits> or $o{<digits>} is replaced by the character whose code point is the
-number; up to three digits are processed.
+given octal number. In the first form, up to three octal digits are processed.
 When more digits are needed in Unicode mode to specify a wide character, the 
 second form must be used.
 .sp
-$x<digits> is replaced by the character represented by the given hexadecimal
+$x<digits> or $x{<digits>} is replaced by the character represented by the
-number; up to two digits are processed.
+given hexadecimal number. In the first form, up to two hexadecimal digits are
 processed. When more digits are needed in Unicode mode to specify a wide
 character, the second form must be used.
 .sp
 Any other character is substituted by itself. In particular, $$ is replaced by
 a single dollar.
@ -644,7 +663,8 @@ immediate end-of-file. This option is a shorthand for setting the \fB-d\fP
 option to "recurse".
 .TP
 \fB--recursion-limit\fP=\fInumber\fP
-See \fB--match-limit\fP above.
+This is an obsolete synonym for \fB--depth-limit\fP. See \fB--match-limit\fP
 above for details.
 .TP
 \fB-s\fP, \fB--no-messages\fP
 Suppress error messages about non-existent or unreadable files. Such files are
@ -665,14 +685,17 @@ total would always be zero.
 \fB-u\fP, \fB--utf\fP
 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
 with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
-\fB--include\fP options) and all subject lines that are scanned must be valid
+\fB--include\fP options) and all lines that are scanned must be valid strings
-strings of UTF-8 characters.
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error 
 occurs.
 .TP
 \fB-U\fP, \fB--utf-allow-invalid\fP
 As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
-unit sequences. These can never form part of any pattern match. This facility
+unit sequences. These can never form part of any pattern match. Patterns 
-allows valid UTF-8 strings to be sought in executable or other binary files.
+themselves, however, must still be valid UTF-8 strings. This facility allows
-For more details about matching in non-valid UTF-8 strings, see the
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
 or other binary files. For more details about matching in non-valid UTF-8
 strings, see the
 .\" HREF
 \fBpcre2unicode\fP(3)
 .\"
@ -685,7 +708,9 @@ ignored.
 .TP
 \fB-v\fP, \fB--invert-match\fP
 Invert the sense of the match, so that lines which do \fInot\fP match any of
-the patterns are the ones that are found.
+the patterns are the ones that are found. When this option is set, options such 
 as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match
 that are to be output, are ignored.
 .TP
 \fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP
 Force the patterns only to match "words". That is, there must be a word
@ -812,12 +837,36 @@ documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
 only callouts with string arguments are useful.
 .
 .
 .SS "Echoing a specific string"
 .rs
 .sp
 Starting the callout string with a pipe character invokes an echoing facility
 that avoids calling an external program or script. This facility is always
 available, provided that callouts were not completely disabled when
 \fBpcre2grep\fP was built. The rest of the callout string is processed as a
 zero-terminated string, which means it should not contain any internal binary
 zeros. It is written to the output, having first been passed through the same
 escape processing as text from the \fB--output\fP (\fB-O\fP) option (see
 above). However, $0 cannot be used to insert a matched substring because the
 match is still in progress. Instead, the single character '0' is inserted. Any
 syntax errors in the string (for example, a dollar not followed by another
 character) cause the callout to be ignored. No terminator is added to the
 output string, so if you want a newline, you must include it explicitly using
 the escape $n. For example:
 .sp
  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
 .sp   
 Matching continues normally after the string is output. If you want to see only
 the callout output but not any output from an actual match, you should end the
 pattern with (*FAIL).
 .
 .
 .SS "Calling external programs or scripts"
 .rs
 .sp
 This facility can be independently disabled when \fBpcre2grep\fP is built. It
 is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
-where \fBlib$spawn()\fP is used, and for any other Unix-like environment where
+where \fBlib$spawn()\fP is used, and for any Unix-like environment where
 \fBfork()\fP and \fBexecv()\fP are available.
 .P
 If the callout string does not start with a pipe (vertical bar) character, it
@ -828,13 +877,11 @@ arguments:
  executable_name|arg1|arg2|...
 .sp
 Any substring (including the executable name) may contain escape sequences
-started by a dollar character: $<digits> or ${<digits>} is replaced by the
+started by a dollar character. These are the same as for the \fB--output\fP
-captured substring of the given decimal number, which must be greater than
+(\fB-O\fP) option documented above, except that $0 cannot insert the matched
-zero. If the number is greater than the number of capturing substrings, or if
+string because the match is still in progress. Instead, the character '0'
-the capture is unset, the replacement is empty.
+is inserted. If you need a literal dollar or pipe character in any
-.P
+substring, use $$ or $| respectively. Here is an example:
 Any other character is substituted by itself. In particular, $$ is replaced by
 a single dollar and $| is replaced by a pipe character. Here is an example:
 .sp
  echo -e "abcde\en12345" | pcre2grep \e
    '(?x)(.)(..(.))
@ -847,28 +894,14 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345
 .sp
-The parameters for the system call that is used to run the
+The parameters for the system call that is used to run the program or script
-program or script are zero-terminated strings. This means that binary zero
+are zero-terminated strings. This means that binary zero characters in the
-characters in the callout argument will cause premature termination of their
+callout argument will cause premature termination of their substrings, and
-substrings, and therefore should not be present. Any syntax errors in the
+therefore should not be present. Any syntax errors in the string (for example,
-string (for example, a dollar not followed by another character) cause the
+a dollar not followed by another character) cause the callout to be ignored.
-callout to be ignored. If running the program fails for any reason (including
+If running the program fails for any reason (including the non-existence of the
-the non-existence of the executable), a local matching failure occurs and the
+executable), a local matching failure occurs and the matcher backtracks in the
-matcher backtracks in the normal way.
+normal way.
 .
 .
 .SS "Echoing a specific string"
 .rs
 .sp
 This facility is always available, provided that callouts were not completely
 disabled when \fBpcre2grep\fP was built. If the callout string starts with a
 pipe (vertical bar) character, the rest of the string is written to the output,
 having been passed through the same escape processing as text from the --output
 option. This provides a simple echoing facility that avoids calling an external
 program or script. No terminator is added to the string, so if you want a
 newline, you must include it explicitly. Matching continues normally after the
 string is output. If you want to see only the callout output but not any output
 from an actual match, you should end the relevant pattern with (*FAIL).
 .
 .
 .SH "MATCHING ERRORS"
@ -904,7 +937,8 @@ because VMS does not distinguish between exit(0) and exit(1).
 .SH "SEE ALSO"
 .rs
 .sp
-\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3).
+\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3),
 \fBpcre2unicode\fP(3).
 .
 .
 .SH AUTHOR
@ -921,6 +955,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 25 January 2020
+Last updated: 04 October 2020
 Copyright (c) 1997-2020 University of Cambridge.
 .fi
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@ -80,7 +80,7 @@ DESCRIPTION
       following  the  match,  so that further matches on the same line can be
       found. If there are multiple patterns, they are all tried  on  the  re-
       mainder  of the line, but patterns that follow the one that matched are
-       not tried on the earlier part of the line.
+       not tried on the earlier matched part of the line.
       This behaviour means that the order  in  which  multiple  patterns  are
       specified  can affect the output when one of the above options is used.
@ -115,10 +115,10 @@ BINARY FILES
       By  default,  a  file that contains a binary zero byte within the first
       1024 bytes is identified as a binary file, and is processed  specially.
-       (GNU grep identifies binary files in this manner.) However, if the new-
+       However,  if  the  newline  type is specified as NUL, that is, the line
-       line type is specified as NUL, that is, the line terminator is a binary
+       terminator is a binary zero, the test for a binary file is not applied.
-       zero, the test for a binary file is not applied. See the --binary-files
+       See  the  --binary-files  option for a means of changing the way binary
-       option for a means of changing the way binary files are handled.
+       files are handled.
 BINARY ZEROS IN PATTERNS
@ -413,17 +413,17 @@ OPTIONS
       --include=pattern
                 If  any --include patterns are specified, the only files that
-                 are processed are those that match one of the  patterns  (and
+                 are processed are those whose names match one of the patterns
-                 do  not match an --exclude pattern). This option does not af-
+                 and  do  not match an --exclude pattern. This option does not
-                 fect directories, but it applies to all files, whether listed
+                 affect directories, but it  applies  to  all  files,  whether
-                 on  the  command line, obtained from --file-list, or by scan-
+                 listed  on the command line, obtained from --file-list, or by
-                 ning a directory. The pattern is a PCRE2 regular  expression,
+                 scanning a directory. The pattern is a PCRE2 regular  expres-
-                 and  is matched against the final component of the file name,
+                 sion,  and is matched against the final component of the file
-                 not the entire path. The -F, -w, and -x options do not  apply
+                 name, not the entire path. The -F, -w, and -x options do  not
-                 to this pattern. The option may be given any number of times.
+                 apply  to this pattern. The option may be given any number of
-                 If a file name matches both an  --include  and  an  --exclude
+                 times. If a file name matches both an --include and an  --ex-
-                 pattern, it is excluded.  There is no short form for this op-
+                 clude  pattern,  it  is excluded.  There is no short form for
-                 tion.
+                 this option.
       --include-from=filename
                 Treat each non-empty line of the file  as  the  data  for  an
@ -434,8 +434,8 @@ OPTIONS
       --include-dir=pattern
                 If any --include-dir patterns are specified, the only  direc-
-                 tories  that  are  processed  are those that match one of the
+                 tories  that are processed are those whose names match one of
-                 patterns (and do not match an  --exclude-dir  pattern).  This
+                 the patterns and do not match an --exclude-dir pattern.  This
                 applies  to  all  directories,  whether listed on the command
                 line, obtained from --file-list, or by scanning a parent  di-
                 rectory.  The  pattern  is a PCRE2 regular expression, and is
@ -461,8 +461,9 @@ OPTIONS
                 matching  continues in order to obtain the correct count, and
                 those files that have at least one  match  are  listed  along
                 with their counts. Using this option with -c is a way of sup-
-                 pressing the listing of files with no  matches.  This  opeion
+                 pressing the listing of files with  no  matches  that  occurs
-                 overrides any previous -H, -h, or -L options.
+                 with  -c  on  its own. This option overrides any previous -H,
                 -h, or -L options.
       --label=name
                 This option supplies a name to be used for the standard input
@ -470,37 +471,84 @@ OPTIONS
                 input)" is used. There is no short form for this option.
       --line-buffered
-                 When  this  option is given, non-compressed input is read and
+                 When this option is given, non-compressed input is  read  and
-                 processed line by line, and the output is flushed after  each
+                 processed  line by line, and the output is flushed after each
-                 write.  By  default,  input  is  read in large chunks, unless
+                 write. By default, input is  read  in  large  chunks,  unless
-                 pcre2grep can determine that it is reading  from  a  terminal
+                 pcre2grep  can  determine that it is reading from a terminal,
-                 (which  is  currently possible only in Unix-like environments
+                 which is currently possible only in Unix-like environments or
-                 or Windows). Output to  terminal  is  normally  automatically
+                 Windows. Output to terminal is normally automatically flushed
-                 flushed  by  the  operating system. This option can be useful
+                 by the operating system. This option can be useful  when  the
-                 when the input or output is attached to a pipe and you do not
+                 input  or  output  is  attached to a pipe and you do not want
-                 want  pcre2grep to buffer up large amounts of data.  However,
+                 pcre2grep to buffer up large amounts of data.   However,  its
-                 its use will affect performance, and the -M  (multiline)  op-
+                 use  will  affect  performance, and the -M (multiline) option
-                 tion  ceases  to work. When input is from a compressed .gz or
+                 ceases to work. When input is from a compressed .gz  or  .bz2
-                 .bz2 file, --line-buffered is ignored.
+                 file, --line-buffered is ignored.
       --line-offsets
-                 Instead of showing lines or parts of lines that  match,  show
+                 Instead  of  showing lines or parts of lines that match, show
                 each match as a line number, the offset from the start of the
-                 line, and a length. The line number is terminated by a  colon
+                 line,  and a length. The line number is terminated by a colon
-                 (as  usual; see the -n option), and the offset and length are
+                 (as usual; see the -n option), and the offset and length  are
-                 separated by a comma. In this  mode,  no  context  is  shown.
+                 separated  by  a  comma.  In  this mode, no context is shown.
-                 That  is, the -A, -B, and -C options are ignored. If there is
+                 That is, the -A, -B, and -C options are ignored. If there  is
-                 more than one match in a line, each of them  is  shown  sepa-
+                 more  than  one  match in a line, each of them is shown sepa-
-                 rately.  This  option  is  mutually  exclusive with --output,
+                 rately. This option  is  mutually  exclusive  with  --output,
                 --file-offsets, and --only-matching.
       --locale=locale-name
-                 This option specifies a locale to be used for pattern  match-
+                 This  option specifies a locale to be used for pattern match-
-                 ing.  It  overrides the value in the LC_ALL or LC_CTYPE envi-
+                 ing. It overrides the value in the LC_ALL or  LC_CTYPE  envi-
-                 ronment variables. If no locale is specified, the  PCRE2  li-
+                 ronment  variables.  If no locale is specified, the PCRE2 li-
                 brary's default (usually the "C" locale) is used. There is no
                 short form for this option.
       -M, --multiline
                 Allow  patterns to match more than one line. When this option
                 is set, the PCRE2 library is called in "multiline" mode. This
                 allows  a matched string to extend past the end of a line and
                 continue on one or more subsequent lines. Patterns used  with
                 -M may usefully contain literal newline characters and inter-
                 nal occurrences of ^ and $ characters. The output for a  suc-
                 cessful  match  may  consist of more than one line. The first
                 line is the line in which the match  started,  and  the  last
                 line  is  the  line  in which the match ended. If the matched
                 string ends with a newline sequence, the output ends  at  the
                 end  of  that  line.   If  -v  is set, none of the lines in a
                 multi-line match are output. Once a match has  been  handled,
                 scanning  restarts at the beginning of the line after the one
                 in which the match ended.
                 The newline sequence that separates multiple  lines  must  be
                 matched  as  part  of  the  pattern. For example, to find the
                 phrase "regular expression" in a file where  "regular"  might
                 be  at the end of a line and "expression" at the start of the
                 next line, you could use this command:
                   pcre2grep -M 'regular\s+expression' <file>
                 The \s escape sequence matches any white space character, in-
                 cluding  newlines, and is followed by + so as to match trail-
                 ing white space on the first line as well  as  possibly  han-
                 dling a two-character newline sequence.
                 There  is a limit to the number of lines that can be matched,
                 imposed by the way that pcre2grep buffers the input  file  as
                 it  scans  it.  With  a sufficiently large processing buffer,
                 this should not be a problem, but the -M option does not work
                 when input is read line by line (see --line-buffered.)
       -m number, --max-count=number
                 Stop  processing after finding number matching lines, or non-
                 matching lines if -v is also set. Any trailing context  lines
                 are  output  after  the  final match. In multiline mode, each
                 multiline match counts as just one line for this purpose.  If
                 this  limit is reached when reading the standard input from a
                 regular file, the file is left positioned just after the last
                 matching  line.   If -c is also set, the count that is output
                 is never greater than number. This option has  no  effect  if
                 used with -L, -l, or -q, or when just checking for a match in
                 a binary file.
       --match-limit=number
                 Processing some regular expression patterns may take  a  very
                 long time to search for all possible matching strings. Others
@ -542,41 +590,6 @@ OPTIONS
                 size is silently forced to be no smaller  than  the  starting
                 buffer size.
       -M, --multiline
                 Allow  patterns to match more than one line. When this option
                 is set, the PCRE2 library is called in "multiline" mode. This
                 allows  a matched string to extend past the end of a line and
                 continue on one or more subsequent lines. Patterns used  with
                 -M may usefully contain literal newline characters and inter-
                 nal occurrences of ^ and $ characters. The output for a  suc-
                 cessful  match  may  consist of more than one line. The first
                 line is the line in which the match  started,  and  the  last
                 line  is  the  line  in which the match ended. If the matched
                 string ends with a newline sequence, the output ends  at  the
                 end  of  that  line.   If  -v  is set, none of the lines in a
                 multi-line match are output. Once a match has  been  handled,
                 scanning  restarts at the beginning of the line after the one
                 in which the match ended.
                 The newline sequence that separates multiple  lines  must  be
                 matched  as  part  of  the  pattern. For example, to find the
                 phrase "regular expression" in a file where  "regular"  might
                 be  at the end of a line and "expression" at the start of the
                 next line, you could use this command:
                   pcre2grep -M 'regular\s+expression' <file>
                 The \s escape sequence matches any white space character, in-
                 cluding  newlines, and is followed by + so as to match trail-
                 ing white space on the first line as well  as  possibly  han-
                 dling a two-character newline sequence.
                 There  is a limit to the number of lines that can be matched,
                 imposed by the way that pcre2grep buffers the input  file  as
                 it  scans  it.  With  a sufficiently large processing buffer,
                 this should not be a problem, but the -M option does not work
                 when input is read line by line (see --line-buffered.)
       -N newline-type, --newline=newline-type
                 Six different conventions for indicating the ends of lines in
                 scanned files are supported. For example:
@ -625,97 +638,109 @@ OPTIONS
                 lems.  It should never be needed in normal use.
       -O text, --output=text
-                 When there is a match, instead of outputting the  whole  line
+                 When there is a match, instead of outputting  the  line  that
-                 that  matched, output just the given text, followed by an op-
+                 matched,  output just the text specified in this option, fol-
-                 erating-system standard newline.  The --newline option has no
+                 lowed by an operating-system standard newline. In this  mode,
-                 effect  on  this  option,  which  is  mutually exclusive with
+                 no  context is shown. That is, the -A, -B, and -C options are
-                 --only-matching, --file-offsets, and  --line-offsets.  Escape
+                 ignored. The --newline option has no effect on  this  option,
-                 sequences starting with a dollar character may be used to in-
+                 which is mutually exclusive with --only-matching, --file-off-
-                 sert the contents of the matched part of the line and/or cap-
+                 sets, and --line-offsets. However, like  --only-matching,  if
-                 tured substrings into the text.
+                 there is more than one match in a line, each of them causes a
                 line of output.
-                 $<digits>  or  ${<digits>}  is  replaced by the captured sub-
+                 Escape sequences starting with a dollar character may be used
-                 string of the given  decimal  number;  zero  substitutes  the
+                 to insert the contents of the matched part of the line and/or
                 captured substrings into the text.
                 $<digits> or ${<digits>} is replaced  by  the  captured  sub-
                 string  of  the  given  decimal  number; zero substitutes the
                 whole match. If the number is greater than the number of cap-
-                 turing substrings, or if the capture is unset,  the  replace-
+                 turing  substrings,  or if the capture is unset, the replace-
                 ment is empty.
-                 $a  is replaced by bell; $b by backspace; $e by escape; $f by
+                 $a is replaced by bell; $b by backspace; $e by escape; $f  by
-                 form feed; $n by newline; $r by carriage return; $t  by  tab;
+                 form  feed;  $n by newline; $r by carriage return; $t by tab;
                 $v by vertical tab.
-                 $o<digits>  is  replaced  by the character represented by the
+                 $o<digits> or $o{<digits>} is replaced by the character whose
-                 given octal number; up to three digits are processed.
+                 code  point  is the given octal number. In the first form, up
                 to three octal digits are processed.  When  more  digits  are
                 needed  in Unicode mode to specify a wide character, the sec-
                 ond form must be used.
-                 $x<digits> is replaced by the character  represented  by  the
+                 $x<digits> or $x{<digits>} is replaced by the character  rep-
-                 given hexadecimal number; up to two digits are processed.
+                 resented  by the given hexadecimal number. In the first form,
                 up to two hexadecimal digits are processed. When more  digits
                 are  needed  in Unicode mode to specify a wide character, the
                 second form must be used.
-                 Any  other character is substituted by itself. In particular,
+                 Any other character is substituted by itself. In  particular,
                 $$ is replaced by a single dollar.
       -o, --only-matching
                 Show only the part of the line that matched a pattern instead
-                 of  the  whole  line. In this mode, no context is shown. That
+                 of the whole line. In this mode, no context  is  shown.  That
-                 is, the -A, -B, and -C options are ignored. If there is  more
+                 is,  the -A, -B, and -C options are ignored. If there is more
-                 than  one  match in a line, each of them is shown separately,
+                 than one match in a line, each of them is  shown  separately,
-                 on a separate line of output. If -o is combined with -v  (in-
+                 on  a separate line of output. If -o is combined with -v (in-
-                 vert  the  sense of the match to find non-matching lines), no
+                 vert the sense of the match to find non-matching  lines),  no
-                 output is generated, but the return  code  is  set  appropri-
+                 output  is  generated,  but  the return code is set appropri-
-                 ately.  If  the matched portion of the line is empty, nothing
+                 ately. If the matched portion of the line is  empty,  nothing
-                 is output unless the file  name  or  line  number  are  being
+                 is  output  unless  the  file  name  or line number are being
-                 printed,  in  which case they are shown on an otherwise empty
+                 printed, in which case they are shown on an  otherwise  empty
                 line.  This  option  is  mutually  exclusive  with  --output,
                 --file-offsets and --line-offsets.
       -onumber, --only-matching=number
-                 Show  only  the  part  of the line that matched the capturing
+                 Show only the part of the line  that  matched  the  capturing
                 parentheses of the given number. Up to 50 capturing parenthe-
-                 ses  are  supported by default. This limit can be changed via
+                 ses are supported by default. This limit can be  changed  via
-                 the --om-capture option. A pattern may contain any number  of
+                 the  --om-capture option. A pattern may contain any number of
-                 capturing  parentheses, but only those whose number is within
+                 capturing parentheses, but only those whose number is  within
-                 the limit can be accessed by -o. An error occurs if the  num-
+                 the  limit can be accessed by -o. An error occurs if the num-
                 ber specified by -o is greater than the limit.
                 -o0 is the same as -o without a number. Because these options
-                 can be given without an argument (see above), if an  argument
+                 can  be given without an argument (see above), if an argument
-                 is  present, it must be given in the same shell item, for ex-
+                 is present, it must be given in the same shell item, for  ex-
-                 ample, -o3 or --only-matching=2. The comments given  for  the
+                 ample,  -o3  or --only-matching=2. The comments given for the
-                 non-argument  case  above  also  apply to this option. If the
+                 non-argument case above also apply to  this  option.  If  the
-                 specified capturing parentheses do not exist in the  pattern,
+                 specified  capturing parentheses do not exist in the pattern,
-                 or  were  not  set in the match, nothing is output unless the
+                 or were not set in the match, nothing is  output  unless  the
                 file name or line number are being output.
-                 If this option is given multiple times,  multiple  substrings
+                 If  this  option is given multiple times, multiple substrings
-                 are  output  for  each  match,  in  the order the options are
+                 are output for each match,  in  the  order  the  options  are
-                 given, and all on one line. For example, -o3 -o1  -o3  causes
+                 given,  and  all on one line. For example, -o3 -o1 -o3 causes
-                 the  substrings  matched by capturing parentheses 3 and 1 and
+                 the substrings matched by capturing parentheses 3 and  1  and
-                 then 3 again to be output. By default, there is no  separator
+                 then  3 again to be output. By default, there is no separator
                 (but see the next but one option).
       --om-capture=number
-                 Set  the number of capturing parentheses that can be accessed
+                 Set the number of capturing parentheses that can be  accessed
                 by -o. The default is 50.
       --om-separator=text
-                 Specify a separating string for multiple occurrences  of  -o.
+                 Specify  a  separating string for multiple occurrences of -o.
-                 The  default is an empty string. Separating strings are never
+                 The default is an empty string. Separating strings are  never
                 coloured.
       -q, --quiet
                 Work quietly, that is, display nothing except error messages.
-                 The  exit  status  indicates  whether or not any matches were
+                 The exit status indicates whether or  not  any  matches  were
                 found.
       -r, --recursive
-                 If any given path is a directory, recursively scan the  files
+                 If  any given path is a directory, recursively scan the files
-                 it  contains, taking note of any --include and --exclude set-
+                 it contains, taking note of any --include and --exclude  set-
-                 tings. By default, a directory is read as a normal  file;  in
+                 tings.  By  default, a directory is read as a normal file; in
-                 some  operating  systems this gives an immediate end-of-file.
+                 some operating systems this gives an  immediate  end-of-file.
-                 This option is a shorthand for setting the -d option to  "re-
+                 This  option is a shorthand for setting the -d option to "re-
                 curse".
       --recursion-limit=number
-                 See --match-limit above.
+                 This is an obsolete synonym for --depth-limit.  See  --match-
                 limit above for details.
       -s, --no-messages
                 Suppress  error  messages  about  non-existent  or unreadable
@ -737,26 +762,30 @@ OPTIONS
       -u, --utf Operate in UTF-8 mode. This option is available only if PCRE2
                 has been compiled with UTF-8 support. All patterns (including
-                 those  for  any --exclude and --include options) and all sub-
+                 those  for any --exclude and --include options) and all lines
-                 ject lines that are scanned must be valid  strings  of  UTF-8
+                 that are scanned must be valid strings of  UTF-8  characters.
-                 characters.
+                 If an invalid UTF-8 string is encountered, an error occurs.
       -U, --utf-allow-invalid
                 As  --utf,  but in addition subject lines may contain invalid
                 UTF-8 code unit sequences. These can never form part  of  any
-                 pattern match. This facility allows valid UTF-8 strings to be
+                 pattern  match.  Patterns  themselves, however, must still be
-                 sought in executable or other binary files.  For more details
+                 valid UTF-8 strings. This facility allows valid UTF-8 strings
-                 about  matching in non-valid UTF-8 strings, see the pcre2uni-
+                 to be sought within arbitrary byte sequences in executable or
-                 code(3) documentation.
+                 other binary files. For more details about matching  in  non-
                 valid UTF-8 strings, see the pcre2unicode(3) documentation.
       -V, --version
-                 Write the version numbers of pcre2grep and the PCRE2  library
+                 Write  the version numbers of pcre2grep and the PCRE2 library
-                 to  the  standard  output and then exit. Anything else on the
+                 to the standard output and then exit. Anything  else  on  the
                 command line is ignored.
       -v, --invert-match
-                 Invert the sense of the match, so that  lines  which  do  not
+                 Invert  the  sense  of  the match, so that lines which do not
-                 match any of the patterns are the ones that are found.
+                 match any of the patterns are the ones that are  found.  When
                 this  option  is  set,  options  such  as --only-matching and
                 --output, which specify parts of a match that are to be  out-
                 put, are ignored.
       -w, --word-regex, --word-regexp
                 Force the patterns only to match "words". That is, there must
@ -878,30 +907,49 @@ USING PCRE2'S CALLOUT FACILITY
       mentation  for  details).  Numbered  callouts are ignored by pcre2grep;
       only callouts with string arguments are useful.
   Echoing a specific string
       Starting the callout string with a pipe character  invokes  an  echoing
       facility that avoids calling an external program or script. This facil-
       ity is always available, provided that  callouts  were  not  completely
       disabled  when  pcre2grep  was built. The rest of the callout string is
       processed as a zero-terminated string, which means it should  not  con-
       tain  any  internal  binary  zeros. It is written to the output, having
       first been passed through the same escape processing as text  from  the
       --output  (-O) option (see above). However, $0 cannot be used to insert
       a matched substring because the match is still  in  progress.  Instead,
       the  single  character '0' is inserted. Any syntax errors in the string
       (for example, a dollar not followed by another  character)  cause  the
       callout  to be ignored. No terminator is added to the output string, so
       if you want a newline, you must include it explicitly using the  escape
       $n. For example:
         pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
       Matching  continues normally after the string is output. If you want to
       see only the callout output but not any output from  an  actual  match,
       you should end the pattern with (*FAIL).
   Calling external programs or scripts
       This facility can be independently disabled when pcre2grep is built. It
-       is  supported for Windows, where a call to _spawnvp() is used, for VMS,
+       is supported for Windows, where a call to _spawnvp() is used, for  VMS,
-       where lib$spawn() is used, and  for  any  other  Unix-like  environment
+       where  lib$spawn()  is  used,  and  for any Unix-like environment where
-       where fork() and execv() are available.
+       fork() and execv() are available.
       If the callout string does not start with a pipe (vertical bar) charac-
-       ter, it is parsed into a list of substrings separated by  pipe  charac-
+       ter,  it  is parsed into a list of substrings separated by pipe charac-
-       ters.  The first substring must be an executable name, with the follow-
+       ters. The first substring must be an executable name, with the  follow-
       ing substrings specifying arguments:
         executable_name|arg1|arg2|...
-       Any substring (including the executable name) may  contain  escape  se-
+       Any  substring  (including  the executable name) may contain escape se-
-       quences  started by a dollar character: $<digits> or ${<digits>} is re-
+       quences started by a dollar character. These are the same  as  for  the
-       placed by the captured substring of the  given  decimal  number,  which
+       --output (-O) option documented above, except that $0 cannot insert the
-       must  be greater than zero. If the number is greater than the number of
+       matched string because the match is still  in  progress.  Instead,  the
-       capturing substrings, or if the capture is unset,  the  replacement  is
+       character '0' is inserted. If you need a literal dollar or pipe charac-
-       empty.
+       ter in any substring, use $$ or $| respectively. Here is an example:
       Any  other character is substituted by itself. In particular, $$ is re-
       placed by a single dollar and $| is replaced by a pipe character.  Here
       is an example:
         echo -e "abcde\n12345" | pcre2grep \
           '(?x)(.)(..(.))
@ -914,28 +962,15 @@ USING PCRE2'S CALLOUT FACILITY
           Arg1: [1] [234] [4] Arg2: |1| ()
           12345
-       The  parameters  for the system call that is used to run the program or
+       The parameters for the system call that is used to run the  program  or
       script are zero-terminated strings. This means that binary zero charac-
-       ters  in the callout argument will cause premature termination of their
+       ters in the callout argument will cause premature termination of  their
-       substrings, and therefore should not be present. Any syntax  errors  in
+       substrings,  and  therefore should not be present. Any syntax errors in
-       the  string  (for  example, a dollar not followed by another character)
+       the string (for example, a dollar not followed  by  another  character)
-       cause the callout to be ignored. If running the program fails  for  any
+       cause the callout to be ignored.  If running the program fails for any
-       reason  (including the non-existence of the executable), a local match-
+       reason (including the non-existence of the executable), a local  match-
       ing failure occurs and the matcher backtracks in the normal way.
   Echoing a specific string
       This facility is always available, provided that callouts were not com-
       pletely disabled when pcre2grep was built. If the callout string starts
       with a pipe (vertical bar) character, the rest of the string is written
       to the output, having been passed through the same escape processing as
       text from the --output option. This provides a simple echoing  facility
       that  avoids  calling  an  external program or script. No terminator is
       added to the string, so if you want a newline, you must include it  ex-
       plicitly.  Matching  continues  normally after the string is output. If
       you want to see only the callout output but not any output from an  ac-
       tual match, you should end the relevant pattern with (*FAIL).
 MATCHING ERRORS
@ -969,7 +1004,7 @@ DIAGNOSTICS
 SEE ALSO
-       pcre2pattern(3), pcre2syntax(3), pcre2callout(3).
+       pcre2pattern(3), pcre2syntax(3), pcre2callout(3), pcre2unicode(3).
 AUTHOR
@ -981,5 +1016,5 @@ AUTHOR
 REVISION
-       Last updated: 25 January 2020
+       Last updated: 04 October 2020
       Copyright (c) 1997-2020 University of Cambridge.
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@ -164,6 +164,10 @@ enum { DEE_READ, DEE_SKIP };
 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
 /* Return values from decode_dollar_escape() */
 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
 environments), a warning is issued if the value of fwrite() is ignored.
 Unfortunately, casting to (void) does not suppress the warning. To get round
@ -179,13 +183,21 @@ handled by using STDOUT_NL as the newline string. We also use a normal double
 quote for the example, as single quotes aren't usually available. */
 #ifdef WIN32
-#define STDOUT_NL  "\r\n"
+#define STDOUT_NL     "\r\n"
-#define QUOT       "\""
+#define STDOUT_NL_LEN  2
 #define QUOT          "\""
 #else
-#define STDOUT_NL  "\n"
+#define STDOUT_NL      "\n"
-#define QUOT       "'"
+#define STDOUT_NL_LEN  1
 #define QUOT           "'"
 #endif
 /* This code is returned from decode_dollar_escape() when $n is encountered,
 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
 point. */
 #define STDOUT_NL_CODE 0x7fffffffu
 /*************************************************
@ -224,8 +236,9 @@ static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
 static int bufsize = 3*PCRE2GREP_BUFSIZE;
 static int endlinetype;
-static unsigned long int total_count = 0;
+static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
 static unsigned long int counts_printed = 0;
 static unsigned long int total_count = 0;
 #ifdef WIN32
 static int dee_action = dee_SKIP;
@ -277,6 +290,9 @@ static BOOL show_total_count = FALSE;
 static BOOL silent = FALSE;
 static BOOL utf = FALSE;
 static uint8_t utf8_buffer[8];
 /* Structure for list of --only-matching capturing numbers. */
 typedef struct omstr {
@ -443,6 +459,7 @@ static option_item optionlist[] = {
  { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
  { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
  { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
  { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
  { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
  { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
 #ifdef SUPPORT_PCRE2GREP_JIT
@ -482,8 +499,13 @@ of PCRE2_NEWLINE_xx in pcre2.h. */
 static const char *newlines[] = {
  "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
-/* UTF-8 tables - used only when the newline setting is "any". */
+/* UTF-8 tables  */
 const int utf8_table1[] =
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
 const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
 const char utf8_table4[] = {
@ -531,6 +553,32 @@ else
 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
 /*************************************************
 *           Convert code point to UTF-8          *
 *************************************************/
 /* A static buffer is used. Returns the number of bytes. */
 static int
 ord2utf8(uint32_t value)
 {
 int extra = 0;   /* Number of continuation bytes that follow the lead byte */
 int k;

 /* utf8_table1[n] is the largest value that fits in n+1 bytes; find the
 smallest encoding that can hold the value. */

 while (extra < utf8_table1_size && value > (uint32_t)utf8_table1[extra])
   extra++;

 /* Fill in the continuation bytes from the end backwards, six bits at a time,
 leaving only the bits that belong in the lead byte. */

 for (k = extra; k > 0; k--)
   {
   utf8_buffer[k] = 0x80 | (value & 0x3f);
   value >>= 6;
   }

 /* The lead byte carries the length marker plus the remaining high bits. */

 utf8_buffer[0] = utf8_table2[extra] | value;
 return extra + 1;
 }
 /*************************************************
 *         Case-independent string compare        *
 *************************************************/
@ -1788,6 +1836,7 @@ if (slen > 200)
  slen = 200;
  msg = "text that starts:\n\n";
  }
 for (i = 1; p != NULL; p = p->next, i++)
  {
  *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
@ -1823,107 +1872,245 @@ return FALSE;  /* No match, no errors */
 }
 /*************************************************
 *          Decode dollar escape sequence         *
 *************************************************/
 /* Called from various places to decode $ escapes in output strings. The escape
 sequences are as follows:
 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
 zero is never returned; '0' is substituted.
 $a returns bell.
 $b returns backspace.
 $e returns escape.
 $f returns form feed.
 $n returns newline.
 $r returns carriage return.
 $t returns tab.
 $v returns vertical tab.
 $o<digits> returns the character represented by the given octal
  number; up to three digits are processed.
 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
  code points.
 $x<digits> returns the character represented by the given hexadecimal
  number; up to two digits are processed.
 $x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
  code points.
 Any other character is substituted by itself. E.g: $$ is replaced by a single
 dollar.
 Arguments:
  begin      the start of the whole string
  string     points to the $
  callout    TRUE if in a callout (inhibits error messages)
  value      where to return a value
  last       where to return pointer to the last used character
 Returns:     DDE_ERROR    after a syntax error
             DDE_CAPTURE  if *value is a capture number
             DDE_CHAR     if *value is a character code
 */
 /* Decode one $ escape. On entry "string" points at the dollar; on exit *last
 points at the last character that was inspected, so that a caller's loop
 increment moves on to the next unprocessed character. When DDE_ERROR is
 returned, *value may not have been set. Error messages are written to stderr
 only when "callout" is FALSE. */

 static int
 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
   uint32_t *value, PCRE2_SPTR *last)
 {
 uint32_t c = 0;
 int base = 10;
 int dcount;
 int rc = DDE_CHAR;
 BOOL brace = FALSE;

 switch (*(++string))
   {
   case 0:   /* Syntax error: a character must be present after $. */
   if (!callout)
     fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
       (int)(string - begin), "no character after $");
   *last = string;
   return DDE_ERROR;

   case '{':
   brace = TRUE;
   string++;
   if (!isdigit(*string))  /* Syntax error: a decimal number required. */
     {
     if (!callout)
       fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
         (int)(string - begin), "decimal number expected");

     /* Return at once, as for a missing character after $. Falling into the
     closing-brace check below would inspect string[1], which lies beyond the
     terminating zero when the text ends with "${", and could also emit a
     second, misleading "missing closing brace" message. */

     *last = string;
     return DDE_ERROR;
     }

   /* Fall through */

   /* The maximum capture number is 65535, so any number greater than that will
   always be an unknown capture number. We just stop incrementing, in order to
   avoid overflow. */

   case '0': case '1': case '2': case '3': case '4':
   case '5': case '6': case '7': case '8': case '9':
   do
     {
     if (c <= 65535) c = c * 10 + (*string - '0');
     string++;
     }
   while (*string >= '0' && *string <= '9');
   string--;  /* Point to last digit */

   /* In a callout, capture number 0 is not available. No error can be given,
   so just return the character '0'. */

   if (callout && c == 0)
     {
     *value = '0';
     }
   else
     {
     *value = c;
     rc = DDE_CAPTURE;
     }
   break;

   /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
   for valid Unicode code points. */

   case 'o':
   base = 8;
   string++;
   if (*string == '{')
     {
     brace = TRUE;
     string++;
     dcount = 7;
     }
   else dcount = 3;
   for (; dcount > 0; dcount--)
     {
     if (*string < '0' || *string > '7') break;
     c = c * 8 + (*string++ - '0');
     }
   *value = c;
   string--;  /* Point to last digit */
   break;

   /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
   for valid Unicode code points. */

   case 'x':
   base = 16;
   string++;
   if (*string == '{')
     {
     brace = TRUE;
     string++;
     dcount = 6;
     }
   else dcount = 2;
   for (; dcount > 0; dcount--)
     {
     if (!isxdigit(*string)) break;
     if (*string >= '0' && *string <= '9')
       c = c *16 + *string++ - '0';
     else
       c = c * 16 + (*string++ | 0x20) - 'a' + 10;  /* | 0x20 folds to lower case */
     }
   *value = c;
   string--;  /* Point to last digit */
   break;

   case 'a': *value = '\a'; break;
   case 'b': *value = '\b'; break;
 #ifndef EBCDIC
   case 'e': *value = '\033'; break;
 #else
   case 'e': *value = '\047'; break;
 #endif
   case 'f': *value = '\f'; break;
   case 'n': *value = STDOUT_NL_CODE; break;
   case 'r': *value = '\r'; break;
   case 't': *value = '\t'; break;
   case 'v': *value = '\v'; break;

   default: *value = *string; break;
   }

 /* If an opening brace was seen, the character after the last one used must be
 the closing brace. At this point string[1] is at worst the terminating zero,
 because every path that reaches here has left "string" on a non-zero
 character. */

 if (brace)
   {
   c = string[1];
   if (c != '}')
     {
     rc = DDE_ERROR;
     if (!callout)
       {
       /* A further digit of the relevant base means the digit limit was hit
       rather than the brace being forgotten. */

       if ((base == 8 && c >= '0' && c <= '7') ||
           (base == 16 && isxdigit(c)))
         {
         fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
           "too many %s digits\n", (int)(string - begin),
           (base == 8)? "octal" : "hex");
         }
       else
         {
         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
           (int)(string - begin), "missing closing brace");
         }
       }
     }
   else string++;
   }

 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */

 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
   {
   uint32_t max = utf? 0x0010ffffu : 0xffu;
   if (*value > max)
     {
     if (!callout)
       fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
         "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
     rc = DDE_ERROR;
     }
   }

 *last = string;
 return rc;
 }
 /*************************************************
 *          Check output text for errors          *
 *************************************************/
 /* Called early, to get errors before doing anything for -O text; also called
 from callouts to check before outputting.
 Arguments:
  string    an --output text string
  callout   TRUE if in a callout (stops printing errors)
 Returns:    TRUE if OK, FALSE on error
 */
 static BOOL
 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
 {
 uint32_t value;
 PCRE2_SPTR begin = string;
 for (; *string != 0; string++)
  {
-  if (*string == '$')
+  if (*string == '$' &&
-    {
+    decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
    PCRE2_SIZE capture_id = 0;
    BOOL brace = FALSE;
    string++;
    /* Syntax error: a character must be present after $. */
    if (*string == 0)
      {
      if (!callout)
        fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
          (int)(string - begin), "no character after $");
      return FALSE;
      }
    if (*string == '{')
      {
      /* Must be a decimal number in braces, e.g: {5} or {38} */
      string++;
      brace = TRUE;
      }
    if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
      {
      do
        {
        /* Maximum capture id is 65535. */
        if (capture_id <= 65535)
          capture_id = capture_id * 10 + (*string - '0');
        string++;
        }
      while (*string >= '0' && *string <= '9');
      if (brace)
        {
        /* Syntax error: closing brace is missing. */
        if (*string != '}')
          {
          if (!callout)
            fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
              (int)(string - begin), "missing closing brace");
          return FALSE;
          }
        }
      else
        {
        /* To negate the effect of the for. */
        string--;
        }
      }
    else if (brace)
      {
      /* Syntax error: a decimal number required. */
      if (!callout)
        fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
          (int)(string - begin), "decimal number expected");
      return FALSE;
      }
    else if (*string == 'o')
      {
      string++;
      if (*string < '0' || *string > '7')
        {
        /* Syntax error: an octal number required. */
        if (!callout)
          fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
            (int)(string - begin), "octal number expected");
        return FALSE;
        }
      }
    else if (*string == 'x')
      {
      string++;
      if (!isxdigit((unsigned char)*string))
        {
        /* Syntax error: a hexadecimal number required. */
        if (!callout)
          fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
            (int)(string - begin), "hexadecimal number expected");
        return FALSE;
        }
      }
    }
  }
-  return TRUE;
+return TRUE;
 }
@ -1932,31 +2119,7 @@ for (; *string != 0; string++)
 *************************************************/
 /* Display the output text, which is assumed to have already been syntax
-checked. Output may contain escape sequences started by the dollar sign. The
+checked. Output may contain escape sequences started by the dollar sign.
 escape sequences are substituted as follows:
  $<digits> or ${<digits>} is replaced by the captured substring of the given
  decimal number; zero will substitute the whole match. If the number is
  greater than the number of capturing substrings, or if the capture is unset,
  the replacement is empty.
  $a is replaced by bell.
  $b is replaced by backspace.
  $e is replaced by escape.
  $f is replaced by form feed.
  $n is replaced by newline.
  $r is replaced by carriage return.
  $t is replaced by tab.
  $v is replaced by vertical tab.
  $o<digits> is replaced by the character represented by the given octal
  number; up to three digits are processed.
  $x<digits> is replaced by the character represented by the given hexadecimal
  number; up to two digits are processed.
  Any other character is substituted by itself. E.g: $$ is replaced by a single
  dollar.
 Arguments:
  string:       the output text
@ -1973,121 +2136,54 @@ static BOOL
 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
  PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
 {
 uint32_t value;
 BOOL printed = FALSE;
 PCRE2_SPTR begin = string;
 for (; *string != 0; string++)
  {
  int ch = EOF;
  if (*string == '$')
    {
-    PCRE2_SIZE capture_id = 0;
+    switch(decode_dollar_escape(begin, string, callout, &value, &string))
    BOOL brace = FALSE;
    string++;
    if (*string == '{')
      {
-      /* Must be a decimal number in braces, e.g: {5} or {38} */
+      case DDE_CHAR:
-      string++;
+      if (value == STDOUT_NL_CODE)
      brace = TRUE;
      }
    if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
      {
      do
        {
-        /* Maximum capture id is 65535. */
+        fprintf(stdout, STDOUT_NL);
-        if (capture_id <= 65535)
+        printed = FALSE;
-          capture_id = capture_id * 10 + (*string - '0');
+        continue;
        string++;
        }
-      while (*string >= '0' && *string <= '9');
+      break;  /* Will print value */
-      if (!brace)
+      case DDE_CAPTURE:
-        {
+      if (value < capture_top)
        /* To negate the effect of the for. */
        string--;
        }
      if (capture_id < capture_top)
        {
        PCRE2_SIZE capturesize;
-        capture_id *= 2;
+        value *= 2;
-
+        capturesize = ovector[value + 1] - ovector[value];
        capturesize = ovector[capture_id + 1] - ovector[capture_id];
        if (capturesize > 0)
          {
-          print_match(subject + ovector[capture_id], capturesize);
+          print_match(subject + ovector[value], capturesize);
          printed = TRUE;
          }
        }
-      }
+      continue;
    else if (*string == 'a') ch = '\a';
    else if (*string == 'b') ch = '\b';
 #ifndef EBCDIC
    else if (*string == 'e') ch = '\033';
 #else
    else if (*string == 'e') ch = '\047';
 #endif
    else if (*string == 'f') ch = '\f';
    else if (*string == 'r') ch = '\r';
    else if (*string == 't') ch = '\t';
    else if (*string == 'v') ch = '\v';
    else if (*string == 'n')
      {
      fprintf(stdout, STDOUT_NL);
      printed = FALSE;
      }
    else if (*string == 'o')
      {
      string++;
-      ch = *string - '0';
+      default:  /* Should not occur */
-      if (string[1] >= '0' && string[1] <= '7')
+      break;
        {
        string++;
        ch = ch * 8 + (*string - '0');
        }
      if (string[1] >= '0' && string[1] <= '7')
        {
        string++;
        ch = ch * 8 + (*string - '0');
        }
      }
-    else if (*string == 'x')
+    }
      {
      string++;
-      if (*string >= '0' && *string <= '9')
+  else value = *string;  /* Not a $ escape */
-        ch = *string - '0';
+
-      else
+  if (utf && value <= 127) fprintf(stdout, "%c", *string); else
        ch = (*string | 0x20) - 'a' + 10;
      if (isxdigit((unsigned char)string[1]))
        {
        string++;
        ch *= 16;
        if (*string >= '0' && *string <= '9')
          ch += *string - '0';
        else
          ch += (*string | 0x20) - 'a' + 10;
        }
      }
    else
      {
      ch = *string;
      }
    }
  else
    {
-    ch = *string;
+    int i;
-    }
+    int n = ord2utf8(value);
-  if (ch != EOF)
+    for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
    {
    fprintf(stdout, "%c", ch);
    printed = TRUE;
    }
  printed = TRUE;
  }
 return printed;
@ -2166,7 +2262,7 @@ int result = 0;
 (void)unused;   /* Avoid compiler warning */
-/* Only callout with strings are supported. */
+/* Only callouts with strings are supported. */
 if (string == NULL || length == 0) return 0;
@ -2185,83 +2281,43 @@ return 0;
 #else
 /* Check the syntax and compute the number of string fragments. Callout strings
-are ignored in case of a syntax error. */
+are silently ignored in the event of a syntax error. */
 while (length > 0)
  {
  if (*string == '|')
    {
    argsvectorlen++;
-
+    if (argsvectorlen > 10000) return 0;  /* Too many args */
    /* Maximum 10000 arguments allowed. */
    if (argsvectorlen > 10000) return 0;
    }
  else if (*string == '$')
    {
-    PCRE2_SIZE capture_id = 0;
+    uint32_t value;
    PCRE2_SPTR begin = string;
-    string++;
+    switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
    length--;
    /* Syntax error: a character must be present after $. */
    if (length == 0) return 0;
    if (*string >= '1' && *string <= '9')
      {
-      do
+      case DDE_CAPTURE:
      if (value < capture_top)
        {
-        /* Maximum capture id is 65535. */
+        value *= 2;
-        if (capture_id <= 65535)
+        argslen += ovector[value + 1] - ovector[value];
          capture_id = capture_id * 10 + (*string - '0');
        string++;
        length--;
        }
-      while (length > 0 && *string >= '0' && *string <= '9');
+      argslen--;   /* Negate the effect of argslen++ below. */
      break;
-      /* To negate the effect of string++ below. */
+      case DDE_CHAR:
-      string--;
+      if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
-      length++;
+        else if (utf && value > 127) argslen += ord2utf8(value) - 1;
-      }
+      break;
    else if (*string == '{')
      {
      /* Must be a decimal number in braces, e.g: {5} or {38} */
      string++;
      length--;
-      /* Syntax error: a decimal number required. */
+      default:         /* Should not occur */
-      if (length == 0) return 0;
+      case DDE_ERROR:
-      if (*string < '1' || *string > '9') return 0;
+      return 0;
      do
        {
        /* Maximum capture id is 65535. */
        if (capture_id <= 65535)
          capture_id = capture_id * 10 + (*string - '0');
        string++;
        length--;
        /* Syntax error: no more characters */
        if (length == 0) return 0;
        }
      while (*string >= '0' && *string <= '9');
      /* Syntax error: closing brace is missing. */
      if (*string != '}') return 0;
      }
-    if (capture_id > 0)
+    length -= (string - begin);
      {
      if (capture_id < capture_top)
        {
        capture_id *= 2;
        argslen += ovector[capture_id + 1] - ovector[capture_id];
        }
      /* To negate the effect of argslen++ below. */
      argslen--;
      }
    }
  string++;
@ -2269,6 +2325,8 @@ while (length > 0)
  argslen++;
  }
 /* Get memory for the argument vector and its strings. */
 args = (char*)malloc(argslen);
 if (args == NULL) return 0;
@ -2279,9 +2337,10 @@ if (argsvector == NULL)
  return 0;
  }
 /* Now reprocess the string and set up the arguments. */
 argsptr = args;
 argsvectorptr = argsvector;
 *argsvectorptr++ = argsptr;
 length = calloutptr->callout_string_length;
@ -2294,69 +2353,55 @@ while (length > 0)
    *argsptr++ = '\0';
    *argsvectorptr++ = argsptr;
    }
  else if (*string == '$')
    {
-    string++;
+    uint32_t value;
-    length--;
+    PCRE2_SPTR begin = string;
-    if ((*string >= '1' && *string <= '9') || *string == '{')
+    switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
      {
-      PCRE2_SIZE capture_id = 0;
+      case DDE_CAPTURE:
-
+      if (value < capture_top)
      if (*string != '{')
        {
-        do
+        PCRE2_SIZE capturesize;
-          {
+        value *= 2;
-          /* Maximum capture id is 65535. */
+        capturesize = ovector[value + 1] - ovector[value];
-          if (capture_id <= 65535)
+        memcpy(argsptr, subject + ovector[value], capturesize);
-            capture_id = capture_id * 10 + (*string - '0');
+        argsptr += capturesize;
        }
      break;
-          string++;
+      case DDE_CHAR:
-          length--;
+      if (value == STDOUT_NL_CODE)
-          }
+        {
-        while (length > 0 && *string >= '0' && *string <= '9');
+        memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
-
+        argsptr += STDOUT_NL_LEN;  
-        /* To negate the effect of string++ below. */
+        }   
-        string--;
+      else if (utf && value > 127)
-        length++;
+        {
        int n = ord2utf8(value);
        memcpy(argsptr, utf8_buffer, n);
        argsptr += n;
        }
      else
        {
-        string++;
+        *argsptr++ = value;
        length--;
        do
          {
          /* Maximum capture id is 65535. */
          if (capture_id <= 65535)
            capture_id = capture_id * 10 + (*string - '0');
          string++;
          length--;
          }
        while (*string != '}');
        }
      break;
-        if (capture_id < capture_top)
+      default:         /* Should not occur */
-          {
+      case DDE_ERROR:
-          PCRE2_SIZE capturesize;
+      return 0;
-          capture_id *= 2;
+      }
-          capturesize = ovector[capture_id + 1] - ovector[capture_id];
+    length -= (string - begin);
          memcpy(argsptr, subject + ovector[capture_id], capturesize);
          argsptr += capturesize;
          }
      }
    else
      {
      *argsptr++ = *string;
      }
    }
  else
    {
    *argsptr++ = *string;
    }
  else *argsptr++ = *string;
  /* Advance along the string */
  string++;
  length--;
  }
@ -2479,6 +2524,7 @@ int filepos = 0;
 unsigned long int linenumber = 1;
 unsigned long int lastmatchnumber = 0;
 unsigned long int count = 0;
 long int count_matched_lines = 0;
 char *lastmatchrestart = main_buffer;
 char *ptr = main_buffer;
 char *endptr;
@ -2505,7 +2551,7 @@ bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
  input_line_buffered);
 #ifdef SUPPORT_LIBBZ2
-if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE; */
+if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE */
 #endif
 endptr = main_buffer + bufflength;
@ -2533,10 +2579,23 @@ while (ptr < endptr)
  int mrc = 0;
  unsigned int options = 0;
  BOOL match;
  BOOL line_matched = FALSE;
  char *t = ptr;
  PCRE2_SIZE length, linelength;
  PCRE2_SIZE startoffset = 0;
  /* If the -m option set a limit for the number of matched or non-matched
  lines, check it here. A limit of zero means that no matching is ever done.
  For stdin from a file, set the file position. */
  if (count_limit >= 0 && count_matched_lines >= count_limit)
    {
    if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
      (void)fseek(handle, (long int)filepos, SEEK_SET);
    rc = (count_limit == 0)? 1 : 0;
    break;
    }
  /* At this point, ptr is at the start of a line. We need to find the length
  of the subject string to pass to pcre2_match(). In multiline mode, it is the
  length of the remainder of the data in the buffer. Otherwise, it is the length of
@ -2686,6 +2745,10 @@ while (ptr < endptr)
    if (filenames == FN_NOMATCH_ONLY) return 1;
    /* Remember that this line matched (for counting matched lines) */
    line_matched = TRUE;
    /* If all we want is a yes/no answer, we can return immediately. */
    if (quiet) return 0;
@ -3067,6 +3130,11 @@ while (ptr < endptr)
  filepos += (int)(linelength + endlinelength);
  linenumber++;
  /* If there was at least one match (or a non-match, as required) in the line,
  increment the count for the -m option. */
  if (line_matched) count_matched_lines++;
  /* If input is line buffered, and the buffer is not yet full, read another
  line and add it into the buffer. */
@ -4088,6 +4156,7 @@ if (only_matching_count > 1)
  pcre2grep_exit(usage(2));
  }
 /* Check that there is a big enough ovector for all -o settings. */
 for (om = only_matching; om != NULL; om = om->next)
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@ -956,3 +956,27 @@ RC=0
 pcre2grep: Requested group 1 cannot be captured.
 pcre2grep: Use --om-capture to increase the size of the capture vector.
 RC=2
 ---------------------------- Test 129 -----------------------------
 The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
 lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
 RC=0
 ---------------------------- Test 130 -----------------------------
 fox
 fox
 fox
 fox
 RC=0
 ---------------------------- Test 131 -----------------------------
 2
 RC=0
 ---------------------------- Test 132 -----------------------------
 match 1:
 a
 match 2:
 b
 ---
 a
 RC=0
 ---------------------------- Test 133 -----------------------------
 =AB3CD5=
 RC=0
--- a/testdata/grepoutput8
+++ b/testdata/grepoutput8
@ -29,3 +29,6 @@ RC=1
 ---------------------------- Test U5 ------------------------------
 CD Z
 RC=0
 ---------------------------- Test U6 -----------------------------
 =ǓǤ=
 RC=0
--- a/testdata/grepoutputC
+++ b/testdata/grepoutputC
@ -40,3 +40,5 @@ T
 T
 T
 T
 0:T:AA
 The quick brown
--- a/testdata/grepoutputCN
+++ b/testdata/grepoutputCN
@ -28,3 +28,5 @@ T
 T
 T
 T
 0:T:AA
 The quick brown