pcre2grep update: -m and $x{..}, $o{..} escapes. Also some doc updates.

2020-10-04 16:34:31 +00:00 · 2020-10-04 16:34:31 +00:00 · 81da2b97e3
parent 3bdc76e4f3
commit 81da2b97e3
15 changed files with 1522 additions and 1270 deletions
--- a/10
+++ b/10
@ -76,6 +76,16 @@ the subject \xe5A. Fixes Bugzilla #2642.
 14. Fixed a bug in character set matching when JIT is enabled and both unicode
 scripts and unicode classes are present at the same time.

+15. Added GNU grep's -m (aka --max-count) option to pcre2grep.
+
+16. Refactored substitution processing in pcre2grep strings, both for the -O 
+option and when dealing with callouts. There is now a single function that 
+handles $ expansion in all cases (instead of multiple copies of almost 
+identical code). This means that the same escape sequences are available 
+everywhere, which was not previously the case. At the same time, the escape 
+sequences $x{...} and $o{...} have been introduced, to allow for characters 
+whose code points are greater than 255 in Unicode mode.
+

 Version 10.35 09-May-2020
 ---------------------------
--- a/6
+++ b/6
@ -892,6 +892,6 @@ The distribution should contain the files listed below.
                          )   environments

 Philip Hazel
-Email local part: ph10
-Email domain: cam.ac.uk
-Last updated: 20 March 2020
+Email local part: Philip.Hazel
+Email domain: gmail.com
+Last updated: 22 September 2020
--- a/25
+++ b/25
@ -661,6 +661,26 @@ echo "---------------------------- Test 128 -----------------------------" >>tes
 (cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

+echo "---------------------------- Test 129 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m 2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 130 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -o -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 131 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -oc -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <testdata/grepinput >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
 # Now compare the results.

 $cf $srcdir/testdata/grepoutput testtrygrep
@ -694,6 +714,10 @@ if [ $utf8 -ne 0 ] ; then
  (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep
  echo "RC=$?" >>testtrygrep

+  echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
+  echo "RC=$?" >>testtrygrep
+
  $cf $srcdir/testdata/grepoutput8 testtrygrep
  if [ $? != 0 ] ; then exit 1; fi

@ -764,6 +788,7 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
  $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
  $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
+  $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep

  if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
    $cf $srcdir/testdata/grepoutputCN testtrygrep
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -892,6 +892,6 @@ The distribution should contain the files listed below.
                          )   environments

 Philip Hazel
-Email local part: ph10
-Email domain: cam.ac.uk
-Last updated: 20 March 2020
+Email local part: Philip.Hazel
+Email domain: gmail.com
+Last updated: 22 September 2020
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -111,8 +111,8 @@ matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
 (either shown literally, or as an offset), scanning resumes immediately
 following the match, so that further matches on the same line can be found. If
 there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier part
-of the line.
+but patterns that follow the one that matched are not tried on the earlier 
+matched part of the line.
 </P>
 <P>
 This behaviour means that the order in which multiple patterns are specified
@ -146,11 +146,10 @@ ignored.
 <br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
 <P>
 By default, a file that contains a binary zero byte within the first 1024 bytes
-is identified as a binary file, and is processed specially. (GNU grep
-identifies binary files in this manner.) However, if the newline type is
-specified as NUL, that is, the line terminator is a binary zero, the test for
-a binary file is not applied. See the <b>--binary-files</b> option for a means
-of changing the way binary files are handled.
+is identified as a binary file, and is processed specially. However, if the
+newline type is specified as NUL, that is, the line terminator is a binary
+zero, the test for a binary file is not applied. See the <b>--binary-files</b>
+option for a means of changing the way binary files are handled.
 </P>
 <br><a name="SEC5" href="#TOC1">BINARY ZEROS IN PATTERNS</a><br>
 <P>
@ -443,8 +442,8 @@ Ignore upper/lower case distinctions during comparisons.
 <P>
 <b>--include</b>=<i>pattern</i>
 If any <b>--include</b> patterns are specified, the only files that are
-processed are those that match one of the patterns (and do not match an
-<b>--exclude</b> pattern). This option does not affect directories, but it
+processed are those whose names match one of the patterns and do not match an
+<b>--exclude</b> pattern. This option does not affect directories, but it
 applies to all files, whether listed on the command line, obtained from
 <b>--file-list</b>, or by scanning a directory. The pattern is a PCRE2 regular
 expression, and is matched against the final component of the file name, not
@ -463,8 +462,8 @@ may be given any number of times; all the files are read.
 <P>
 <b>--include-dir</b>=<i>pattern</i>
 If any <b>--include-dir</b> patterns are specified, the only directories that
-are processed are those that match one of the patterns (and do not match an
-<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
+are processed are those whose names match one of the patterns and do not match
+an <b>--exclude-dir</b> pattern. This applies to all directories, whether listed
 on the command line, obtained from <b>--file-list</b>, or by scanning a parent
 directory. The pattern is a PCRE2 regular expression, and is matched against
 the final component of the directory name, not the entire path. The <b>-F</b>,
@ -487,8 +486,9 @@ a separate line. Searching normally stops as soon as a matching line is found
 in a file. However, if the <b>-c</b> (count) option is also used, matching
 continues in order to obtain the correct count, and those files that have at
 least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches. This
-opeion overrides any previous <b>-H</b>, <b>-h</b>, or <b>-L</b> options.
+<b>-c</b> is a way of suppressing the listing of files with no matches that 
+occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
+<b>-h</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>--label</b>=<i>name</i>
@ -501,8 +501,8 @@ short form for this option.
 When this option is given, non-compressed input is read and processed line by
 line, and the output is flushed after each write. By default, input is read in
 large chunks, unless <b>pcre2grep</b> can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments or
-Windows). Output to terminal is normally automatically flushed by the operating
+terminal, which is currently possible only in Unix-like environments or
+Windows. Output to terminal is normally automatically flushed by the operating
 system. This option can be useful when the input or output is attached to a
 pipe and you do not want <b>pcre2grep</b> to buffer up large amounts of data.
 However, its use will affect performance, and the <b>-M</b> (multiline) option
@ -528,6 +528,49 @@ locale is specified, the PCRE2 library's default (usually the "C" locale) is
 used. There is no short form for this option.
 </P>
 <P>
+<b>-M</b>, <b>--multiline</b>
+Allow patterns to match more than one line. When this option is set, the PCRE2
+library is called in "multiline" mode. This allows a matched string to extend
+past the end of a line and continue on one or more subsequent lines. Patterns
+used with <b>-M</b> may usefully contain literal newline characters and internal
+occurrences of ^ and $ characters. The output for a successful match may
+consist of more than one line. The first line is the line in which the match
+started, and the last line is the line in which the match ended. If the matched
+string ends with a newline sequence, the output ends at the end of that line.
+If <b>-v</b> is set, none of the lines in a multi-line match are output. Once a
+match has been handled, scanning restarts at the beginning of the line after
+the one in which the match ended.
+<br>
+<br>
+The newline sequence that separates multiple lines must be matched as part of
+the pattern. For example, to find the phrase "regular expression" in a file
+where "regular" might be at the end of a line and "expression" at the start of
+the next line, you could use this command:
+<pre>
+  pcre2grep -M 'regular\s+expression' &#60;file&#62;
+</pre>
+The \s escape sequence matches any white space character, including newlines,
+and is followed by + so as to match trailing white space on the first line as
+well as possibly handling a two-character newline sequence.
+<br>
+<br>
+There is a limit to the number of lines that can be matched, imposed by the way
+that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
+large processing buffer, this should not be a problem, but the <b>-M</b> option
+does not work when input is read line by line (see <b>--line-buffered</b>).
+</P>
+<P>
+<b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
+Stop processing after finding <i>number</i> matching lines, or non-matching 
+lines if <b>-v</b> is also set. Any trailing context lines are output after the
+final match. In multiline mode, each multiline match counts as just one line
+for this purpose. If this limit is reached when reading the standard input from
+a regular file, the file is left positioned just after the last matching line.
+If <b>-c</b> is also set, the count that is output is never greater than 
+<i>number</i>. This option has no effect if used with <b>-L</b>, <b>-l</b>, or
+<b>-q</b>, or when just checking for a match in a binary file.
+</P>
+<P>
 <b>--match-limit</b>=<i>number</i>
 Processing some regular expression patterns may take a very long time to search
 for all possible matching strings. Others may require a very large amount of
@ -568,38 +611,6 @@ set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
 smaller than the starting buffer size.
 </P>
 <P>
-<b>-M</b>, <b>--multiline</b>
-Allow patterns to match more than one line. When this option is set, the PCRE2
-library is called in "multiline" mode. This allows a matched string to extend
-past the end of a line and continue on one or more subsequent lines. Patterns
-used with <b>-M</b> may usefully contain literal newline characters and internal
-occurrences of ^ and $ characters. The output for a successful match may
-consist of more than one line. The first line is the line in which the match
-started, and the last line is the line in which the match ended. If the matched
-string ends with a newline sequence, the output ends at the end of that line.
-If <b>-v</b> is set, none of the lines in a multi-line match are output. Once a
-match has been handled, scanning restarts at the beginning of the line after
-the one in which the match ended.
-<br>
-<br>
-The newline sequence that separates multiple lines must be matched as part of
-the pattern. For example, to find the phrase "regular expression" in a file
-where "regular" might be at the end of a line and "expression" at the start of
-the next line, you could use this command:
-<pre>
-  pcre2grep -M 'regular\s+expression' &#60;file&#62;
-</pre>
-The \s escape sequence matches any white space character, including newlines,
-and is followed by + so as to match trailing white space on the first line as
-well as possibly handling a two-character newline sequence.
-<br>
-<br>
-There is a limit to the number of lines that can be matched, imposed by the way
-that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
-large processing buffer, this should not be a problem, but the <b>-M</b> option
-does not work when input is read line by line (see <b>--line-buffered</b>.)
-</P>
-<P>
 <b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
 Six different conventions for indicating the ends of lines in scanned files are
 supported. For example:
@ -648,31 +659,41 @@ It should never be needed in normal use.
 </P>
 <P>
 <b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
-When there is a match, instead of outputting the whole line that matched,
-output just the given text, followed by an operating-system standard newline.
-The <b>--newline</b> option has no effect on this option, which is mutually
-exclusive with <b>--only-matching</b>, <b>--file-offsets</b>, and
-<b>--line-offsets</b>. Escape sequences starting with a dollar character may be
-used to insert the contents of the matched part of the line and/or captured
-substrings into the text.
+When there is a match, instead of outputting the line that matched, output just
+the text specified in this option, followed by an operating-system standard
+newline. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>,
+and <b>-C</b> options are ignored. The <b>--newline</b> option has no effect on
+this option, which is mutually exclusive with <b>--only-matching</b>,
+<b>--file-offsets</b>, and <b>--line-offsets</b>. However, like
+<b>--only-matching</b>, if there is more than one match in a line, each of them
+causes a line of output.
 <br>
 <br>
-$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
-substring of the given decimal number; zero substitutes the whole match. If
-the number is greater than the number of capturing substrings, or if the
-capture is unset, the replacement is empty.
+Escape sequences starting with a dollar character may be used to insert the
+contents of the matched part of the line and/or captured substrings into the
+text.
+<br>
+<br>
+$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured substring of the given
+decimal number; zero substitutes the whole match. If the number is greater than
+the number of capturing substrings, or if the capture is unset, the replacement
+is empty.
 <br>
 <br>
 $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
 newline; $r by carriage return; $t by tab; $v by vertical tab.
 <br>
 <br>
-$o&#60;digits&#62; is replaced by the character represented by the given octal
-number; up to three digits are processed.
+$o&#60;digits&#62; or $o{&#60;digits&#62;} is replaced by the character whose code point is the
+given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the 
+second form must be used.
 <br>
 <br>
-$x&#60;digits&#62; is replaced by the character represented by the given hexadecimal
-number; up to two digits are processed.
+$x&#60;digits&#62; or $x{&#60;digits&#62;} is replaced by the character represented by the
+given hexadecimal number. In the first form, up to two hexadecimal digits are
+processed. When more digits are needed in Unicode mode to specify a wide
+character, the second form must be used.
 <br>
 <br>
 Any other character is substituted by itself. In particular, $$ is replaced by
@ -741,7 +762,8 @@ option to "recurse".
 </P>
 <P>
 <b>--recursion-limit</b>=<i>number</i>
-See <b>--match-limit</b> above.
+This is an obsolete synonym for <b>--depth-limit</b>. See <b>--match-limit</b>
+above for details.
 </P>
 <P>
 <b>-s</b>, <b>--no-messages</b>
@ -765,15 +787,18 @@ total would always be zero.
 <b>-u</b>, <b>--utf</b>
 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
 with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
-<b>--include</b> options) and all subject lines that are scanned must be valid
-strings of UTF-8 characters.
+<b>--include</b> options) and all lines that are scanned must be valid strings
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error 
+occurs.
 </P>
 <P>
 <b>-U</b>, <b>--utf-allow-invalid</b>
 As <b>--utf</b>, but in addition subject lines may contain invalid UTF-8 code
-unit sequences. These can never form part of any pattern match. This facility
-allows valid UTF-8 strings to be sought in executable or other binary files.
-For more details about matching in non-valid UTF-8 strings, see the
+unit sequences. These can never form part of any pattern match. Patterns 
+themselves, however, must still be valid UTF-8 strings. This facility allows
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
+or other binary files. For more details about matching in non-valid UTF-8
+strings, see the
 <a href="pcre2unicode.html"><b>pcre2unicode</b>(3)</a>
 documentation.
 </P>
@ -786,7 +811,9 @@ ignored.
 <P>
 <b>-v</b>, <b>--invert-match</b>
 Invert the sense of the match, so that lines which do <i>not</i> match any of
-the patterns are the ones that are found.
+the patterns are the ones that are found. When this option is set, options such 
+as <b>--only-matching</b> and <b>--output</b>, which specify parts of a match
+that are to be output, are ignored.
 </P>
 <P>
 <b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
@ -909,12 +936,36 @@ documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
 only callouts with string arguments are useful.
 </P>
 <br><b>
+Echoing a specific string
+</b><br>
+<P>
+Starting the callout string with a pipe character invokes an echoing facility
+that avoids calling an external program or script. This facility is always
+available, provided that callouts were not completely disabled when
+<b>pcre2grep</b> was built. The rest of the callout string is processed as a
+zero-terminated string, which means it should not contain any internal binary
+zeros. It is written to the output, having first been passed through the same
+escape processing as text from the <b>--output</b> (<b>-O</b>) option (see
+above). However, $0 cannot be used to insert a matched substring because the
+match is still in progress. Instead, the single character '0' is inserted. Any
+syntax errors in the string (for example, a dollar not followed by another
+character) cause the callout to be ignored. No terminator is added to the
+output string, so if you want a newline, you must include it explicitly using
+the escape $n. For example:
+<pre>
+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' &#60;some file&#62;
+</pre>
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+pattern with (*FAIL).
+</P>
+<br><b>
 Calling external programs or scripts
 </b><br>
 <P>
 This facility can be independently disabled when <b>pcre2grep</b> is built. It
 is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
-where <b>lib$spawn()</b> is used, and for any other Unix-like environment where
+where <b>lib$spawn()</b> is used, and for any Unix-like environment where
 <b>fork()</b> and <b>execv()</b> are available.
 </P>
 <P>
@ -926,14 +977,11 @@ arguments:
  executable_name|arg1|arg2|...
 </pre>
 Any substring (including the executable name) may contain escape sequences
-started by a dollar character: $&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the
-captured substring of the given decimal number, which must be greater than
-zero. If the number is greater than the number of capturing substrings, or if
-the capture is unset, the replacement is empty.
-</P>
-<P>
-Any other character is substituted by itself. In particular, $$ is replaced by
-a single dollar and $| is replaced by a pipe character. Here is an example:
+started by a dollar character. These are the same as for the <b>--output</b>
+(<b>-O</b>) option documented above, except that $0 cannot insert the matched
+string because the match is still in progress. Instead, the character '0'
+is inserted. If you need a literal dollar or pipe character in any
+substring, use $$ or $| respectively. Here is an example:
 <pre>
  echo -e "abcde\n12345" | pcre2grep \
    '(?x)(.)(..(.))
@ -946,28 +994,14 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345
 </pre>
-The parameters for the system call that is used to run the
-program or script are zero-terminated strings. This means that binary zero
-characters in the callout argument will cause premature termination of their
-substrings, and therefore should not be present. Any syntax errors in the
-string (for example, a dollar not followed by another character) cause the
-callout to be ignored. If running the program fails for any reason (including
-the non-existence of the executable), a local matching failure occurs and the
-matcher backtracks in the normal way.
-</P>
-<br><b>
-Echoing a specific string
-</b><br>
-<P>
-This facility is always available, provided that callouts were not completely
-disabled when <b>pcre2grep</b> was built. If the callout string starts with a
-pipe (vertical bar) character, the rest of the string is written to the output,
-having been passed through the same escape processing as text from the --output
-option. This provides a simple echoing facility that avoids calling an external
-program or script. No terminator is added to the string, so if you want a
-newline, you must include it explicitly. Matching continues normally after the
-string is output. If you want to see only the callout output but not any output
-from an actual match, you should end the relevant pattern with (*FAIL).
+The parameters for the system call that is used to run the program or script
+are zero-terminated strings. This means that binary zero characters in the
+callout argument will cause premature termination of their substrings, and
+therefore should not be present. Any syntax errors in the string (for example,
+a dollar not followed by another character) cause the callout to be ignored.
+If running the program fails for any reason (including the non-existence of the
+executable), a local matching failure occurs and the matcher backtracks in the
+normal way.
 </P>
 <br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br>
 <P>
@ -999,7 +1033,8 @@ because VMS does not distinguish between exit(0) and exit(1).
 </P>
 <br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
 <P>
-<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3).
+<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3),
+<b>pcre2unicode</b>(3).
 </P>
 <br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
 <P>
@ -1012,7 +1047,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 25 January 2020
+Last updated: 04 October 2020
 <br>
 Copyright &copy; 1997-2020 University of Cambridge.
 <br>
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -323,7 +323,7 @@ test data, command lines that begin with # may appear. This file format, with
 some restrictions, can also be processed by the <b>perltest.sh</b> script that
 is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
 and Perl is the same. For a specification of <b>perltest.sh</b>, see the
-comments near its beginning.
+comments near its beginning. See also the #perltest command below.
 </P>
 <P>
 When the input is a terminal, <b>pcre2test</b> prompts for each line of input,
@ -420,14 +420,20 @@ patterns. Modifiers on a pattern can change these settings.
 <pre>
  #perltest
 </pre>
-The appearance of this line causes all subsequent modifier settings to be
-checked for compatibility with the <b>perltest.sh</b> script, which is used to
-confirm that Perl gives the same results as PCRE2. Also, apart from comment
-lines, #pattern commands, and #subject commands that set or unset "mark", no
-command lines are permitted, because they and many of the modifiers are
-specific to <b>pcre2test</b>, and should not be used in test files that are also
-processed by <b>perltest.sh</b>. The <b>#perltest</b> command helps detect tests
-that are accidentally put in the wrong file.
+This line is used in test files that can also be processed by <b>perltest.sh</b>
+to confirm that Perl gives the same results as PCRE2. Subsequent tests are
+checked for the use of <b>pcre2test</b> features that are incompatible with the
+<b>perltest.sh</b> script. 
+</P>
+<P>
+Patterns must use '/' as their delimiter, and only certain modifiers are
+supported. Comment lines, #pattern commands, and #subject commands that set or
+unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and
+#newline_default commands, which are needed in the relevant pcre2test files,
+are silently ignored. All other command lines are ignored, but give a warning
+message. The <b>#perltest</b> command helps detect tests that are accidentally
+put in the wrong file or use the wrong delimiter. For more details of the
+<b>perltest.sh</b> script see the comments it contains.
 <pre>
  #pop [&#60;modifiers&#62;]
  #popcopy [&#60;modifiers&#62;]
@ -2113,7 +2119,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 20 March 2020
+Last updated: 14 September 2020
 <br>
 Copyright &copy; 1997-2020 University of Cambridge.
 <br>
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "25 January 2020" "PCRE2 10.35"
+.TH PCRE2GREP 1 "04 October 2020" "PCRE2 10.36"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -79,8 +79,8 @@ matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
 (either shown literally, or as an offset), scanning resumes immediately
 following the match, so that further matches on the same line can be found. If
 there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier part
-of the line.
+but patterns that follow the one that matched are not tried on the earlier 
+matched part of the line.
 .P
 This behaviour means that the order in which multiple patterns are specified
 can affect the output when one of the above options is used. This is no longer
@ -115,11 +115,10 @@ ignored.
 .rs
 .sp
 By default, a file that contains a binary zero byte within the first 1024 bytes
-is identified as a binary file, and is processed specially. (GNU grep
-identifies binary files in this manner.) However, if the newline type is
-specified as NUL, that is, the line terminator is a binary zero, the test for
-a binary file is not applied. See the \fB--binary-files\fP option for a means
-of changing the way binary files are handled.
+is identified as a binary file, and is processed specially. However, if the
+newline type is specified as NUL, that is, the line terminator is a binary
+zero, the test for a binary file is not applied. See the \fB--binary-files\fP
+option for a means of changing the way binary files are handled.
 .
 .
 .SH "BINARY ZEROS IN PATTERNS"
@ -383,8 +382,8 @@ Ignore upper/lower case distinctions during comparisons.
 .TP
 \fB--include\fP=\fIpattern\fP
 If any \fB--include\fP patterns are specified, the only files that are
-processed are those that match one of the patterns (and do not match an
-\fB--exclude\fP pattern). This option does not affect directories, but it
+processed are those whose names match one of the patterns and do not match an
+\fB--exclude\fP pattern. This option does not affect directories, but it
 applies to all files, whether listed on the command line, obtained from
 \fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular
 expression, and is matched against the final component of the file name, not
@ -401,8 +400,8 @@ may be given any number of times; all the files are read.
 .TP
 \fB--include-dir\fP=\fIpattern\fP
 If any \fB--include-dir\fP patterns are specified, the only directories that
-are processed are those that match one of the patterns (and do not match an
-\fB--exclude-dir\fP pattern). This applies to all directories, whether listed
+are processed are those whose names match one of the patterns and do not match
+an \fB--exclude-dir\fP pattern. This applies to all directories, whether listed
 on the command line, obtained from \fB--file-list\fP, or by scanning a parent
 directory. The pattern is a PCRE2 regular expression, and is matched against
 the final component of the directory name, not the entire path. The \fB-F\fP,
@ -423,8 +422,9 @@ a separate line. Searching normally stops as soon as a matching line is found
 in a file. However, if the \fB-c\fP (count) option is also used, matching
 continues in order to obtain the correct count, and those files that have at
 least one match are listed along with their counts. Using this option with
-\fB-c\fP is a way of suppressing the listing of files with no matches. This
-opeion overrides any previous \fB-H\fP, \fB-h\fP, or \fB-L\fP options.
+\fB-c\fP is a way of suppressing the listing of files with no matches that 
+occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
+\fB-h\fP, or \fB-L\fP options.
 .TP
 \fB--label\fP=\fIname\fP
 This option supplies a name to be used for the standard input when file names
@ -435,8 +435,8 @@ short form for this option.
 When this option is given, non-compressed input is read and processed line by
 line, and the output is flushed after each write. By default, input is read in
 large chunks, unless \fBpcre2grep\fP can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments or
-Windows). Output to terminal is normally automatically flushed by the operating
+terminal, which is currently possible only in Unix-like environments or
+Windows. Output to terminal is normally automatically flushed by the operating
 system. This option can be useful when the input or output is attached to a
 pipe and you do not want \fBpcre2grep\fP to buffer up large amounts of data.
 However, its use will affect performance, and the \fB-M\fP (multiline) option
@ -459,6 +459,45 @@ the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no
 locale is specified, the PCRE2 library's default (usually the "C" locale) is
 used. There is no short form for this option.
 .TP
+\fB-M\fP, \fB--multiline\fP
+Allow patterns to match more than one line. When this option is set, the PCRE2
+library is called in "multiline" mode. This allows a matched string to extend
+past the end of a line and continue on one or more subsequent lines. Patterns
+used with \fB-M\fP may usefully contain literal newline characters and internal
+occurrences of ^ and $ characters. The output for a successful match may
+consist of more than one line. The first line is the line in which the match
+started, and the last line is the line in which the match ended. If the matched
+string ends with a newline sequence, the output ends at the end of that line.
+If \fB-v\fP is set, none of the lines in a multi-line match are output. Once a
+match has been handled, scanning restarts at the beginning of the line after
+the one in which the match ended.
+.sp
+The newline sequence that separates multiple lines must be matched as part of
+the pattern. For example, to find the phrase "regular expression" in a file
+where "regular" might be at the end of a line and "expression" at the start of
+the next line, you could use this command:
+.sp
+  pcre2grep -M 'regular\es+expression' <file>
+.sp
+The \es escape sequence matches any white space character, including newlines,
+and is followed by + so as to match trailing white space on the first line as
+well as possibly handling a two-character newline sequence.
+.sp
+There is a limit to the number of lines that can be matched, imposed by the way
+that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
+large processing buffer, this should not be a problem, but the \fB-M\fP option
+does not work when input is read line by line (see \fB--line-buffered\fP).
+.TP
+\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
+Stop processing after finding \fInumber\fP matching lines, or non-matching 
+lines if \fB-v\fP is also set. Any trailing context lines are output after the
+final match. In multiline mode, each multiline match counts as just one line
+for this purpose. If this limit is reached when reading the standard input from
+a regular file, the file is left positioned just after the last matching line.
+If \fB-c\fP is also set, the count that is output is never greater than 
+\fInumber\fP. This option has no effect if used with \fB-L\fP, \fB-l\fP, or
+\fB-q\fP, or when just checking for a match in a binary file.
+.TP
 \fB--match-limit\fP=\fInumber\fP
 Processing some regular expression patterns may take a very long time to search
 for all possible matching strings. Others may require a very large amount of
@ -493,35 +532,6 @@ This limits the expansion of the processing buffer, whose initial size can be
 set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
 smaller than the starting buffer size.
 .TP
-\fB-M\fP, \fB--multiline\fP
-Allow patterns to match more than one line. When this option is set, the PCRE2
-library is called in "multiline" mode. This allows a matched string to extend
-past the end of a line and continue on one or more subsequent lines. Patterns
-used with \fB-M\fP may usefully contain literal newline characters and internal
-occurrences of ^ and $ characters. The output for a successful match may
-consist of more than one line. The first line is the line in which the match
-started, and the last line is the line in which the match ended. If the matched
-string ends with a newline sequence, the output ends at the end of that line.
-If \fB-v\fP is set, none of the lines in a multi-line match are output. Once a
-match has been handled, scanning restarts at the beginning of the line after
-the one in which the match ended.
-.sp
-The newline sequence that separates multiple lines must be matched as part of
-the pattern. For example, to find the phrase "regular expression" in a file
-where "regular" might be at the end of a line and "expression" at the start of
-the next line, you could use this command:
-.sp
-  pcre2grep -M 'regular\es+expression' <file>
-.sp
-The \es escape sequence matches any white space character, including newlines,
-and is followed by + so as to match trailing white space on the first line as
-well as possibly handling a two-character newline sequence.
-.sp
-There is a limit to the number of lines that can be matched, imposed by the way
-that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
-large processing buffer, this should not be a problem, but the \fB-M\fP option
-does not work when input is read line by line (see \fB--line-buffered\fP.)
-.TP
 \fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
 Six different conventions for indicating the ends of lines in scanned files are
 supported. For example:
@ -565,27 +575,36 @@ use of JIT at run time. It is provided for testing and working round problems.
 It should never be needed in normal use.
 .TP
 \fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
-When there is a match, instead of outputting the whole line that matched,
-output just the given text, followed by an operating-system standard newline.
-The \fB--newline\fP option has no effect on this option, which is mutually
-exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and
-\fB--line-offsets\fP. Escape sequences starting with a dollar character may be
-used to insert the contents of the matched part of the line and/or captured
-substrings into the text.
+When there is a match, instead of outputting the line that matched, output just
+the text specified in this option, followed by an operating-system standard
+newline. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP,
+and \fB-C\fP options are ignored. The \fB--newline\fP option has no effect on
+this option, which is mutually exclusive with \fB--only-matching\fP,
+\fB--file-offsets\fP, and \fB--line-offsets\fP. However, like
+\fB--only-matching\fP, if there is more than one match in a line, each of them
+causes a line of output.
 .sp
-$<digits> or ${<digits>} is replaced by the captured
-substring of the given decimal number; zero substitutes the whole match. If
-the number is greater than the number of capturing substrings, or if the
-capture is unset, the replacement is empty.
+Escape sequences starting with a dollar character may be used to insert the
+contents of the matched part of the line and/or captured substrings into the
+text.
+.sp
+$<digits> or ${<digits>} is replaced by the captured substring of the given
+decimal number; zero substitutes the whole match. If the number is greater than
+the number of capturing substrings, or if the capture is unset, the replacement
+is empty.
 .sp
 $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
 newline; $r by carriage return; $t by tab; $v by vertical tab.
 .sp
-$o<digits> is replaced by the character represented by the given octal
-number; up to three digits are processed.
+$o<digits> or $o{<digits>} is replaced by the character whose code point is the
+given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the 
+second form must be used.
 .sp
-$x<digits> is replaced by the character represented by the given hexadecimal
-number; up to two digits are processed.
+$x<digits> or $x{<digits>} is replaced by the character whose code point is the
+given hexadecimal number. In the first form, up to two hexadecimal digits are
+processed. When more digits are needed in Unicode mode to specify a wide
+character, the second form must be used.
 .sp
 Any other character is substituted by itself. In particular, $$ is replaced by
 a single dollar.
@ -644,7 +663,8 @@ immediate end-of-file. This option is a shorthand for setting the \fB-d\fP
 option to "recurse".
 .TP
 \fB--recursion-limit\fP=\fInumber\fP
-See \fB--match-limit\fP above.
+This is an obsolete synonym for \fB--depth-limit\fP. See \fB--match-limit\fP
+above for details.
 .TP
 \fB-s\fP, \fB--no-messages\fP
 Suppress error messages about non-existent or unreadable files. Such files are
@ -665,14 +685,17 @@ total would always be zero.
 \fB-u\fP, \fB--utf\fP
 Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
 with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
-\fB--include\fP options) and all subject lines that are scanned must be valid
-strings of UTF-8 characters.
+\fB--include\fP options) and all lines that are scanned must be valid strings
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error 
+occurs.
 .TP
 \fB-U\fP, \fB--utf-allow-invalid\fP
 As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
-unit sequences. These can never form part of any pattern match. This facility
-allows valid UTF-8 strings to be sought in executable or other binary files.
-For more details about matching in non-valid UTF-8 strings, see the
+unit sequences. These can never form part of any pattern match. Patterns 
+themselves, however, must still be valid UTF-8 strings. This facility allows
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
+or other binary files. For more details about matching in non-valid UTF-8
+strings, see the
 .\" HREF
 \fBpcre2unicode\fP(3)
 .\"
@ -685,7 +708,9 @@ ignored.
 .TP
 \fB-v\fP, \fB--invert-match\fP
 Invert the sense of the match, so that lines which do \fInot\fP match any of
-the patterns are the ones that are found.
+the patterns are the ones that are found. When this option is set, options such 
+as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match
+that are to be output, are ignored.
 .TP
 \fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP
 Force the patterns only to match "words". That is, there must be a word
@ -812,12 +837,36 @@ documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
 only callouts with string arguments are useful.
 .
 .
+.SS "Echoing a specific string"
+.rs
+.sp
+Starting the callout string with a pipe character invokes an echoing facility
+that avoids calling an external program or script. This facility is always
+available, provided that callouts were not completely disabled when
+\fBpcre2grep\fP was built. The rest of the callout string is processed as a
+zero-terminated string, which means it should not contain any internal binary
+zeros. It is written to the output, having first been passed through the same
+escape processing as text from the \fB--output\fP (\fB-O\fP) option (see
+above). However, $0 cannot be used to insert a matched substring because the
+match is still in progress. Instead, the single character '0' is inserted. Any
+syntax errors in the string (for example, a dollar not followed by another
+character) cause the callout to be ignored. No terminator is added to the
+output string, so if you want a newline, you must include it explicitly using
+the escape $n. For example:
+.sp
+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
+.sp   
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+pattern with (*FAIL).
+.
+.
 .SS "Calling external programs or scripts"
 .rs
 .sp
 This facility can be independently disabled when \fBpcre2grep\fP is built. It
 is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
-where \fBlib$spawn()\fP is used, and for any other Unix-like environment where
+where \fBlib$spawn()\fP is used, and for any Unix-like environment where
 \fBfork()\fP and \fBexecv()\fP are available.
 .P
 If the callout string does not start with a pipe (vertical bar) character, it
@ -828,13 +877,11 @@ arguments:
  executable_name|arg1|arg2|...
 .sp
 Any substring (including the executable name) may contain escape sequences
-started by a dollar character: $<digits> or ${<digits>} is replaced by the
-captured substring of the given decimal number, which must be greater than
-zero. If the number is greater than the number of capturing substrings, or if
-the capture is unset, the replacement is empty.
-.P
-Any other character is substituted by itself. In particular, $$ is replaced by
-a single dollar and $| is replaced by a pipe character. Here is an example:
+started by a dollar character. These are the same as for the \fB--output\fP
+(\fB-O\fP) option documented above, except that $0 cannot insert the matched
+string because the match is still in progress. Instead, the character '0'
+is inserted. If you need a literal dollar or pipe character in any
+substring, use $$ or $| respectively. Here is an example:
 .sp
  echo -e "abcde\en12345" | pcre2grep \e
    '(?x)(.)(..(.))
@ -847,28 +894,14 @@ a single dollar and $| is replaced by a pipe character. Here is an example:
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345
 .sp
-The parameters for the system call that is used to run the
-program or script are zero-terminated strings. This means that binary zero
-characters in the callout argument will cause premature termination of their
-substrings, and therefore should not be present. Any syntax errors in the
-string (for example, a dollar not followed by another character) cause the
-callout to be ignored. If running the program fails for any reason (including
-the non-existence of the executable), a local matching failure occurs and the
-matcher backtracks in the normal way.
-.
-.
-.SS "Echoing a specific string"
-.rs
-.sp
-This facility is always available, provided that callouts were not completely
-disabled when \fBpcre2grep\fP was built. If the callout string starts with a
-pipe (vertical bar) character, the rest of the string is written to the output,
-having been passed through the same escape processing as text from the --output
-option. This provides a simple echoing facility that avoids calling an external
-program or script. No terminator is added to the string, so if you want a
-newline, you must include it explicitly. Matching continues normally after the
-string is output. If you want to see only the callout output but not any output
-from an actual match, you should end the relevant pattern with (*FAIL).
+The parameters for the system call that is used to run the program or script
+are zero-terminated strings. This means that binary zero characters in the
+callout argument will cause premature termination of their substrings, and
+therefore should not be present. Any syntax errors in the string (for example,
+a dollar not followed by another character) cause the callout to be ignored.
+If running the program fails for any reason (including the non-existence of the
+executable), a local matching failure occurs and the matcher backtracks in the
+normal way.
 .
 .
 .SH "MATCHING ERRORS"
@ -904,7 +937,8 @@ because VMS does not distinguish between exit(0) and exit(1).
 .SH "SEE ALSO"
 .rs
 .sp
-\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3).
+\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3),
+\fBpcre2unicode\fP(3).
 .
 .
 .SH AUTHOR
@ -921,6 +955,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 25 January 2020
+Last updated: 04 October 2020
 Copyright (c) 1997-2020 University of Cambridge.
 .fi
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@ -80,7 +80,7 @@ DESCRIPTION
       following  the  match,  so that further matches on the same line can be
       found. If there are multiple patterns, they are all tried  on  the  re-
       mainder  of the line, but patterns that follow the one that matched are
-       not tried on the earlier part of the line.
+       not tried on the earlier matched part of the line.

       This behaviour means that the order  in  which  multiple  patterns  are
       specified  can affect the output when one of the above options is used.
@ -115,10 +115,10 @@ BINARY FILES

       By  default,  a  file that contains a binary zero byte within the first
       1024 bytes is identified as a binary file, and is processed  specially.
-       (GNU grep identifies binary files in this manner.) However, if the new-
-       line type is specified as NUL, that is, the line terminator is a binary
-       zero, the test for a binary file is not applied. See the --binary-files
-       option for a means of changing the way binary files are handled.
+       However,  if  the  newline  type is specified as NUL, that is, the line
+       terminator is a binary zero, the test for a binary file is not applied.
+       See  the  --binary-files  option for a means of changing the way binary
+       files are handled.


 BINARY ZEROS IN PATTERNS
@ -413,17 +413,17 @@ OPTIONS

       --include=pattern
                 If  any --include patterns are specified, the only files that
-                 are processed are those that match one of the  patterns  (and
-                 do  not match an --exclude pattern). This option does not af-
-                 fect directories, but it applies to all files, whether listed
-                 on  the  command line, obtained from --file-list, or by scan-
-                 ning a directory. The pattern is a PCRE2 regular  expression,
-                 and  is matched against the final component of the file name,
-                 not the entire path. The -F, -w, and -x options do not  apply
-                 to this pattern. The option may be given any number of times.
-                 If a file name matches both an  --include  and  an  --exclude
-                 pattern, it is excluded.  There is no short form for this op-
-                 tion.
+                 are processed are those whose names match one of the patterns
+                 and  do  not match an --exclude pattern. This option does not
+                 affect directories, but it  applies  to  all  files,  whether
+                 listed  on the command line, obtained from --file-list, or by
+                 scanning a directory. The pattern is a PCRE2 regular  expres-
+                 sion,  and is matched against the final component of the file
+                 name, not the entire path. The -F, -w, and -x options do  not
+                 apply  to this pattern. The option may be given any number of
+                 times. If a file name matches both an --include and an  --ex-
+                 clude  pattern,  it  is excluded.  There is no short form for
+                 this option.

       --include-from=filename
                 Treat each non-empty line of the file  as  the  data  for  an
@ -434,8 +434,8 @@ OPTIONS

       --include-dir=pattern
                 If any --include-dir patterns are specified, the only  direc-
-                 tories  that  are  processed  are those that match one of the
-                 patterns (and do not match an  --exclude-dir  pattern).  This
+                 tories  that are processed are those whose names match one of
+                 the patterns and do not match an --exclude-dir pattern.  This
                 applies  to  all  directories,  whether listed on the command
                 line, obtained from --file-list, or by scanning a parent  di-
                 rectory.  The  pattern  is a PCRE2 regular expression, and is
@ -461,8 +461,9 @@ OPTIONS
                 matching  continues in order to obtain the correct count, and
                 those files that have at least one  match  are  listed  along
                 with their counts. Using this option with -c is a way of sup-
-                 pressing the listing of files with no  matches.  This  opeion
-                 overrides any previous -H, -h, or -L options.
+                 pressing the listing of files with  no  matches  that  occurs
+                 with  -c  on  its own. This option overrides any previous -H,
+                 -h, or -L options.

       --label=name
                 This option supplies a name to be used for the standard input
@ -470,37 +471,84 @@ OPTIONS
                 input)" is used. There is no short form for this option.

       --line-buffered
-                 When  this  option is given, non-compressed input is read and
-                 processed line by line, and the output is flushed after  each
-                 write.  By  default,  input  is  read in large chunks, unless
-                 pcre2grep can determine that it is reading  from  a  terminal
-                 (which  is  currently possible only in Unix-like environments
-                 or Windows). Output to  terminal  is  normally  automatically
-                 flushed  by  the  operating system. This option can be useful
-                 when the input or output is attached to a pipe and you do not
-                 want  pcre2grep to buffer up large amounts of data.  However,
-                 its use will affect performance, and the -M  (multiline)  op-
-                 tion  ceases  to work. When input is from a compressed .gz or
-                 .bz2 file, --line-buffered is ignored.
+                 When this option is given, non-compressed input is  read  and
+                 processed  line by line, and the output is flushed after each
+                 write. By default, input is  read  in  large  chunks,  unless
+                 pcre2grep  can  determine that it is reading from a terminal,
+                 which is currently possible only in Unix-like environments or
+                 Windows. Output to terminal is normally automatically flushed
+                 by the operating system. This option can be useful  when  the
+                 input  or  output  is  attached to a pipe and you do not want
+                 pcre2grep to buffer up large amounts of data.   However,  its
+                 use  will  affect  performance, and the -M (multiline) option
+                 ceases to work. When input is from a compressed .gz  or  .bz2
+                 file, --line-buffered is ignored.

       --line-offsets
-                 Instead of showing lines or parts of lines that  match,  show
+                 Instead  of  showing lines or parts of lines that match, show
                 each match as a line number, the offset from the start of the
-                 line, and a length. The line number is terminated by a  colon
-                 (as  usual; see the -n option), and the offset and length are
-                 separated by a comma. In this  mode,  no  context  is  shown.
-                 That  is, the -A, -B, and -C options are ignored. If there is
-                 more than one match in a line, each of them  is  shown  sepa-
-                 rately.  This  option  is  mutually  exclusive with --output,
+                 line,  and a length. The line number is terminated by a colon
+                 (as usual; see the -n option), and the offset and length  are
+                 separated  by  a  comma.  In  this mode, no context is shown.
+                 That is, the -A, -B, and -C options are ignored. If there  is
+                 more  than  one  match in a line, each of them is shown sepa-
+                 rately. This option  is  mutually  exclusive  with  --output,
                 --file-offsets, and --only-matching.

       --locale=locale-name
-                 This option specifies a locale to be used for pattern  match-
-                 ing.  It  overrides the value in the LC_ALL or LC_CTYPE envi-
-                 ronment variables. If no locale is specified, the  PCRE2  li-
+                 This  option specifies a locale to be used for pattern match-
+                 ing. It overrides the value in the LC_ALL or  LC_CTYPE  envi-
+                 ronment  variables.  If no locale is specified, the PCRE2 li-
                 brary's default (usually the "C" locale) is used. There is no
                 short form for this option.

+       -M, --multiline
+                 Allow  patterns to match more than one line. When this option
+                 is set, the PCRE2 library is called in "multiline" mode. This
+                 allows  a matched string to extend past the end of a line and
+                 continue on one or more subsequent lines. Patterns used  with
+                 -M may usefully contain literal newline characters and inter-
+                 nal occurrences of ^ and $ characters. The output for a  suc-
+                 cessful  match  may  consist of more than one line. The first
+                 line is the line in which the match  started,  and  the  last
+                 line  is  the  line  in which the match ended. If the matched
+                 string ends with a newline sequence, the output ends  at  the
+                 end  of  that  line.   If  -v  is set, none of the lines in a
+                 multi-line match are output. Once a match has  been  handled,
+                 scanning  restarts at the beginning of the line after the one
+                 in which the match ended.
+
+                 The newline sequence that separates multiple  lines  must  be
+                 matched  as  part  of  the  pattern. For example, to find the
+                 phrase "regular expression" in a file where  "regular"  might
+                 be  at the end of a line and "expression" at the start of the
+                 next line, you could use this command:
+
+                   pcre2grep -M 'regular\s+expression' <file>
+
+                 The \s escape sequence matches any white space character, in-
+                 cluding  newlines, and is followed by + so as to match trail-
+                 ing white space on the first line as well  as  possibly  han-
+                 dling a two-character newline sequence.
+
+                 There  is a limit to the number of lines that can be matched,
+                 imposed by the way that pcre2grep buffers the input  file  as
+                 it  scans  it.  With  a sufficiently large processing buffer,
+                 this should not be a problem, but the -M option does not work
+                 when input is read line by line (see --line-buffered).
+
+       -m number, --max-count=number
+                 Stop  processing after finding number matching lines, or non-
+                 matching lines if -v is also set. Any trailing context  lines
+                 are  output  after  the  final match. In multiline mode, each
+                 multiline match counts as just one line for this purpose.  If
+                 this  limit is reached when reading the standard input from a
+                 regular file, the file is left positioned just after the last
+                 matching  line.   If -c is also set, the count that is output
+                 is never greater than number. This option has  no  effect  if
+                 used with -L, -l, or -q, or when just checking for a match in
+                 a binary file.
+
       --match-limit=number
                 Processing some regular expression patterns may take  a  very
                 long time to search for all possible matching strings. Others
@ -542,41 +590,6 @@ OPTIONS
                 size is silently forced to be no smaller  than  the  starting
                 buffer size.

-       -M, --multiline
-                 Allow  patterns to match more than one line. When this option
-                 is set, the PCRE2 library is called in "multiline" mode. This
-                 allows  a matched string to extend past the end of a line and
-                 continue on one or more subsequent lines. Patterns used  with
-                 -M may usefully contain literal newline characters and inter-
-                 nal occurrences of ^ and $ characters. The output for a  suc-
-                 cessful  match  may  consist of more than one line. The first
-                 line is the line in which the match  started,  and  the  last
-                 line  is  the  line  in which the match ended. If the matched
-                 string ends with a newline sequence, the output ends  at  the
-                 end  of  that  line.   If  -v  is set, none of the lines in a
-                 multi-line match are output. Once a match has  been  handled,
-                 scanning  restarts at the beginning of the line after the one
-                 in which the match ended.
-
-                 The newline sequence that separates multiple  lines  must  be
-                 matched  as  part  of  the  pattern. For example, to find the
-                 phrase "regular expression" in a file where  "regular"  might
-                 be  at the end of a line and "expression" at the start of the
-                 next line, you could use this command:
-
-                   pcre2grep -M 'regular\s+expression' <file>
-
-                 The \s escape sequence matches any white space character, in-
-                 cluding  newlines, and is followed by + so as to match trail-
-                 ing white space on the first line as well  as  possibly  han-
-                 dling a two-character newline sequence.
-
-                 There  is a limit to the number of lines that can be matched,
-                 imposed by the way that pcre2grep buffers the input  file  as
-                 it  scans  it.  With  a sufficiently large processing buffer,
-                 this should not be a problem, but the -M option does not work
-                 when input is read line by line (see --line-buffered.)
-
       -N newline-type, --newline=newline-type
                 Six different conventions for indicating the ends of lines in
                 scanned files are supported. For example:
@ -625,97 +638,109 @@ OPTIONS
                 lems.  It should never be needed in normal use.

       -O text, --output=text
-                 When there is a match, instead of outputting the  whole  line
-                 that  matched, output just the given text, followed by an op-
-                 erating-system standard newline.  The --newline option has no
-                 effect  on  this  option,  which  is  mutually exclusive with
-                 --only-matching, --file-offsets, and  --line-offsets.  Escape
-                 sequences starting with a dollar character may be used to in-
-                 sert the contents of the matched part of the line and/or cap-
-                 tured substrings into the text.
+                 When there is a match, instead of outputting  the  line  that
+                 matched,  output just the text specified in this option, fol-
+                 lowed by an operating-system standard newline. In this  mode,
+                 no  context is shown. That is, the -A, -B, and -C options are
+                 ignored. The --newline option has no effect on  this  option,
+                 which is mutually exclusive with --only-matching, --file-off-
+                 sets, and --line-offsets. However, like  --only-matching,  if
+                 there is more than one match in a line, each of them causes a
+                 line of output.

-                 $<digits>  or  ${<digits>}  is  replaced by the captured sub-
-                 string of the given  decimal  number;  zero  substitutes  the
+                 Escape sequences starting with a dollar character may be used
+                 to insert the contents of the matched part of the line and/or
+                 captured substrings into the text.
+
+                 $<digits> or ${<digits>} is replaced  by  the  captured  sub-
+                 string  of  the  given  decimal  number; zero substitutes the
                 whole match. If the number is greater than the number of cap-
-                 turing substrings, or if the capture is unset,  the  replace-
+                 turing  substrings,  or if the capture is unset, the replace-
                 ment is empty.

-                 $a  is replaced by bell; $b by backspace; $e by escape; $f by
-                 form feed; $n by newline; $r by carriage return; $t  by  tab;
+                 $a is replaced by bell; $b by backspace; $e by escape; $f  by
+                 form  feed;  $n by newline; $r by carriage return; $t by tab;
                 $v by vertical tab.

-                 $o<digits>  is  replaced  by the character represented by the
-                 given octal number; up to three digits are processed.
+                 $o<digits> or $o{<digits>} is replaced by the character whose
+                 code  point  is the given octal number. In the first form, up
+                 to three octal digits are processed.  When  more  digits  are
+                 needed  in Unicode mode to specify a wide character, the sec-
+                 ond form must be used.

-                 $x<digits> is replaced by the character  represented  by  the
-                 given hexadecimal number; up to two digits are processed.
+                 $x<digits> or $x{<digits>} is replaced by the character  rep-
+                 resented  by the given hexadecimal number. In the first form,
+                 up to two hexadecimal digits are processed. When more  digits
+                 are  needed  in Unicode mode to specify a wide character, the
+                 second form must be used.

-                 Any  other character is substituted by itself. In particular,
+                 Any other character is substituted by itself. In  particular,
                 $$ is replaced by a single dollar.

       -o, --only-matching
                 Show only the part of the line that matched a pattern instead
-                 of  the  whole  line. In this mode, no context is shown. That
-                 is, the -A, -B, and -C options are ignored. If there is  more
-                 than  one  match in a line, each of them is shown separately,
-                 on a separate line of output. If -o is combined with -v  (in-
-                 vert  the  sense of the match to find non-matching lines), no
-                 output is generated, but the return  code  is  set  appropri-
-                 ately.  If  the matched portion of the line is empty, nothing
-                 is output unless the file  name  or  line  number  are  being
-                 printed,  in  which case they are shown on an otherwise empty
+                 of the whole line. In this mode, no context  is  shown.  That
+                 is,  the -A, -B, and -C options are ignored. If there is more
+                 than one match in a line, each of them is  shown  separately,
+                 on  a separate line of output. If -o is combined with -v (in-
+                 vert the sense of the match to find non-matching  lines),  no
+                 output  is  generated,  but  the return code is set appropri-
+                 ately. If the matched portion of the line is  empty,  nothing
+                 is  output  unless  the  file  name  or line number are being
+                 printed, in which case they are shown on an  otherwise  empty
                 line.  This  option  is  mutually  exclusive  with  --output,
                 --file-offsets and --line-offsets.

       -onumber, --only-matching=number
-                 Show  only  the  part  of the line that matched the capturing
+                 Show only the part of the line  that  matched  the  capturing
                 parentheses of the given number. Up to 50 capturing parenthe-
-                 ses  are  supported by default. This limit can be changed via
-                 the --om-capture option. A pattern may contain any number  of
-                 capturing  parentheses, but only those whose number is within
-                 the limit can be accessed by -o. An error occurs if the  num-
+                 ses are supported by default. This limit can be  changed  via
+                 the  --om-capture option. A pattern may contain any number of
+                 capturing parentheses, but only those whose number is  within
+                 the  limit can be accessed by -o. An error occurs if the num-
                 ber specified by -o is greater than the limit.

                 -o0 is the same as -o without a number. Because these options
-                 can be given without an argument (see above), if an  argument
-                 is  present, it must be given in the same shell item, for ex-
-                 ample, -o3 or --only-matching=2. The comments given  for  the
-                 non-argument  case  above  also  apply to this option. If the
-                 specified capturing parentheses do not exist in the  pattern,
-                 or  were  not  set in the match, nothing is output unless the
+                 can  be given without an argument (see above), if an argument
+                 is present, it must be given in the same shell item, for  ex-
+                 ample,  -o3  or --only-matching=2. The comments given for the
+                 non-argument case above also apply to  this  option.  If  the
+                 specified  capturing parentheses do not exist in the pattern,
+                 or were not set in the match, nothing is  output  unless  the
                 file name or line number are being output.

-                 If this option is given multiple times,  multiple  substrings
-                 are  output  for  each  match,  in  the order the options are
-                 given, and all on one line. For example, -o3 -o1  -o3  causes
-                 the  substrings  matched by capturing parentheses 3 and 1 and
-                 then 3 again to be output. By default, there is no  separator
+                 If  this  option is given multiple times, multiple substrings
+                 are output for each match,  in  the  order  the  options  are
+                 given,  and  all on one line. For example, -o3 -o1 -o3 causes
+                 the substrings matched by capturing parentheses 3 and  1  and
+                 then  3 again to be output. By default, there is no separator
                 (but see the next but one option).

       --om-capture=number
-                 Set  the number of capturing parentheses that can be accessed
+                 Set the number of capturing parentheses that can be  accessed
                 by -o. The default is 50.

       --om-separator=text
-                 Specify a separating string for multiple occurrences  of  -o.
-                 The  default is an empty string. Separating strings are never
+                 Specify  a  separating string for multiple occurrences of -o.
+                 The default is an empty string. Separating strings are  never
                 coloured.

       -q, --quiet
                 Work quietly, that is, display nothing except error messages.
-                 The  exit  status  indicates  whether or not any matches were
+                 The exit status indicates whether or  not  any  matches  were
                 found.

       -r, --recursive
-                 If any given path is a directory, recursively scan the  files
-                 it  contains, taking note of any --include and --exclude set-
-                 tings. By default, a directory is read as a normal  file;  in
-                 some  operating  systems this gives an immediate end-of-file.
-                 This option is a shorthand for setting the -d option to  "re-
+                 If  any given path is a directory, recursively scan the files
+                 it contains, taking note of any --include and --exclude  set-
+                 tings.  By  default, a directory is read as a normal file; in
+                 some operating systems this gives an  immediate  end-of-file.
+                 This  option is a shorthand for setting the -d option to "re-
                 curse".

       --recursion-limit=number
-                 See --match-limit above.
+                 This is an obsolete synonym for --depth-limit.  See  --match-
+                 limit above for details.

       -s, --no-messages
                 Suppress  error  messages  about  non-existent  or unreadable
@ -737,26 +762,30 @@ OPTIONS

       -u, --utf Operate in UTF-8 mode. This option is available only if PCRE2
                 has been compiled with UTF-8 support. All patterns (including
-                 those  for  any --exclude and --include options) and all sub-
-                 ject lines that are scanned must be valid  strings  of  UTF-8
-                 characters.
+                 those  for any --exclude and --include options) and all lines
+                 that are scanned must be valid strings of  UTF-8  characters.
+                 If an invalid UTF-8 string is encountered, an error occurs.

       -U, --utf-allow-invalid
                 As  --utf,  but in addition subject lines may contain invalid
                 UTF-8 code unit sequences. These can never form part  of  any
-                 pattern match. This facility allows valid UTF-8 strings to be
-                 sought in executable or other binary files.  For more details
-                 about  matching in non-valid UTF-8 strings, see the pcre2uni-
-                 code(3) documentation.
+                 pattern  match.  Patterns  themselves, however, must still be
+                 valid UTF-8 strings. This facility allows valid UTF-8 strings
+                 to be sought within arbitrary byte sequences in executable or
+                 other binary files. For more details about matching  in  non-
+                 valid UTF-8 strings, see the pcre2unicode(3) documentation.

       -V, --version
-                 Write the version numbers of pcre2grep and the PCRE2  library
-                 to  the  standard  output and then exit. Anything else on the
+                 Write  the version numbers of pcre2grep and the PCRE2 library
+                 to the standard output and then exit. Anything  else  on  the
                 command line is ignored.

       -v, --invert-match
-                 Invert the sense of the match, so that  lines  which  do  not
-                 match any of the patterns are the ones that are found.
+                 Invert  the  sense  of  the match, so that lines which do not
+                 match any of the patterns are the ones that are  found.  When
+                 this  option  is  set,  options  such  as --only-matching and
+                 --output, which specify parts of a match that are to be  out-
+                 put, are ignored.

       -w, --word-regex, --word-regexp
                 Force the patterns only to match "words". That is, there must
@ -878,30 +907,49 @@ USING PCRE2'S CALLOUT FACILITY
       mentation  for  details).  Numbered  callouts are ignored by pcre2grep;
       only callouts with string arguments are useful.

+   Echoing a specific string
+
+       Starting the callout string with a pipe character  invokes  an  echoing
+       facility that avoids calling an external program or script. This facil-
+       ity is always available, provided that  callouts  were  not  completely
+       disabled  when  pcre2grep  was built. The rest of the callout string is
+       processed as a zero-terminated string, which means it should  not  con-
+       tain  any  internal  binary  zeros. It is written to the output, having
+       first been passed through the same escape processing as text  from  the
+       --output  (-O) option (see above). However, $0 cannot be used to insert
+       a matched substring because the match is still  in  progress.  Instead,
+       the  single  character '0' is inserted. Any syntax errors in the string
+       (for example, a dollar not followed by  another  character)  cause  the
+       callout  to be ignored. No terminator is added to the output string, so
+       if you want a newline, you must include it explicitly using the  escape
+       $n. For example:
+
+         pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
+
+       Matching  continues normally after the string is output. If you want to
+       see only the callout output but not any output from  an  actual  match,
+       you should end the pattern with (*FAIL).
+
   Calling external programs or scripts

       This facility can be independently disabled when pcre2grep is built. It
-       is  supported for Windows, where a call to _spawnvp() is used, for VMS,
-       where lib$spawn() is used, and  for  any  other  Unix-like  environment
-       where fork() and execv() are available.
+       is supported for Windows, where a call to _spawnvp() is used, for  VMS,
+       where  lib$spawn()  is  used,  and  for any Unix-like environment where
+       fork() and execv() are available.

       If the callout string does not start with a pipe (vertical bar) charac-
-       ter, it is parsed into a list of substrings separated by  pipe  charac-
-       ters.  The first substring must be an executable name, with the follow-
+       ter,  it  is parsed into a list of substrings separated by pipe charac-
+       ters. The first substring must be an executable name, with the  follow-
       ing substrings specifying arguments:

         executable_name|arg1|arg2|...

-       Any substring (including the executable name) may  contain  escape  se-
-       quences  started by a dollar character: $<digits> or ${<digits>} is re-
-       placed by the captured substring of the  given  decimal  number,  which
-       must  be greater than zero. If the number is greater than the number of
-       capturing substrings, or if the capture is unset,  the  replacement  is
-       empty.
-
-       Any  other character is substituted by itself. In particular, $$ is re-
-       placed by a single dollar and $| is replaced by a pipe character.  Here
-       is an example:
+       Any  substring  (including  the executable name) may contain escape se-
+       quences started by a dollar character. These are the same  as  for  the
+       --output (-O) option documented above, except that $0 cannot insert the
+       matched string because the match is still  in  progress.  Instead,  the
+       character '0' is inserted. If you need a literal dollar or pipe charac-
+       ter in any substring, use $$ or $| respectively. Here is an example:

         echo -e "abcde\n12345" | pcre2grep \
           '(?x)(.)(..(.))
@ -914,28 +962,15 @@ USING PCRE2'S CALLOUT FACILITY
           Arg1: [1] [234] [4] Arg2: |1| ()
           12345

-       The  parameters  for the system call that is used to run the program or
+       The parameters for the system call that is used to run the  program  or
       script are zero-terminated strings. This means that binary zero charac-
-       ters  in the callout argument will cause premature termination of their
-       substrings, and therefore should not be present. Any syntax  errors  in
-       the  string  (for  example, a dollar not followed by another character)
-       cause the callout to be ignored. If running the program fails  for  any
-       reason  (including the non-existence of the executable), a local match-
+       ters in the callout argument will cause premature termination of  their
+       substrings,  and  therefore should not be present. Any syntax errors in
+       the string (for example, a dollar not followed  by  another  character)
+       cause the callout to be ignored. If running the program fails  for  any
+       reason (including the non-existence of the executable), a local  match-
       ing failure occurs and the matcher backtracks in the normal way.

-   Echoing a specific string
-
-       This facility is always available, provided that callouts were not com-
-       pletely disabled when pcre2grep was built. If the callout string starts
-       with a pipe (vertical bar) character, the rest of the string is written
-       to the output, having been passed through the same escape processing as
-       text from the --output option. This provides a simple echoing  facility
-       that  avoids  calling  an  external program or script. No terminator is
-       added to the string, so if you want a newline, you must include it  ex-
-       plicitly.  Matching  continues  normally after the string is output. If
-       you want to see only the callout output but not any output from an  ac-
-       tual match, you should end the relevant pattern with (*FAIL).
-

 MATCHING ERRORS

@ -969,7 +1004,7 @@ DIAGNOSTICS

 SEE ALSO

-       pcre2pattern(3), pcre2syntax(3), pcre2callout(3).
+       pcre2pattern(3), pcre2syntax(3), pcre2callout(3), pcre2unicode(3).


 AUTHOR
@ -981,5 +1016,5 @@ AUTHOR

 REVISION

-       Last updated: 25 January 2020
+       Last updated: 04 October 2020
       Copyright (c) 1997-2020 University of Cambridge.
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@ -164,6 +164,10 @@ enum { DEE_READ, DEE_SKIP };

 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };

+/* Return values from decode_dollar_escape() */
+
+enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
+
 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
 environments), a warning is issued if the value of fwrite() is ignored.
 Unfortunately, casting to (void) does not suppress the warning. To get round
@ -179,13 +183,21 @@ handled by using STDOUT_NL as the newline string. We also use a normal double
 quote for the example, as single quotes aren't usually available. */

 #ifdef WIN32
-#define STDOUT_NL  "\r\n"
-#define QUOT       "\""
+#define STDOUT_NL     "\r\n"
+#define STDOUT_NL_LEN  2
+#define QUOT          "\""
 #else
-#define STDOUT_NL  "\n"
-#define QUOT       "'"
+#define STDOUT_NL      "\n"
+#define STDOUT_NL_LEN  1
+#define QUOT           "'"
 #endif

+/* This code is returned from decode_dollar_escape() when $n is encountered,
+and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
+point. */
+
+#define STDOUT_NL_CODE 0x7fffffffu
+


 /*************************************************
@ -224,8 +236,9 @@ static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
 static int bufsize = 3*PCRE2GREP_BUFSIZE;
 static int endlinetype;

-static unsigned long int total_count = 0;
+static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
 static unsigned long int counts_printed = 0;
+static unsigned long int total_count = 0;

 #ifdef WIN32
 static int dee_action = dee_SKIP;
@ -277,6 +290,9 @@ static BOOL show_total_count = FALSE;
 static BOOL silent = FALSE;
 static BOOL utf = FALSE;

+static uint8_t utf8_buffer[8];
+
+
 /* Structure for list of --only-matching capturing numbers. */

 typedef struct omstr {
@ -443,6 +459,7 @@ static option_item optionlist[] = {
  { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
  { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
  { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
+  { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
  { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
  { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
 #ifdef SUPPORT_PCRE2GREP_JIT
@ -482,8 +499,13 @@ of PCRE2_NEWLINE_xx in pcre2.h. */
 static const char *newlines[] = {
  "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };

-/* UTF-8 tables - used only when the newline setting is "any". */
+/* UTF-8 tables  */

+const int utf8_table1[] =
+  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
+const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
+
+const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

 const char utf8_table4[] = {
@ -531,6 +553,32 @@ else
 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */


+
+/*************************************************
+*           Convert code point to UTF-8          *
+*************************************************/
+
+/* A static buffer is used. Returns the number of bytes.
+
+The encoded bytes are written to the file-scope utf8_buffer, starting at its
+first byte, so each call overwrites the result of the previous one. The length
+of the sequence is chosen by finding the first entry in utf8_table1 that is not
+less than the value; the trailing bytes are then filled in from last to first,
+and finally the lead byte is formed using the marker bits from utf8_table2.
+
+NOTE(review): a value greater than the last utf8_table1 entry leaves i equal to
+utf8_table1_size, which then indexes one past the end of utf8_table2. Callers
+are presumably expected to range-check the code point first - confirm.
+
+Argument:  value   the code point to encode
+Returns:           the number of bytes placed in utf8_buffer
+*/
+
+static int
+ord2utf8(uint32_t value)
+{
+int i, j;
+uint8_t *utf8bytes = utf8_buffer;
+for (i = 0; i < utf8_table1_size; i++)   /* i = number of trailing bytes */
+  if (value <= (uint32_t)utf8_table1[i]) break;
+utf8bytes += i;                          /* Point at the last byte */
+for (j = i; j > 0; j--)
+  {
+  *utf8bytes-- = 0x80 | (value & 0x3f);  /* Low 6 bits, working backwards */
+  value >>= 6;
+  }
+*utf8bytes = utf8_table2[i] | value;     /* Lead byte: marker + top bits */
+return i + 1;
+}
+
+
+
 /*************************************************
 *         Case-independent string compare        *
 *************************************************/
@ -1788,6 +1836,7 @@ if (slen > 200)
  slen = 200;
  msg = "text that starts:\n\n";
  }
+
 for (i = 1; p != NULL; p = p->next, i++)
  {
  *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
@ -1823,107 +1872,245 @@ return FALSE;  /* No match, no errors */
 }


+
+/*************************************************
+*          Decode dollar escape sequence         *
+*************************************************/
+
+/* Called from various places to decode $ escapes in output strings. The escape
+sequences are as follows:
+
+$<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
+zero is never returned; '0' is substituted.
+
+$a returns bell.
+$b returns backspace.
+$e returns escape.
+$f returns form feed.
+$n returns newline.
+$r returns carriage return.
+$t returns tab.
+$v returns vertical tab.
+$o<digits> returns the character represented by the given octal
+  number; up to three digits are processed.
+$o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
+  code points.
+$x<digits> returns the character represented by the given hexadecimal
+  number; up to two digits are processed.
+$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
+  code points.
+Any other character is substituted by itself. E.g: $$ is replaced by a single
+dollar.
+
+Arguments:
+  begin      the start of the whole string
+  string     points to the $
+  callout    TRUE if in a callout (inhibits error messages)
+  value      where to return a value
+  last       where to return pointer to the last used character
+
+Returns:     DDE_ERROR    after a syntax error
+             DDE_CAPTURE  if *value is a capture number
+             DDE_CHAR     if *value is a character code
+*/
+
+static int
+decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
+  uint32_t *value, PCRE2_SPTR *last)
+{
+uint32_t c = 0;
+int base = 10;
+int dcount;
+int rc = DDE_CHAR;
+BOOL brace = FALSE;
+
+/* On entry string points to the $; step to the character that follows it and
+dispatch on that. Each case leaves string pointing to the last character it
+consumed. */
+
+switch (*(++string))
+  {
+  case 0:   /* Syntax error: a character must be present after $. */
+  if (!callout)
+    fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
+      (int)(string - begin), "no character after $");
+  *last = string;
+  return DDE_ERROR;
+
+  case '{':
+  brace = TRUE;
+  string++;
+  if (!isdigit(*string))  /* Syntax error: a decimal number required. */
+    {
+    if (!callout)
+      fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
+        (int)(string - begin), "decimal number expected");
+    rc = DDE_ERROR;
+    break;
+    }
+
+  /* Fall through */
+
+  /* The maximum capture number is 65535, so any number greater than that will
+  always be an unknown capture number. We just stop incrementing, in order to
+  avoid overflow. */
+
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+  do
+    {
+    if (c <= 65535) c = c * 10 + (*string - '0');
+    string++;
+    }
+  while (*string >= '0' && *string <= '9');
+  string--;  /* Point to last digit */
+
+  /* In a callout, capture number 0 is not available. No error can be given,
+  so just return the character '0'. */
+
+  if (callout && c == 0)
+    {
+    *value = '0';
+    }
+  else
+    {
+    *value = c;
+    rc = DDE_CAPTURE;
+    }
+  break;
+
+  /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
+  for valid Unicode code points. */
+
+  case 'o':
+  base = 8;
+  string++;
+  if (*string == '{')
+    {
+    brace = TRUE;
+    string++;
+    dcount = 7;
+    }
+  else dcount = 3;
+  for (; dcount > 0; dcount--)
+    {
+    if (*string < '0' || *string > '7') break;
+    c = c * 8 + (*string++ - '0');
+    }
+  *value = c;
+  string--;  /* Point to last digit */
+  break;
+
+  /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
+  for valid Unicode code points. */
+
+  case 'x':
+  base = 16;
+  string++;
+  if (*string == '{')
+    {
+    brace = TRUE;
+    string++;
+    dcount = 6;
+    }
+  else dcount = 2;
+  for (; dcount > 0; dcount--)
+    {
+    if (!isxdigit(*string)) break;
+    if (*string >= '0' && *string <= '9')
+      c = c * 16 + *string++ - '0';
+    else
+      c = c * 16 + (*string++ | 0x20) - 'a' + 10;  /* Fold to lower case */
+    }
+  *value = c;
+  string--;  /* Point to last digit */
+  break;
+
+  case 'a': *value = '\a'; break;
+  case 'b': *value = '\b'; break;
+#ifndef EBCDIC
+  case 'e': *value = '\033'; break;
+#else
+  case 'e': *value = '\047'; break;
+#endif
+  case 'f': *value = '\f'; break;
+  case 'n': *value = STDOUT_NL_CODE; break;
+  case 'r': *value = '\r'; break;
+  case 't': *value = '\t'; break;
+  case 'v': *value = '\v'; break;
+
+  default: *value = *string; break;
+  }
+
+/* A braced form must be closed by '}'. The check is skipped when the switch
+has already diagnosed an error (only "${" followed by a non-digit does that).
+In that situation string may be pointing at the terminating zero, so looking
+at string[1] would read beyond the end of the text, and a second, misleading
+message would follow the "decimal number expected" one. */
+
+if (brace && rc != DDE_ERROR)
+  {
+  c = string[1];
+  if (c != '}')
+    {
+    rc = DDE_ERROR;
+    if (!callout)
+      {
+      /* A further digit of the right base means the digit limit was hit;
+      anything else means the brace was never closed. */
+
+      if ((base == 8 && c >= '0' && c <= '7') ||
+          (base == 16 && isxdigit(c)))
+        {
+        fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
+          "too many %s digits\n", (int)(string - begin),
+          (base == 8)? "octal" : "hex");
+        }
+      else
+        {
+        fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
+          (int)(string - begin), "missing closing brace");
+        }
+      }
+    }
+  else string++;
+  }
+
+/* Check maximum code point values, but take note of STDOUT_NL_CODE. */
+
+if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
+  {
+  uint32_t max = utf? 0x0010ffffu : 0xffu;
+  if (*value > max)
+    {
+    if (!callout)
+      fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
+        "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
+    rc = DDE_ERROR;
+    }
+  }
+
+*last = string;
+return rc;
+}
+
+
+
 /*************************************************
 *          Check output text for errors          *
 *************************************************/

+/* Called early, to get errors before doing anything for -O text; also called
+from callouts to check before outputting.
+
+Arguments:
+  string    an --output text string
+  callout   TRUE if in a callout (stops printing errors)
+
+Returns:    TRUE if OK, FALSE on error
+*/
+
 static BOOL
 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
 {
+uint32_t value;  /* Receives the decoded item; only the return code is used */
 PCRE2_SPTR begin = string;
+
 for (; *string != 0; string++)
  {
-  if (*string == '$')
-    {
-    PCRE2_SIZE capture_id = 0;
-    BOOL brace = FALSE;
-
-    string++;
-
-    /* Syntax error: a character must be present after $. */
-    if (*string == 0)
-      {
-      if (!callout)
-        fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
-          (int)(string - begin), "no character after $");
+  if (*string == '$' &&  /* The decoder moves string on to the last character it used */
+    decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
       return FALSE;
-      }
-
-    if (*string == '{')
-      {
-      /* Must be a decimal number in braces, e.g: {5} or {38} */
-      string++;
-
-      brace = TRUE;
-      }
-
-    if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
-      {
-      do
-        {
-        /* Maximum capture id is 65535. */
-        if (capture_id <= 65535)
-          capture_id = capture_id * 10 + (*string - '0');
-
-        string++;
-        }
-      while (*string >= '0' && *string <= '9');
-
-      if (brace)
-        {
-        /* Syntax error: closing brace is missing. */
-        if (*string != '}')
-          {
-          if (!callout)
-            fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
-              (int)(string - begin), "missing closing brace");
-          return FALSE;
-          }
-        }
-      else
-        {
-        /* To negate the effect of the for. */
-        string--;
-        }
-      }
-    else if (brace)
-      {
-      /* Syntax error: a decimal number required. */
-      if (!callout)
-        fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
-          (int)(string - begin), "decimal number expected");
-      return FALSE;
-      }
-    else if (*string == 'o')
-      {
-      string++;
-
-      if (*string < '0' || *string > '7')
-        {
-        /* Syntax error: an octal number required. */
-        if (!callout)
-          fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
-            (int)(string - begin), "octal number expected");
-        return FALSE;
-        }
-      }
-    else if (*string == 'x')
-      {
-      string++;
-
-      if (!isxdigit((unsigned char)*string))
-        {
-        /* Syntax error: a hexdecimal number required. */
-        if (!callout)
-          fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
-            (int)(string - begin), "hexadecimal number expected");
-        return FALSE;
-        }
-      }
-    }
  }

-  return TRUE;
+return TRUE;  /* No dollar escape in the text gave DDE_ERROR */
 }


@ -1932,31 +2119,7 @@ for (; *string != 0; string++)
 *************************************************/

 /* Display the output text, which is assumed to have already been syntax
-checked. Output may contain escape sequences started by the dollar sign. The
-escape sequences are substituted as follows:
-
-  $<digits> or ${<digits>} is replaced by the captured substring of the given
-  decimal number; zero will substitute the whole match. If the number is
-  greater than the number of capturing substrings, or if the capture is unset,
-  the replacement is empty.
-
-  $a is replaced by bell.
-  $b is replaced by backspace.
-  $e is replaced by escape.
-  $f is replaced by form feed.
-  $n is replaced by newline.
-  $r is replaced by carriage return.
-  $t is replaced by tab.
-  $v is replaced by vertical tab.
-
-  $o<digits> is replaced by the character represented by the given octal
-  number; up to three digits are processed.
-
-  $x<digits> is replaced by the character represented by the given hexadecimal
-  number; up to two digits are processed.
-
-  Any other character is substituted by itself. E.g: $$ is replaced by a single
-  dollar.
+checked. Output may contain escape sequences started by the dollar sign.

 Arguments:
  string:       the output text
@ -1973,121 +2136,54 @@ static BOOL
 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
  PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
 {
+uint32_t value;
 BOOL printed = FALSE;
+PCRE2_SPTR begin = string;

 for (; *string != 0; string++)
  {
-  int ch = EOF;
  if (*string == '$')
    {
-    PCRE2_SIZE capture_id = 0;
-    BOOL brace = FALSE;
-
-    string++;
-
-    if (*string == '{')
+    switch(decode_dollar_escape(begin, string, callout, &value, &string))
      {
-      /* Must be a decimal number in braces, e.g: {5} or {38} */
-      string++;
-
-      brace = TRUE;
-      }
-
-    if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
-      {
-      do
+      case DDE_CHAR:
+      if (value == STDOUT_NL_CODE)
        {
-        /* Maximum capture id is 65535. */
-        if (capture_id <= 65535)
-          capture_id = capture_id * 10 + (*string - '0');
-
-        string++;
+        fprintf(stdout, STDOUT_NL);
+        printed = FALSE;
+        continue;
        }
-      while (*string >= '0' && *string <= '9');
+      break;  /* Will print value */

-      if (!brace)
-        {
-        /* To negate the effect of the for. */
-        string--;
-        }
-
-      if (capture_id < capture_top)
+      case DDE_CAPTURE:
+      if (value < capture_top)
        {
        PCRE2_SIZE capturesize;
-        capture_id *= 2;
-
-        capturesize = ovector[capture_id + 1] - ovector[capture_id];
+        value *= 2;
+        capturesize = ovector[value + 1] - ovector[value];
        if (capturesize > 0)
          {
-          print_match(subject + ovector[capture_id], capturesize);
+          print_match(subject + ovector[value], capturesize);
          printed = TRUE;
          }
        }
-      }
-    else if (*string == 'a') ch = '\a';
-    else if (*string == 'b') ch = '\b';
-#ifndef EBCDIC
-    else if (*string == 'e') ch = '\033';
-#else
-    else if (*string == 'e') ch = '\047';
-#endif
-    else if (*string == 'f') ch = '\f';
-    else if (*string == 'r') ch = '\r';
-    else if (*string == 't') ch = '\t';
-    else if (*string == 'v') ch = '\v';
-    else if (*string == 'n')
-      {
-      fprintf(stdout, STDOUT_NL);
-      printed = FALSE;
-      }
-    else if (*string == 'o')
-      {
-      string++;
+      continue;

-      ch = *string - '0';
-      if (string[1] >= '0' && string[1] <= '7')
-        {
-        string++;
-        ch = ch * 8 + (*string - '0');
-        }
-      if (string[1] >= '0' && string[1] <= '7')
-        {
-        string++;
-        ch = ch * 8 + (*string - '0');
-        }
+      default:  /* Should not occur */
+      break;
      }
-    else if (*string == 'x')
-      {
-      string++;
+    }

-      if (*string >= '0' && *string <= '9')
-        ch = *string - '0';
-      else
-        ch = (*string | 0x20) - 'a' + 10;
-      if (isxdigit((unsigned char)string[1]))
-        {
-        string++;
-        ch *= 16;
-        if (*string >= '0' && *string <= '9')
-          ch += *string - '0';
-        else
-          ch += (*string | 0x20) - 'a' + 10;
-        }
-      }
-    else
-      {
-      ch = *string;
-      }
-    }
-  else
+  else value = *string;  /* Not a $ escape */
+  /* Write value as one byte unless UTF mode needs a multibyte encoding. */
+  if (!utf || value <= 127) fputc(value, stdout); else
    {
-    ch = *string;
-    }
-  if (ch != EOF)
-    {
-    fprintf(stdout, "%c", ch);
-    printed = TRUE;
+    int i;
+    int n = ord2utf8(value);
+    for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
    }
+
+  printed = TRUE;
  }

 return printed;
@ -2166,7 +2262,7 @@ int result = 0;

 (void)unused;   /* Avoid compiler warning */

-/* Only callout with strings are supported. */
+/* Only callouts with strings are supported. */

 if (string == NULL || length == 0) return 0;

@ -2185,83 +2281,43 @@ return 0;
 #else

 /* Checking syntax and compute the number of string fragments. Callout strings
-are ignored in case of a syntax error. */
+are silently ignored in the event of a syntax error. */

 while (length > 0)
  {
  if (*string == '|')
    {
    argsvectorlen++;
-
-    /* Maximum 10000 arguments allowed. */
-    if (argsvectorlen > 10000) return 0;
+    if (argsvectorlen > 10000) return 0;  /* Too many args */
    }
+
  else if (*string == '$')
    {
-    PCRE2_SIZE capture_id = 0;
+    uint32_t value;
+    PCRE2_SPTR begin = string;

-    string++;
-    length--;
-
-    /* Syntax error: a character must be present after $. */
-    if (length == 0) return 0;
-
-    if (*string >= '1' && *string <= '9')
+    switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
      {
-      do
+      case DDE_CAPTURE:
+      if (value < capture_top)
        {
-        /* Maximum capture id is 65535. */
-        if (capture_id <= 65535)
-          capture_id = capture_id * 10 + (*string - '0');
-
-        string++;
-        length--;
+        value *= 2;
+        argslen += ovector[value + 1] - ovector[value];
        }
-      while (length > 0 && *string >= '0' && *string <= '9');
+      argslen--;   /* Negate the effect of argslen++ below. */
+      break;

-      /* To negate the effect of string++ below. */
-      string--;
-      length++;
-      }
-    else if (*string == '{')
-      {
-      /* Must be a decimal number in braces, e.g: {5} or {38} */
-      string++;
-      length--;
+      case DDE_CHAR:
+      if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
+        else if (utf && value > 127) argslen += ord2utf8(value) - 1;
+      break;

-      /* Syntax error: a decimal number required. */
-      if (length == 0) return 0;
-      if (*string < '1' || *string > '9') return 0;
-
-      do
-        {
-        /* Maximum capture id is 65535. */
-        if (capture_id <= 65535)
-          capture_id = capture_id * 10 + (*string - '0');
-
-        string++;
-        length--;
-
-        /* Syntax error: no more characters */
-        if (length == 0) return 0;
-        }
-      while (*string >= '0' && *string <= '9');
-
-      /* Syntax error: closing brace is missing. */
-      if (*string != '}') return 0;
+      default:         /* Should not occur */
+      case DDE_ERROR:
+      return 0;
      }

-    if (capture_id > 0)
-      {
-      if (capture_id < capture_top)
-        {
-        capture_id *= 2;
-        argslen += ovector[capture_id + 1] - ovector[capture_id];
-        }
-
-      /* To negate the effect of argslen++ below. */
-      argslen--;
-      }
+    length -= (string - begin);
    }

  string++;
@ -2269,6 +2325,8 @@ while (length > 0)
  argslen++;
  }

+/* Get memory for the argument vector and its strings. */
+
 args = (char*)malloc(argslen);
 if (args == NULL) return 0;

@ -2279,9 +2337,10 @@ if (argsvector == NULL)
  return 0;
  }

+/* Now reprocess the string and set up the arguments. */
+
 argsptr = args;
 argsvectorptr = argsvector;
-
 *argsvectorptr++ = argsptr;

 length = calloutptr->callout_string_length;
@ -2294,69 +2353,55 @@ while (length > 0)
    *argsptr++ = '\0';
    *argsvectorptr++ = argsptr;
    }
+
  else if (*string == '$')
    {
-    string++;
-    length--;
+    uint32_t value;
+    PCRE2_SPTR begin = string;

-    if ((*string >= '1' && *string <= '9') || *string == '{')
+    switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
      {
-      PCRE2_SIZE capture_id = 0;
-
-      if (*string != '{')
+      case DDE_CAPTURE:
+      if (value < capture_top)
        {
-        do
-          {
-          /* Maximum capture id is 65535. */
-          if (capture_id <= 65535)
-            capture_id = capture_id * 10 + (*string - '0');
+        PCRE2_SIZE capturesize;
+        value *= 2;
+        capturesize = ovector[value + 1] - ovector[value];
+        memcpy(argsptr, subject + ovector[value], capturesize);
+        argsptr += capturesize;
+        }
+      break;

-          string++;
-          length--;
-          }
-        while (length > 0 && *string >= '0' && *string <= '9');
-
-        /* To negate the effect of string++ below. */
-        string--;
-        length++;
+      case DDE_CHAR:
+      if (value == STDOUT_NL_CODE)
+        {
+        memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
+        argsptr += STDOUT_NL_LEN;  
+        }   
+      else if (utf && value > 127)
+        {
+        int n = ord2utf8(value);
+        memcpy(argsptr, utf8_buffer, n);
+        argsptr += n;
        }
      else
        {
-        string++;
-        length--;
-
-        do
-          {
-          /* Maximum capture id is 65535. */
-          if (capture_id <= 65535)
-            capture_id = capture_id * 10 + (*string - '0');
-
-          string++;
-          length--;
-          }
-        while (*string != '}');
+        *argsptr++ = value;
        }
+      break;

-        if (capture_id < capture_top)
-          {
-          PCRE2_SIZE capturesize;
-          capture_id *= 2;
+      default:         /* Should not occur */
+      case DDE_ERROR:
+      return 0;
+      }

-          capturesize = ovector[capture_id + 1] - ovector[capture_id];
-          memcpy(argsptr, subject + ovector[capture_id], capturesize);
-          argsptr += capturesize;
-          }
-      }
-    else
-      {
-      *argsptr++ = *string;
-      }
-    }
-  else
-    {
-    *argsptr++ = *string;
+    length -= (string - begin);
    }

+  else *argsptr++ = *string;
+
+  /* Advance along the string */
+
  string++;
  length--;
  }
@ -2479,6 +2524,7 @@ int filepos = 0;
 unsigned long int linenumber = 1;
 unsigned long int lastmatchnumber = 0;
 unsigned long int count = 0;
+long int count_matched_lines = 0;
 char *lastmatchrestart = main_buffer;
 char *ptr = main_buffer;
 char *endptr;
@ -2505,7 +2551,7 @@ bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
  input_line_buffered);

 #ifdef SUPPORT_LIBBZ2
-if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE; */
+if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE */
 #endif

 endptr = main_buffer + bufflength;
@ -2533,10 +2579,23 @@ while (ptr < endptr)
  int mrc = 0;
  unsigned int options = 0;
  BOOL match;
+  BOOL line_matched = FALSE;
  char *t = ptr;
  PCRE2_SIZE length, linelength;
  PCRE2_SIZE startoffset = 0;

+  /* If the -m option set a limit for the number of matched or non-matched
+  lines, check it here. A limit of zero means that no matching is ever done.
+  For stdin from a file, set the file position. */
+
+  if (count_limit >= 0 && count_matched_lines >= count_limit)
+    {
+    if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
+      (void)fseek(handle, (long int)filepos, SEEK_SET);
+    rc = (count_limit == 0)? 1 : 0;
+    break;
+    }
+
  /* At this point, ptr is at the start of a line. We need to find the length
  of the subject string to pass to pcre2_match(). In multiline mode, it is the
  length remainder of the data in the buffer. Otherwise, it is the length of
@ -2686,6 +2745,10 @@ while (ptr < endptr)

    if (filenames == FN_NOMATCH_ONLY) return 1;

+    /* Remember that this line matched (for counting matched lines) */
+
+    line_matched = TRUE;
+
    /* If all we want is a yes/no answer, we can return immediately. */

    if (quiet) return 0;
@ -3067,6 +3130,11 @@ while (ptr < endptr)
  filepos += (int)(linelength + endlinelength);
  linenumber++;

+  /* If there was at least one match (or a non-match, as required) in the line,
+  increment the count for the -m option. */
+
+  if (line_matched) count_matched_lines++;
+
  /* If input is line buffered, and the buffer is not yet full, read another
  line and add it into the buffer. */

@ -4088,6 +4156,7 @@ if (only_matching_count > 1)
  pcre2grep_exit(usage(2));
  }

+
 /* Check that there is a big enough ovector for all -o settings. */

 for (om = only_matching; om != NULL; om = om->next)
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@ -956,3 +956,27 @@ RC=0
 pcre2grep: Requested group 1 cannot be captured.
 pcre2grep: Use --om-capture to increase the size of the capture vector.
 RC=2
+---------------------------- Test 129 -----------------------------
+The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
+lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
+RC=0
+---------------------------- Test 130 -----------------------------
+fox
+fox
+fox
+fox
+RC=0
+---------------------------- Test 131 -----------------------------
+2
+RC=0
+---------------------------- Test 132 -----------------------------
+match 1:
+ a
+match 2:
+ b
+---
+ a
+RC=0
+---------------------------- Test 133 -----------------------------
+=AB3CD5=
+RC=0
--- a/testdata/grepoutput8
+++ b/testdata/grepoutput8
@ -29,3 +29,6 @@ RC=1
 ---------------------------- Test U5 ------------------------------
 CD Z
 RC=0
+---------------------------- Test U6 -----------------------------
+=ǓǤ=
+RC=0
--- a/testdata/grepoutputC
+++ b/testdata/grepoutputC
@ -40,3 +40,5 @@ T
 T
 T
 T
+0:T:AA
+The quick brown
--- a/testdata/grepoutputCN
+++ b/testdata/grepoutputCN
@ -28,3 +28,5 @@ T
 T
 T
 T
+0:T:AA
+The quick brown