Implement -Z in pcre2grep and update documentation

2022-07-30 17:41:49 +01:00 · 2022-07-30 17:41:49 +01:00 · 8b133fa0ba
parent cc5e121c8e
commit 8b133fa0ba
16 changed files with 994 additions and 868 deletions
--- a/2
+++ b/2
@ -49,6 +49,8 @@ tests.
 tests run by 'make check', but can be run manually. The current output is from 
 a 64-bit system.
 13. Implemented -Z aka --null in pcre2grep.
 Version 10.40 15-April-2022
 ---------------------------
--- a/42
+++ b/42
@ -68,6 +68,22 @@ diff -b  /dev/null /dev/null 2>/dev/null && cf="diff -b"
 diff -u  /dev/null /dev/null 2>/dev/null && cf="diff -u"
 diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
 # Some tests involve NUL characters. It seems impossible to handle them easily
 # in many operating systems. An earlier version of this script used sed to
 # translate NUL into the string ZERO, but this didn't work on Solaris (aka
 # SunOS), where the version of sed explicitly doesn't like them, and also MacOS
 # (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
 # even when using GNU sed. A user suggested using tr instead, which
 # necessitates translating to a single character. However, on (some versions
 # of?) Solaris, the normal "tr" cannot handle binary zeros, but if
 # /usr/xpg4/bin/tr is available, it can do so, so test for that.
 if [ -x /usr/xpg4/bin/tr ] ; then
  tr=/usr/xpg4/bin/tr
 else
  tr=tr
 fi
 # If this test is being run from "make check", $srcdir will be set. If not, set
 # it to the current or parent directory, whichever one contains the test data.
 # Subsequently, we run most of the pcre2grep tests in the source directory so
@ -685,6 +701,16 @@ echo "---------------------------- Test 134 -----------------------------" >>tes
 (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
 echo "RC=$?" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
 echo "RC=$?" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
 echo "RC=$?" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 # Now compare the results.
 $cf $srcdir/testdata/grepoutput testtrygrep
@ -759,22 +785,6 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
 printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
 # This next test involves NUL characters. It seems impossible to handle them
 # easily in many operating systems. An earlier version of this script used sed
 # to translate NUL into the string ZERO, but this didn't work on Solaris (aka
 # SunOS), where the version of sed explicitly doesn't like them, and also MacOS
 # (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
 # even when using GNU sed. A user suggested using tr instead, which
 # necessitates translating to a single character (@). However, on (some
 # versions of?) Solaris, the normal "tr" cannot handle binary zeros, but if
 # /usr/xpg4/bin/tr is available, it can do so, so test for that.
 if [ -x /usr/xpg4/bin/tr ] ; then
  tr=/usr/xpg4/bin/tr
 else
  tr=tr
 fi
 printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
 printf 'abc\0def' >testNinputgrep
 $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@ -121,6 +121,7 @@ environment, for example.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c
@ -373,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.
-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (in which the testdata folder resides), e.g.:
   set srcdir=C:\pcre2\pcre2-10.00
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -17,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
 You can access the archives and also subscribe or manage your subscription
 here:
-https://groups.google.com/pcre2-dev
+https://groups.google.com/g/pcre2-dev
 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -375,7 +375,8 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.
@ -400,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
+  outputs information about what it is doing. The input strings are specified
-  arguments: if an argument starts with "=" the rest of it is a literal input
+  by arguments: if an argument starts with "=" the rest of it is a literal
-  string. Otherwise, it is assumed to be a file name, and the contents of the
+  input string. Otherwise, it is assumed to be a file name, and the contents
-  file are the test string.
+  of the file are the test string.
 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -695,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.
 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.
 Test 16 is run only when JIT support is not available. It checks that an
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -1017,7 +1017,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 </P>
 <P>
 A value for the heap limit may also be supplied by an item at the start of a
@ -1030,19 +1030,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
+The <b>pcre2_match()</b> function always needs some heap memory, so setting a
-stack for recording backtracking points. The more nested backtracking points
+value of zero guarantees a "heap limit exceeded" error. Details of how
-there are (that is, the deeper the search tree), the more memory is needed.
+<b>pcre2_match()</b> uses the heap are given in the
-Heap memory is used only if the initial vector is too small. If the heap limit
+<a href="pcre2perform.html"><b>pcre2perform</b></a>
-is set to a value less than 21 (in particular, zero) no heap memory will be
+documentation.
 used. In this case, only patterns that do not have a lot of nested backtracking
 can be successfully processed.
 </P>
 <P>
-Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
+For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
-when processing pattern recursions, lookarounds, or atomic groups, and only if
+processing pattern recursions, lookarounds, or atomic groups, and only if this
-this is not big enough is heap memory used. In this case, too, setting a value
+is not big enough is heap memory used. In this case, setting a value of zero
-of zero disables the use of the heap.
+disables the use of the heap.
 <br>
 <br>
 <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@ -1089,10 +1087,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
 <br>
 <br>
 This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@ -3148,11 +3146,11 @@ The backtracking match limit was reached.
 <pre>
  PCRE2_ERROR_NOMEMORY
 </pre>
-If a pattern contains many nested backtracking points, heap memory is used to
+Heap memory is used to remember backtracking points. This error is given when
-remember them. This error is given when the memory allocation function (default
+the memory allocation function (default or custom) fails. Note that a different
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 <pre>
  PCRE2_ERROR_NULL
 </pre>
@ -4020,9 +4018,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC42" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 14 December 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@ -284,12 +284,11 @@ to the <b>configure</b> command. This setting also applies to the
 counting is done differently).
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
+The <b>pcre2_match()</b> function uses heap memory to record backtracking
-stack to record backtracking points. The more nested backtracking points there
+points. The more nested backtracking points there are (that is, the deeper the
-are (that is, the deeper the search tree), the more memory is needed. If the
+search tree), the more memory is needed. There is an upper limit, specified in
-initial vector is not large enough, heap memory is used, up to a certain limit,
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
+described in the
 at run time, as described in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation. The default limit (in effect unlimited) is 20 million. You can
 change this by a setting such as
@ -609,16 +608,16 @@ give a warning.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC26" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 08 December 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -71,13 +71,15 @@ For example:
 <pre>
  pcre2grep some-pattern file1 - file3
 </pre>
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how <b>pcre2grep</b> behaves. In
+However, there are options that can change how <b>pcre2grep</b> behaves. For
-particular, the <b>-M</b> option makes it possible to search for strings that
+example, the <b>-M</b> option makes it possible to search for strings that span
-span line boundaries. What defines a line boundary is controlled by the
+line boundaries. What defines a line boundary is controlled by the <b>-N</b>
-<b>-N</b> (<b>--newline</b>) option.
+(<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
 not file names are shown, and the <b>-Z</b> option changes the file name
 terminator to a zero byte.
 </P>
 <P>
 The amount of memory used for buffering files that are being scanned is
@ -178,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
+context lines (the <b>-Z</b> option can be used to change the file name
-unless they are in fact contiguous in the input file. The value of <i>number</i>
+terminator to a zero byte). A line containing "--" is output between each group
-is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
+of lines, unless they are in fact contiguous in the input file. The value of
 <i>number</i> is expected to be relatively small. When <b>-c</b> is used,
 <b>-A</b> is ignored.
 </P>
 <P>
 <b>-a</b>, <b>--text</b>
@ -199,9 +203,10 @@ Output up to <i>number</i> lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 <i>number</i> lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
+instead of a colon for the context lines (the <b>-Z</b> option can be used to
-between each group of lines, unless they are in fact contiguous in the input
+change the file name terminator to a zero byte). A line containing "--" is
-file. The value of <i>number</i> is expected to be relatively small. When
+output between each group of lines, unless they are in fact contiguous in the
 input file. The value of <i>number</i> is expected to be relatively small. When
 <b>-c</b> is used, <b>-B</b> is ignored.
 </P>
 <P>
@ -411,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
 <P>
 <b>-H</b>, <b>--with-filename</b>
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
+searching a single file. The file name is not normally shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
+By default, for matching lines, the file name is followed by a colon; for
-hyphen separator is used. If a line number is also being output, it follows the
+context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
-file name. When the <b>-M</b> option causes a pattern to match more than one
+change the terminator to a zero byte. If a line number is also being output,
-line, only the first is preceded by the file name. This option overrides any
+it follows the file name. When the <b>-M</b> option causes a pattern to match
-previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
+more than one line, only the first is preceded by the file name. This option
 overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>-h</b>, <b>--no-filename</b>
-Suppress the output file names when searching multiple files. By default,
+Suppress the output file names when searching multiple files. File names are
-file names are shown when multiple files are searched. For matching lines, the
+normally shown when multiple files are searched. By default, for matching
-file name is followed by a colon; for context lines, a hyphen separator is used.
+lines, the file name is followed by a colon; for context lines, a hyphen
-If a line number is also being output, it follows the file name. This option
+separator is used. The <b>-Z</b> option can be used to change the terminator to
-overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
+a zero byte. If a line number is also being output, it follows the file name.
 This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>--heap-limit</b>=<i>number</i>
@ -481,18 +488,20 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
 <b>-L</b>, <b>--files-without-match</b>
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous <b>-H</b>,
+output once, on a separate line by default, but if the <b>-Z</b> option is set, 
-<b>-h</b>, or <b>-l</b> options.
+they are separated by zero bytes instead of newlines. This option overrides any
 previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>-l</b>, <b>--files-with-matches</b>
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
+a separate line, but if the <b>-Z</b> option is set, they are separated by zero
-in a file. However, if the <b>-c</b> (count) option is also used, matching
+bytes instead of newlines. Searching normally stops as soon as a matching line
-continues in order to obtain the correct count, and those files that have at
+is found in a file. However, if the <b>-c</b> (count) option is also used,
-least one match are listed along with their counts. Using this option with
+matching continues in order to obtain the correct count, and those files that
-<b>-c</b> is a way of suppressing the listing of files with no matches that
+have at least one match are listed along with their counts. Using this option
 with <b>-c</b> is a way of suppressing the listing of files with no matches that
 occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
 <b>-h</b>, or <b>-L</b> options.
 </P>
@ -592,10 +601,7 @@ value set by <b>--match-limit</b> is reached, an error occurs.
 <br>
 <br>
 The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 memory is needed only if matching the pattern requires a significant number of
 nested backtracking points to be remembered. This parameter can be set to zero
 to forbid the use of heap memory altogether.
 <br>
 <br>
 The <b>--depth-limit</b> option limits the depth of nested backtracking points,
@ -839,6 +845,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the <b>--include</b> or <b>--exclude</b> options.
 </P>
 <P>
 <b>-Z</b>, <b>--null</b>
 Terminate file names in the regular output with a zero byte (the NUL
 character) instead of what would normally appear. This is useful when file
 names contain unusual characters such as colons, hyphens, or even newlines. The
 option does not apply to file names in error messages.
 </P>
 <br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 <P>
 The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
@ -1053,9 +1066,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 31 August 2021
+Last updated: 30 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
 </P>
 <P>
 The maximum amount of heap memory used for matching is controlled by the heap 
 limit, which can be set in a pattern or in a match context. The default is a 
 very large number, effectively unlimited.
 </P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -86,9 +91,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 02 February 2019
+Last updated: 26 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
+remembered in memory frames controlled by the code. 
-frames is allocated on the system stack (enough for about 100 frames for small
+</P>
-patterns), but if this is insufficient, heap memory is used. The amount of heap
+<P>
-memory can be limited; if the limit is set to zero, only the initial stack
+The size of each frame depends on the size of pointer variables and the number
-vector is used. Rewriting patterns to be time-efficient, as described below,
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
-may also reduce the memory requirements.
+system the frame size for a pattern with no captures is 128 bytes. For each
 capturing group the size increases by 16 bytes.
 </P>
 <P>
 Until release 10.41, an initial 20KiB frames vector was allocated on the system 
 stack, but this still caused some issues for multi-thread applications where
 each thread has a very small stack. From release 10.41 backtracking memory
 frames are always held in heap memory. An initial heap allocation is obtained
 the first time any match data block is passed to <b>pcre2_match()</b>. This is
 remembered with the match data block and re-used if that block is used for
 another match. It is freed when the match data block itself is freed.
 </P>
 <P>
 The size of the initial block is the larger of 20KiB or ten times the pattern's 
 frame size, unless the heap limit is less than this, in which case the heap 
 limit is used. If the initial block proves to be too small during matching, it
 is replaced by a larger block, subject to the heap limit. The heap limit is 
 checked only when a new block is to be allocated. Reducing the heap limit 
 between calls to <b>pcre2_match()</b> with the same match data block does not 
 affect the saved block.
 </P>
 <P>
 In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 February 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -1241,7 +1241,8 @@ pattern, but can be overridden by modifiers on the subject.
      copy=&#60;number or name&#62;      copy captured substring
      depth_limit=&#60;n&#62;            set a depth limit
      dfa                        use <b>pcre2_dfa_match()</b>
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
      find_limits_noheap         find match and depth limits
      get=&#60;number or name&#62;       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
@ -1564,7 +1565,7 @@ Setting heap, match, and depth limits
 <P>
 The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
 the appropriate limits in the match context. These values are ignored when the
-<b>find_limits</b> modifier is specified.
+<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
 </P>
 <br><b>
 Finding minimum limits
@ -1574,8 +1575,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
 calls the relevant matching function several times, setting different values in
 the match context via <b>pcre2_set_heap_limit()</b>,
 <b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
-the minimum values for each parameter that allows the match to complete without
+the smallest value for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
 modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
 standard tests, because the minimum heap limit varies between systems. If JIT
 is being used, only the match limit is relevant, and the other two are
 automatically omitted.
 </P>
 <P>
 When using this modifier, the pattern should not contain any limit settings
@ -1603,9 +1608,7 @@ overall amount of computing resource that is used.
 </P>
 <P>
 For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 value of zero disables the use of any heap memory; many simple pattern matches
 can be done without using the heap, so zero is not an unreasonable setting.
 </P>
 <br><b>
 Showing MARK names
@ -1623,12 +1626,10 @@ Showing memory usage
 <P>
 The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
-requires a bigger vector than the default for remembering backtracking points
+is used only when a match requires more internal workspace than the default
-(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
+allocation on the stack, so in many cases there will be no output. No heap
-many cases there will be no heap memory used and therefore no additional
+memory is allocated during matching with JIT. For this modifier to work, the
 output. No heap memory is allocated during matching with JIT, so in that case
 the <b>memory</b> modifier never has any effect. For this modifier to work, the
 <b>null_context</b> modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 </P>
@ -1690,7 +1691,8 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
 If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-<b>find_limits</b> or <b>substitute_callout</b> modifiers.
+<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
 modifiers.
 </P>
 <P>
 Similarly, for testing purposes, if the <b>null_subject</b> or
@ -2141,7 +2143,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 12 January 2022
+Last updated: 27 July 2022
 <br>
 Copyright &copy; 1997-2022 University of Cambridge.
 <br>
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@ -1028,7 +1028,7 @@ PCRE2 CONTEXTS
       pcre2jit  documentation for more details). If the limit is reached, the
       negative error code  PCRE2_ERROR_HEAPLIMIT  is  returned.  The  default
       limit  can be set when PCRE2 is built; if it is not, the default is set
-       very large and is essentially "unlimited".
+       very large and is essentially unlimited.
       A value for the heap limit may also be supplied by an item at the start
       of a pattern of the form
@ -1039,19 +1039,15 @@ PCRE2 CONTEXTS
       less ddd is less than the limit set by the caller of pcre2_match()  or,
       if no such limit is set, less than the default.
-       The  pcre2_match() function starts out using a 20KiB vector on the sys-
+       The  pcre2_match() function always needs some heap memory, so setting a
-       tem stack for recording backtracking points. The more nested backtrack-
+       value of zero guarantees a "heap limit exceeded" error. Details of  how
-       ing  points  there  are (that is, the deeper the search tree), the more
+       pcre2_match()  uses  the  heap are given in the pcre2perform documenta-
-       memory is needed.  Heap memory is used only if the  initial  vector  is
+       tion.
       too small. If the heap limit is set to a value less than 21 (in partic-
       ular, zero) no heap memory will be used. In this  case,  only  patterns
       that  do not have a lot of nested backtracking can be successfully pro-
       cessed.
-       Similarly, for pcre2_dfa_match(), a vector on the system stack is  used
+       For pcre2_dfa_match(), a vector on the system stack is used  when  pro-
-       when  processing pattern recursions, lookarounds, or atomic groups, and
+       cessing  pattern recursions, lookarounds, or atomic groups, and only if
-       only if this is not big enough is heap memory used. In this case,  too,
+       this is not big enough is heap memory used. In  this  case,  setting  a
-       setting a value of zero disables the use of the heap.
+       value of zero disables the use of the heap.
       int pcre2_set_match_limit(pcre2_match_context *mcontext,
         uint32_t value);
@ -1093,12 +1089,12 @@ PCRE2 CONTEXTS
       This   parameter   limits   the   depth   of   nested  backtracking  in
       pcre2_match().  Each time a nested backtracking point is passed, a  new
-       memory "frame" is used to remember the state of matching at that point.
+       memory  frame  is used to remember the state of matching at that point.
       Thus, this parameter indirectly limits the amount  of  memory  that  is
-       used  in  a match. However, because the size of each memory "frame" de-
+       used in a match. However, because the size of each memory frame depends
-       pends on the number of capturing parentheses, the actual  memory  limit
+       on the number of capturing parentheses, the actual memory limit  varies
-       varies  from pattern to pattern. This limit was more useful in versions
+       from  pattern to pattern. This limit was more useful in versions before
-       before 10.30, where function recursion was used for backtracking.
+       10.30, where function recursion was used for backtracking.
       The depth limit is not relevant, and is ignored, when matching is  done
       using JIT compiled code. However, it is supported by pcre2_dfa_match(),
@ -3051,12 +3047,12 @@ ERROR RETURNS FROM pcre2_match()
         PCRE2_ERROR_NOMEMORY
-       If  a  pattern contains many nested backtracking points, heap memory is
+       Heap  memory  is  used  to  remember backtracking points. This error is
-       used to remember them. This error is given when the  memory  allocation
+       given when the memory allocation function (default  or  custom)  fails.
-       function  (default  or  custom)  fails.  Note  that  a different error,
+       Note  that  a  different  error, PCRE2_ERROR_HEAPLIMIT, is given if the
-       PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed  exceeds
+       amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-       the    heap   limit.   PCRE2_ERROR_NOMEMORY   is   also   returned   if
+       also  returned  if PCRE2_COPY_MATCHED_SUBJECT is set and memory alloca-
-       PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+       tion fails.
         PCRE2_ERROR_NULL
@ -3860,8 +3856,8 @@ AUTHOR
 REVISION
-       Last updated: 14 December 2021
+       Last updated: 27 July 2022
-       Copyright (c) 1997-2021 University of Cambridge.
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
@ -4118,14 +4114,13 @@ LIMITING PCRE2 RESOURCE USAGE
       pcre2_dfa_match() matching function, and to JIT  matching  (though  the
       counting is done differently).
-       The  pcre2_match() function starts out using a 20KiB vector on the sys-
+       The  pcre2_match()  function  uses  heap  memory to record backtracking
-       tem stack to record backtracking points. The more  nested  backtracking
+       points. The more nested backtracking points there  are  (that  is,  the
-       points there are (that is, the deeper the search tree), the more memory
+       deeper  the  search tree), the more memory is needed. There is an upper
-       is needed. If the initial vector is not large enough,  heap  memory  is
+       limit, specified in kibibytes (units of 1024 bytes). This limit can  be
-       used,  up to a certain limit, which is specified in kibibytes (units of
+       changed  at  run  time, as described in the pcre2api documentation. The
-       1024 bytes). The limit can be changed at run time, as described in  the
+       default limit (in effect unlimited) is 20 million. You can change  this
-       pcre2api  documentation.  The default limit (in effect unlimited) is 20
+       by a setting such as
       million. You can change this by a setting such as
         --with-heap-limit=500
@ -4450,14 +4445,14 @@ SEE ALSO
 AUTHOR
       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.
 REVISION
-       Last updated: 08 December 2021
+       Last updated: 27 July 2022
-       Copyright (c) 1997-2021 University of Cambridge.
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
@ -5596,18 +5591,22 @@ SIZE AND OTHER LIMITATIONS
       The maximum length of a string argument to a  callout  is  the  largest
       number a 32-bit unsigned integer can hold.
       The  maximum  amount  of heap memory used for matching is controlled by
       the heap limit, which can be set in a pattern or in  a  match  context.
       The default is a very large number, effectively unlimited.
 AUTHOR
       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.
 REVISION
-       Last updated: 02 February 2019
+       Last updated: 26 July 2022
-       Copyright (c) 1997-2019 University of Cambridge.
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
@ -9773,12 +9772,29 @@ STACK AND HEAP USAGE AT RUN TIME
       sive function calls could use a great deal of  stack,  and  this  could
       cause  problems, but this usage has been eliminated. Backtracking posi-
       tions are now explicitly remembered in memory frames controlled by  the
-       code.  An  initial  20KiB  vector  of frames is allocated on the system
+       code.
-       stack (enough for about 100 frames for small patterns), but if this  is
+
-       insufficient,  heap  memory  is  used. The amount of heap memory can be
+       The size of each frame depends on the size of pointer variables and the
-       limited; if the limit is set to zero, only the initial stack vector  is
+       number of capturing parenthesized groups in the pattern being  matched.
-       used.  Rewriting patterns to be time-efficient, as described below, may
+       On a 64-bit system the frame size for a pattern with no captures is 128
-       also reduce the memory requirements.
+       bytes. For each capturing group the size increases by 16 bytes.
       Until release 10.41, an initial 20KiB frames vector  was  allocated  on
       the  system  stack,  but this still caused some issues for multi-thread
       applications where each thread has a very  small  stack.  From  release
       10.41  backtracking  memory  frames  are always held in heap memory. An
       initial heap allocation is obtained the first time any match data block
       is  passed  to  pcre2_match().  This  is remembered with the match data
       block and re-used if that block is used for another match. It is  freed
       when the match data block itself is freed.
       The  size  of the initial block is the larger of 20KiB or ten times the
       pattern's frame size, unless the heap limit is less than this, in which
       case  the  heap  limit  is  used. If the initial block proves to be too
       small during matching, it is replaced by a larger block, subject to the
       heap  limit.  The  heap limit is checked only when a new block is to be
       allocated. Reducing the heap limit between calls to pcre2_match()  with
       the same match data block does not affect the saved block.
       In  contrast  to  pcre2_match(),  pcre2_dfa_match()  does use recursive
       function calls, but only for processing atomic groups,  lookaround  as-
@ -9926,14 +9942,14 @@ PROCESSING TIME
 AUTHOR
       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.
 REVISION
-       Last updated: 03 February 2019
+       Last updated: 27 July 2022
-       Copyright (c) 1997-2019 University of Cambridge.
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "27 July 2022" "PCRE2 10.41"
+.TH PCRE2GREP 1 "30 July 2022" "PCRE2 10.41"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -43,13 +43,15 @@ For example:
 .sp
  pcre2grep some-pattern file1 - file3
 .sp
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how \fBpcre2grep\fP behaves. In
+However, there are options that can change how \fBpcre2grep\fP behaves. For
-particular, the \fB-M\fP option makes it possible to search for strings that
+example, the \fB-M\fP option makes it possible to search for strings that span
-span line boundaries. What defines a line boundary is controlled by the
+line boundaries. What defines a line boundary is controlled by the \fB-N\fP
-\fB-N\fP (\fB--newline\fP) option.
+(\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
 not file names are shown, and the \fB-Z\fP option changes the file name
 terminator to a zero byte.
 .P
 The amount of memory used for buffering files that are being scanned is
 controlled by parameters that can be set by the \fB--buffer-size\fP and
@ -149,9 +151,11 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
+context lines (the \fB-Z\fP option can be used to change the file name
-unless they are in fact contiguous in the input file. The value of \fInumber\fP
+terminator to a zero byte). A line containing "--" is output between each group
-is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
+of lines, unless they are in fact contiguous in the input file. The value of
 \fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
 \fB-A\fP is ignored.
 .TP
 \fB-a\fP, \fB--text\fP
 Treat binary files as text. This is equivalent to
@ -167,9 +171,10 @@ Output up to \fInumber\fP lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 \fInumber\fP lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
+instead of a colon for the context lines (the \fB-Z\fP option can be used to
-between each group of lines, unless they are in fact contiguous in the input
+change the file name terminator to a zero byte). A line containing "--" is
-file. The value of \fInumber\fP is expected to be relatively small. When
+output between each group of lines, unless they are in fact contiguous in the
 input file. The value of \fInumber\fP is expected to be relatively small. When
 \fB-c\fP is used, \fB-B\fP is ignored.
 .TP
 \fB--binary-files=\fP\fIword\fP
@ -356,19 +361,21 @@ shown separately. This option is mutually exclusive with \fB--output\fP,
 .TP
 \fB-H\fP, \fB--with-filename\fP
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
+searching a single file. The file name is not normally shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
+By default, for matching lines, the file name is followed by a colon; for
-hyphen separator is used. If a line number is also being output, it follows the
+context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
-file name. When the \fB-M\fP option causes a pattern to match more than one
+change the terminator to a zero byte. If a line number is also being output,
-line, only the first is preceded by the file name. This option overrides any
+it follows the file name. When the \fB-M\fP option causes a pattern to match
-previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
+more than one line, only the first is preceded by the file name. This option
 overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
 .TP
 \fB-h\fP, \fB--no-filename\fP
-Suppress the output file names when searching multiple files. By default,
+Suppress the output of file names when searching multiple files. File names are
-file names are shown when multiple files are searched. For matching lines, the
+normally shown when multiple files are searched. By default, for matching
-file name is followed by a colon; for context lines, a hyphen separator is used.
+lines, the file name is followed by a colon; for context lines, a hyphen
-If a line number is also being output, it follows the file name. This option
+separator is used. The \fB-Z\fP option can be used to change the terminator to
-overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
+a zero byte. If a line number is also being output, it follows the file name.
 This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
 .TP
 \fB--heap-limit\fP=\fInumber\fP
 See \fB--match-limit\fP below.
@ -417,17 +424,19 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
 \fB-L\fP, \fB--files-without-match\fP
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous \fB-H\fP,
+output once, on a separate line by default, but if the \fB-Z\fP option is set, 
-\fB-h\fP, or \fB-l\fP options.
+they are separated by zero bytes instead of newlines. This option overrides any
 previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
 .TP
 \fB-l\fP, \fB--files-with-matches\fP
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
+a separate line, but if the \fB-Z\fP option is set, they are separated by zero
-in a file. However, if the \fB-c\fP (count) option is also used, matching
+bytes instead of newlines. Searching normally stops as soon as a matching line
-continues in order to obtain the correct count, and those files that have at
+is found in a file. However, if the \fB-c\fP (count) option is also used,
-least one match are listed along with their counts. Using this option with
+matching continues in order to obtain the correct count, and those files that
-\fB-c\fP is a way of suppressing the listing of files with no matches that
+have at least one match are listed along with their counts. Using this option
 with \fB-c\fP is a way of suppressing the listing of files with no matches that
 occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
 \fB-h\fP, or \fB-L\fP options.
 .TP
@ -729,6 +738,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
 pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the \fB--include\fP or \fB--exclude\fP options.
 .TP
 \fB-Z\fP, \fB--null\fP
 Terminate file names in the regular output with a zero byte (the NUL
 character) instead of what would normally appear. This is useful when file
 names contain unusual characters such as colons, hyphens, or even newlines. The
 option does not apply to file names in error messages.
 .
 .
 .SH "ENVIRONMENT VARIABLES"
@ -957,6 +972,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 27 July 2022
+Last updated: 30 July 2022
 Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@ -42,13 +42,15 @@ DESCRIPTION
         pcre2grep some-pattern file1 - file3
-       Input files are searched line by  line.  By  default,  each  line  that
+       By default, input files are searched  line  by  line.  Each  line  that
       matches  a  pattern  is  copied to the standard output, and if there is
       more than one file, the file name is output at the start of each  line,
       followed  by  a  colon.  However, there are options that can change how
-       pcre2grep behaves. In particular, the -M option makes  it  possible  to
+       pcre2grep behaves. For example, the -M  option  makes  it  possible  to
       search  for  strings  that  span  line  boundaries. What defines a line
-       boundary is controlled by the -N (--newline) option.
+       boundary is controlled by the -N (--newline) option. The -h and -H  op-
       tions  control  whether  or not file names are shown, and the -Z option
       changes the file name terminator to a zero byte.
       The amount of memory used for buffering files that are being scanned is
       controlled  by  parameters  that  can  be  set by the --buffer-size and
@ -149,10 +151,12 @@ OPTIONS
                 the  file  is  reached,  or if the processing buffer size has
                 been set too small. If file names and/or line numbers are be-
                 ing output, a hyphen separator is used instead of a colon for
-                 the context lines. A line containing "--" is  output  between
+                 the context lines (the -Z option can be used  to  change  the
-                 each  group  of  lines, unless they are in fact contiguous in
+                 file  name terminator to a zero byte). A line containing "--"
-                 the input file. The value of number is expected to  be  rela-
+                 is output between each group of lines,  unless  they  are  in
-                 tively small. When -c is used, -A is ignored.
+                 fact contiguous in the input file. The value of number is ex-
                 pected to be relatively small. When -c is  used,  -A  is  ig-
                 nored.
       -a, --text
                 Treat  binary  files as text. This is equivalent to --binary-
@ -170,11 +174,12 @@ OPTIONS
                 start  of the file is within number lines, or if the process-
                 ing buffer size has been set too small. If file names  and/or
                 line numbers are being output, a hyphen separator is used in-
-                 stead of a colon for the context  lines.  A  line  containing
+                 stead of a colon for the context lines (the -Z option can  be
-                 "--"  is  output between each group of lines, unless they are
+                 used  to  change  the file name terminator to a zero byte). A
-                 in fact contiguous in the input file. The value of number  is
+                 line containing "--" is output between each group  of  lines,
-                 expected  to  be relatively small. When -c is used, -B is ig-
+                 unless  they  are  in  fact contiguous in the input file. The
-                 nored.
+                 value of number is expected to be relatively small.  When  -c
                 is used, -B is ignored.
       --binary-files=word
                 Specify  how binary files are to be processed. If the word is
@ -387,22 +392,25 @@ OPTIONS
       -H, --with-filename
                 Force  the  inclusion of the file name at the start of output
-                 lines when searching a single file. By default, the file name
+                 lines when searching a single file. The file name is not nor-
-                 is not shown in this case.  For matching lines, the file name
+                 mally  shown  in  this case.  By default, for matching lines,
-                 is followed by a colon; for context lines, a hyphen separator
+                 the file name is followed by a colon; for  context  lines,  a
-                 is used. If a line number is also being  output,  it  follows
+                 hyphen separator is used. The -Z option can be used to change
-                 the  file  name. When the -M option causes a pattern to match
+                 the terminator to a zero byte. If a line number is also being
-                 more than one line, only the first is preceded  by  the  file
+                 output, it follows the file name. When the -M option causes a
-                 name.  This  option  overrides any previous -h, -l, or -L op-
+                 pattern to match more than one line, only the first  is  pre-
-                 tions.
+                 ceded  by  the  file name. This option overrides any previous
                 -h, -l, or -L options.
       -h, --no-filename
                 Suppress the output file names when searching multiple files.
-                 By  default,  file  names  are  shown when multiple files are
+                 File  names  are  normally  shown  when  multiple  files  are
-                 searched. For matching lines, the file name is followed by  a
+                 searched. By default, for matching lines, the  file  name  is
-                 colon;  for  context lines, a hyphen separator is used.  If a
+                 followed by a colon; for context lines, a hyphen separator is
-                 line number is also being output, it follows the  file  name.
+                 used. The -Z option can be used to change the terminator to a
-                 This option overrides any previous -H, -L, or -l options.
+                 zero  byte. If a line number is also being output, it follows
                 the file name.  This option overrides any previous -H, -L, or
                 -l options.
       --heap-limit=number
                 See --match-limit below.
@ -455,21 +463,23 @@ OPTIONS
                 Instead  of  outputting lines from the files, just output the
                 names of the files that do not contain any lines  that  would
                 have  been  output. Each file name is output once, on a sepa-
-                 rate line. This option overrides any previous -H, -h,  or  -l
+                 rate line by default, but if the -Z option is set,  they  are
-                 options.
+                 separated  by  zero  bytes  instead  of newlines. This option
                 overrides any previous -H, -h, or -l options.
       -l, --files-with-matches
                 Instead of outputting lines from the files, just  output  the
                 names of the files containing lines that would have been out-
-                 put.  Each  file  name  is  output  once, on a separate line.
+                 put. Each file name is output once, on a separate  line,  but
-                 Searching normally stops as soon as a matching line is  found
+                 if the -Z option is set, they are separated by zero bytes in-
-                 in  a  file.  However, if the -c (count) option is also used,
+                 stead of newlines. Searching normally  stops  as  soon  as  a
-                 matching continues in order to obtain the correct count,  and
+                 matching  line is found in a file. However, if the -c (count)
-                 those  files  that  have  at least one match are listed along
+                 option is also used, matching continues in  order  to  obtain
-                 with their counts. Using this option with -c is a way of sup-
+                 the  correct  count,  and  those files that have at least one
-                 pressing  the  listing  of  files with no matches that occurs
+                 match are listed along with their counts. Using  this  option
-                 with -c on its own. This option overrides  any  previous  -H,
+                 with  -c is a way of suppressing the listing of files with no
-                 -h, or -L options.
+                 matches that occurs with -c on its own. This option overrides
                 any previous -H, -h, or -L options.
       --label=name
                 This option supplies a name to be used for the standard input
@ -571,11 +581,8 @@ OPTIONS
                 an error occurs.
                 The  --heap-limit  option specifies, as a number of kibibytes
-                 (units of 1024 bytes), the amount of heap memory that may  be
+                 (units of 1024 bytes), the maximum amount of heap memory that
-                 used for matching. Heap memory is needed only if matching the
+                 may be used for matching.
                 pattern requires a significant number of nested  backtracking
                 points to be remembered. This parameter can be set to zero to
                 forbid the use of heap memory altogether.
                 The  --depth-limit  option  limits  the depth of nested back-
                 tracking points, which indirectly limits the amount of memory
@ -812,6 +819,13 @@ OPTIONS
                 does  not apply to patterns specified by any of the --include
                 or --exclude options.
       -Z, --null
                 Terminate file names in the regular output with a  zero  byte
                 (the  NUL  character)  instead of what would normally appear.
                 This is useful when file  names  contain  unusual  characters
                 such  as  colons,  hyphens, or even newlines. The option does
                 not apply to file names in error messages.
 ENVIRONMENT VARIABLES
@ -1022,5 +1036,5 @@ AUTHOR
 REVISION
-       Last updated: 31 August 2021
+       Last updated: 30 July 2022
-       Copyright (c) 1997-2021 University of Cambridge.
+       Copyright (c) 1997-2022 University of Cambridge.
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -1111,7 +1111,8 @@ SUBJECT MODIFIERS
             copy=<number or name>      copy captured substring
             depth_limit=<n>            set a depth limit
             dfa                        use pcre2_dfa_match()
-             find_limits                find match and depth limits
+             find_limits                find heap, match and depth limits
             find_limits_noheap         find match and depth limits
             get=<number or name>       extract captured substring
             getall                     extract all captured substrings
         /g  global                     global matching
@ -1411,7 +1412,7 @@ SUBJECT MODIFIERS
       The heap_limit, match_limit, and depth_limit modifiers set  the  appro-
       priate  limits  in the match context. These values are ignored when the
-       find_limits modifier is specified.
+       find_limits or find_limits_noheap modifier is specified.
   Finding minimum limits
@ -1419,8 +1420,12 @@ SUBJECT MODIFIERS
       calls  the  relevant matching function several times, setting different
       values   in   the    match    context    via    pcre2_set_heap_limit(),
       pcre2_set_match_limit(),  or pcre2_set_depth_limit() until it finds the
-       minimum values for each parameter that allows  the  match  to  complete
+       smallest value for each parameter that allows  the  match  to  complete
-       without error. If JIT is being used, only the match limit is relevant.
+       without a "limit exceeded" error. The match itself may succeed or fail.
       An alternative modifier, find_limits_noheap, omits the heap limit. This
       is  used  in  the standard tests, because the minimum heap limit varies
       between systems. If JIT is being used, only the match  limit  is  rele-
       vant, and the other two are automatically omitted.
       When using this modifier, the pattern should not contain any limit set-
       tings such as (*LIMIT_MATCH=...)  within  it.  If  such  a  setting  is
@ -1446,9 +1451,7 @@ SUBJECT MODIFIERS
       For  both  kinds  of  matching,  the  heap_limit  number,  which  is in
       kibibytes (units of 1024 bytes), limits the amount of heap memory  used
-       for matching. A value of zero disables the use of any heap memory; many
+       for matching.
       simple pattern matches can be done without using the heap, so  zero  is
       not an unreasonable setting.
   Showing MARK names
@ -1463,13 +1466,11 @@ SUBJECT MODIFIERS
       The  memory modifier causes pcre2test to log the sizes of all heap mem-
       ory  allocation  and  freeing  calls  that  occur  during  a  call   to
-       pcre2_match()  or  pcre2_dfa_match(). These occur only when a match re-
+       pcre2_match()  or pcre2_dfa_match(). In the latter case, heap memory is
-       quires a bigger vector than the default  for  remembering  backtracking
+       used only when a match requires more internal workspace  than  the  de-
-       points  (pcre2_match())  or for internal workspace (pcre2_dfa_match()).
+       fault  allocation  on the stack, so in many cases there will be no out-
-       In many cases there will be no heap memory used and therefore no  addi-
+       put. No heap memory is allocated during matching  with  JIT.  For  this
-       tional output. No heap memory is allocated during matching with JIT, so
+       modifier to work, the null_context modifier must not be set on both the
       in that case the memory modifier never has any effect. For  this  modi-
       fier  to  work,  the  null_context modifier must not be set on both the
       pattern and the subject, though it can be set on one or the other.
   Setting a starting offset
@ -1518,7 +1519,8 @@ SUBJECT MODIFIERS
       null_context  modifier  is  set,  however,  NULL is passed. This is for
       testing that the matching and substitution functions  behave  correctly
       in  this  case  (they use default values). This modifier cannot be used
-       with the find_limits or substitute_callout modifiers.
+       with the find_limits, find_limits_noheap, or  substitute_callout  modi-
       fiers.
       Similarly,  for  testing purposes, if the null_subject or null_replace-
       ment modifier is set, the subject or replacement  string  pointers  are
@ -1949,5 +1951,5 @@ AUTHOR
 REVISION
-       Last updated: 12 January 2022
+       Last updated: 27 July 2022
       Copyright (c) 1997-2022 University of Cambridge.
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@ -205,9 +205,6 @@ point. */
 *               Global variables                 *
 *************************************************/
 /* Jeffrey Friedl has some debugging requirements that are not part of the
 regular code. */
 static const char *colour_string = "1;31";
 static const char *colour_option = NULL;
 static const char *dee_option = NULL;
@ -220,6 +217,10 @@ static const char *output_text = NULL;
 static char *main_buffer = NULL;
 static const char *printname_nl = STDOUT_NL;  /* Changed to NULL for -Z */
 static int printname_colon = ':';             /* Changed to 0 for -Z */
 static int printname_hyphen = '-';            /* Changed to 0 for -Z */
 static int after_context = 0;
 static int before_context = 0;
 static int binary_files = BIN_BINARY;
@ -483,6 +484,7 @@ static option_item optionlist[] = {
  { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
  { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
  { OP_NODATA,    'Z',      NULL,              "null",          "output 0 byte after file names"  },
  { OP_NODATA,    0,        NULL,               NULL,            NULL }
 };
@ -1773,7 +1775,7 @@ if (after_context > 0 && lastmatchnumber > 0)
    {
    char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
    if (ellength == 0 && pp == main_buffer + bufsize) break;
-    if (printname != NULL) fprintf(stdout, "%s-", printname);
+    if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
    if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
    FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
    lastmatchrestart = pp;
@ -2730,7 +2732,9 @@ while (ptr < endptr)
    else if (filenames == FN_MATCH_ONLY)
      {
-      fprintf(stdout, "%s" STDOUT_NL, printname);
+      fprintf(stdout, "%s", printname);
      if (printname_nl == NULL) fprintf(stdout, "%c", 0);
        else fprintf(stdout, "%s", printname_nl);
      return 0;
      }
@ -2749,7 +2753,8 @@ while (ptr < endptr)
        {
        PCRE2_SIZE oldstartoffset;
-        if (printname != NULL) fprintf(stdout, "%s:", printname);
+        if (printname != NULL) fprintf(stdout, "%s%c", printname,
          printname_colon);
        if (number) fprintf(stdout, "%lu:", linenumber);
        /* Handle --line-offsets */
@ -2871,7 +2876,8 @@ while (ptr < endptr)
        while (lastmatchrestart < p)
          {
          char *pp = lastmatchrestart;
-          if (printname != NULL) fprintf(stdout, "%s-", printname);
+          if (printname != NULL) fprintf(stdout, "%s%c", printname,
            printname_hyphen);
          if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
          pp = end_of_line(pp, endptr, &ellength);
          FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
@ -2912,7 +2918,8 @@ while (ptr < endptr)
          {
          int ellength;
          char *pp = p;
-          if (printname != NULL) fprintf(stdout, "%s-", printname);
+          if (printname != NULL) fprintf(stdout, "%s%c", printname,
            printname_hyphen);
          if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
          pp = end_of_line(pp, endptr, &ellength);
          FWRITE_IGNORE(p, 1, pp - p, stdout);
@ -2926,7 +2933,8 @@ while (ptr < endptr)
      if (after_context > 0 || before_context > 0)
        endhyphenpending = TRUE;
-      if (printname != NULL) fprintf(stdout, "%s:", printname);
+      if (printname != NULL) fprintf(stdout, "%s%c", printname,
        printname_colon);
      if (number) fprintf(stdout, "%lu:", linenumber);
      /* In multiline mode, or if colouring, we have to split the line(s) up
@ -3131,7 +3139,9 @@ were none. If we found a match, we won't have got this far. */
 if (filenames == FN_NOMATCH_ONLY)
  {
-  fprintf(stdout, "%s" STDOUT_NL, printname);
+  fprintf(stdout, "%s", printname);
  if (printname_nl == NULL) fprintf(stdout, "%c", 0);
    else fprintf(stdout, "%s", printname_nl);
  return 0;
  }
@ -3142,7 +3152,7 @@ if (count_only && !quiet)
  if (count > 0 || !omit_zero_count)
    {
    if (printname != NULL && filenames != FN_NONE)
-      fprintf(stdout, "%s:", printname);
+      fprintf(stdout, "%s%c", printname, printname_colon);
    fprintf(stdout, "%lu" STDOUT_NL, count);
    counts_printed++;
    }
@ -3528,8 +3538,6 @@ switch(letter)
  case 'u': options |= PCRE2_UTF; utf = TRUE; break;
  case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
  case 'v': invert = TRUE; break;
  case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
  case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
  case 'V':
    {
@ -3540,6 +3548,10 @@ switch(letter)
  pcre2grep_exit(0);
  break;
  case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
  case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
  case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
  default:
  fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
  pcre2grep_exit(usage(2));
@ -4259,8 +4271,6 @@ if (DEE_option != NULL)
 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
 /* Check the values for Jeffrey Friedl's debugging options. */
 /* If use_jit is set, check whether JIT is available. If not, do not try
 to use JIT. */
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@ -991,3 +991,22 @@ RC=0
 ---------------------------- Test 134 -----------------------------
 =AB3CD5=
 RC=0
 ---------------------------- Test 135 -----------------------------
 ./testdata/grepinputv@The word is cat in this line
 RC=0
 ./testdata/grepinputv@./testdata/grepinputv@RC=0
 ./testdata/grepinputv@This line contains \E and (regex) *meta* [characters].
 ./testdata/grepinputv@The word is cat in this line
 ./testdata/grepinputv@The caterpillar sat on the mat
 RC=0
 testdata/grepinputM3:start end in between start
 end and following
 testdata/grepinputM7:start end in between start
 end and following start
 end other stuff
 testdata/grepinputM11:start end in between start
 end
 testdata/grepinputM16:start end in between start
 end
 RC=0