Implement -Z in pcre2grep and update documentation

2022-07-30 17:41:49 +01:00 · 2022-07-30 17:41:49 +01:00 · 8b133fa0ba
parent cc5e121c8e
commit 8b133fa0ba
16 changed files with 994 additions and 868 deletions
--- a/2
+++ b/2
@ -49,6 +49,8 @@ tests.
 tests run by 'make check', but can be run manually. The current output is from 
 a 64-bit system.

+13. Implemented -Z aka --null in pcre2grep.
+

 Version 10.40 15-April-2022
 ---------------------------
--- a/42
+++ b/42
@ -68,6 +68,22 @@ diff -b  /dev/null /dev/null 2>/dev/null && cf="diff -b"
 diff -u  /dev/null /dev/null 2>/dev/null && cf="diff -u"
 diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"

+# Some tests involve NUL characters. It seems impossible to handle them easily
+# in many operating systems. An earlier version of this script used sed to
+# translate NUL into the string ZERO, but this didn't work on Solaris (aka
+# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
+# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
+# even when using GNU sed. A user suggested using tr instead, which
+# necessitates translating to a single character. However, on (some versions
+# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
+# /usr/xpg4/bin/tr is available, it can do so, so test for that.
+
+if [ -x /usr/xpg4/bin/tr ] ; then
+  tr=/usr/xpg4/bin/tr
+else
+  tr=tr
+fi
+
 # If this test is being run from "make check", $srcdir will be set. If not, set
 # it to the current or parent directory, whichever one contains the test data.
 # Subsequently, we run most of the pcre2grep tests in the source directory so
@ -685,6 +701,16 @@ echo "---------------------------- Test 134 -----------------------------" >>tes
 (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

+echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
 # Now compare the results.

 $cf $srcdir/testdata/grepoutput testtrygrep
@ -759,22 +785,6 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
 printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-# This next test involves NUL characters. It seems impossible to handle them
-# easily in many operating systems. An earlier version of this script used sed
-# to translate NUL into the string ZERO, but this didn't work on Solaris (aka
-# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
-# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
-# even when using GNU sed. A user suggested using tr instead, which
-# necessitates translating to a single character (@). However, on (some
-# versions of?) Solaris, the normal "tr" cannot handle binary zeros, but if
-# /usr/xpg4/bin/tr is available, it can do so, so test for that.
-
-if [ -x /usr/xpg4/bin/tr ] ; then
-  tr=/usr/xpg4/bin/tr
-else
-  tr=tr
-fi
-
 printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
 printf 'abc\0def' >testNinputgrep
 $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@ -121,6 +121,7 @@ environment, for example.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -373,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (wherein which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -17,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
 You can access the archives and also subscribe or manage your subscription
 here:

-https://groups.google.com/pcre2-dev
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -375,7 +375,8 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.
@ -400,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -695,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -1017,7 +1017,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 </P>
 <P>
 A value for the heap limit may also be supplied by an item at the start of a
@ -1030,19 +1030,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The <b>pcre2_match()</b> function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+<b>pcre2_match()</b> uses the heap are given in the
+<a href="pcre2perform.html"><b>pcre2perform</b></a>
+documentation.
 </P>
 <P>
-Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 <br>
 <br>
 <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@ -1089,10 +1087,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
 <br>
 <br>
 This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@ -3148,11 +3146,11 @@ The backtracking match limit was reached.
 <pre>
  PCRE2_ERROR_NOMEMORY
 </pre>
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backtracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 <pre>
  PCRE2_ERROR_NULL
 </pre>
@ -4020,9 +4018,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC42" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 14 December 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@ -284,12 +284,11 @@ to the <b>configure</b> command. This setting also applies to the
 counting is done differently).
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The <b>pcre2_match()</b> function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation. The default limit (in effect unlimited) is 20 million. You can
 change this by a setting such as
@ -609,16 +608,16 @@ give a warning.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC26" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 08 December 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -71,13 +71,15 @@ For example:
 <pre>
  pcre2grep some-pattern file1 - file3
 </pre>
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how <b>pcre2grep</b> behaves. In
-particular, the <b>-M</b> option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-<b>-N</b> (<b>--newline</b>) option.
+However, there are options that can change how <b>pcre2grep</b> behaves. For
+example, the <b>-M</b> option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the <b>-N</b>
+(<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
+not file names are shown, and the <b>-Z</b> option changes the file name
+terminator to a zero byte.
 </P>
 <P>
 The amount of memory used for buffering files that are being scanned is
@ -178,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of <i>number</i>
-is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
+context lines (the <b>-Z</b> option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
+<b>-A</b> is ignored.
 </P>
 <P>
 <b>-a</b>, <b>--text</b>
@ -199,9 +203,10 @@ Output up to <i>number</i> lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 <i>number</i> lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of <i>number</i> is expected to be relatively small. When
+instead of a colon for the context lines (the <b>-Z</b> option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of <i>number</i> is expected to be relatively small. When
 <b>-c</b> is used, <b>-B</b> is ignored.
 </P>
 <P>
@ -411,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
 <P>
 <b>-H</b>, <b>--with-filename</b>
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the <b>-M</b> option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the <b>-M</b> option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>-h</b>, <b>--no-filename</b>
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
+Suppress the output file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The <b>-Z</b> option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>--heap-limit</b>=<i>number</i>
@ -481,18 +488,20 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
 <b>-L</b>, <b>--files-without-match</b>
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous <b>-H</b>,
-<b>-h</b>, or <b>-l</b> options.
+output once, on a separate line by default, but if the <b>-Z</b> option is set, 
+they are separated by zero bytes instead of newlines. This option overrides any
+previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>-l</b>, <b>--files-with-matches</b>
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the <b>-c</b> (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches that
+a separate line, but if the <b>-Z</b> option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches that
 occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
 <b>-h</b>, or <b>-L</b> options.
 </P>
@ -592,10 +601,7 @@ value set by <b>--match-limit</b> is reached, an error occurs.
 <br>
 <br>
 The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 <br>
 <br>
 The <b>--depth-limit</b> option limits the depth of nested backtracking points,
@ -839,6 +845,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the <b>--include</b> or <b>--exclude</b> options.
 </P>
+<P>
+<b>-Z</b>, <b>--null</b>
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
+</P>
 <br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 <P>
 The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
@ -1053,9 +1066,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 31 August 2021
+Last updated: 30 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
 </P>
+<P>
+The maximum amount of heap memory used for matching is controlled by the heap 
+limit, which can be set in a pattern or in a match context. The default is a 
+very large number, effectively unlimited.
+</P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -86,9 +91,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 02 February 2019
+Last updated: 26 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code. 
+</P>
+<P>
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+</P>
+<P>
+Until release 10.41, an initial 20KiB frames vector was allocated on the system 
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to <b>pcre2_match()</b>. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+</P>
+<P>
+The size of the initial block is the larger of 20KiB or ten times the pattern's 
+frame size, unless the heap limit is less than this, in which case the heap 
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is 
+checked only when a new block is to be allocated. Reducing the heap limit 
+between calls to <b>pcre2_match()</b> with the same match data block does not 
+affect the saved block.
 </P>
 <P>
 In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 February 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -1241,7 +1241,8 @@ pattern, but can be overridden by modifiers on the subject.
      copy=&#60;number or name&#62;      copy captured substring
      depth_limit=&#60;n&#62;            set a depth limit
      dfa                        use <b>pcre2_dfa_match()</b>
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
      get=&#60;number or name&#62;       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
@ -1564,7 +1565,7 @@ Setting heap, match, and depth limits
 <P>
 The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
 the appropriate limits in the match context. These values are ignored when the
-<b>find_limits</b> modifier is specified.
+<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
 </P>
 <br><b>
 Finding minimum limits
@ -1574,8 +1575,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
 calls the relevant matching function several times, setting different values in
 the match context via <b>pcre2_set_heap_limit()</b>,
 <b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 </P>
 <P>
 When using this modifier, the pattern should not contain any limit settings
@ -1603,9 +1608,7 @@ overall amount of computing resource that is used.
 </P>
 <P>
 For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 </P>
 <br><b>
 Showing MARK names
@ -1623,12 +1626,10 @@ Showing memory usage
 <P>
 The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the <b>memory</b> modifier never has any effect. For this modifier to work, the
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
+is used only when a match requires more internal workspace than the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 <b>null_context</b> modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 </P>
@ -1690,7 +1691,8 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
 If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-<b>find_limits</b> or <b>substitute_callout</b> modifiers.
+<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
+modifiers.
 </P>
 <P>
 Similarly, for testing purposes, if the <b>null_subject</b> or
@ -2141,7 +2143,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 12 January 2022
+Last updated: 27 July 2022
 <br>
 Copyright &copy; 1997-2022 University of Cambridge.
 <br>
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@ -1028,7 +1028,7 @@ PCRE2 CONTEXTS
       pcre2jit  documentation for more details). If the limit is reached, the
       negative error code  PCRE2_ERROR_HEAPLIMIT  is  returned.  The  default
       limit  can be set when PCRE2 is built; if it is not, the default is set
-       very large and is essentially "unlimited".
+       very large and is essentially unlimited.

       A value for the heap limit may also be supplied by an item at the start
       of a pattern of the form
@ -1039,19 +1039,15 @@ PCRE2 CONTEXTS
       less ddd is less than the limit set by the caller of pcre2_match()  or,
       if no such limit is set, less than the default.

-       The  pcre2_match() function starts out using a 20KiB vector on the sys-
-       tem stack for recording backtracking points. The more nested backtrack-
-       ing  points  there  are (that is, the deeper the search tree), the more
-       memory is needed.  Heap memory is used only if the  initial  vector  is
-       too small. If the heap limit is set to a value less than 21 (in partic-
-       ular, zero) no heap memory will be used. In this  case,  only  patterns
-       that  do not have a lot of nested backtracking can be successfully pro-
-       cessed.
+       The  pcre2_match() function always needs some heap memory, so setting a
+       value of zero guarantees a "heap limit exceeded" error. Details of  how
+       pcre2_match()  uses  the  heap are given in the pcre2perform documenta-
+       tion.

-       Similarly, for pcre2_dfa_match(), a vector on the system stack is  used
-       when  processing pattern recursions, lookarounds, or atomic groups, and
-       only if this is not big enough is heap memory used. In this case,  too,
-       setting a value of zero disables the use of the heap.
+       For pcre2_dfa_match(), a vector on the system stack is used  when  pro-
+       cessing  pattern recursions, lookarounds, or atomic groups, and only if
+       this is not big enough is heap memory used. In  this  case,  setting  a
+       value of zero disables the use of the heap.

       int pcre2_set_match_limit(pcre2_match_context *mcontext,
         uint32_t value);
@ -1093,12 +1089,12 @@ PCRE2 CONTEXTS

       This   parameter   limits   the   depth   of   nested  backtracking  in
       pcre2_match().  Each time a nested backtracking point is passed, a  new
-       memory "frame" is used to remember the state of matching at that point.
+       memory  frame  is used to remember the state of matching at that point.
       Thus, this parameter indirectly limits the amount  of  memory  that  is
-       used  in  a match. However, because the size of each memory "frame" de-
-       pends on the number of capturing parentheses, the actual  memory  limit
-       varies  from pattern to pattern. This limit was more useful in versions
-       before 10.30, where function recursion was used for backtracking.
+       used in a match. However, because the size of each memory frame depends
+       on the number of capturing parentheses, the actual memory limit  varies
+       from  pattern to pattern. This limit was more useful in versions before
+       10.30, where function recursion was used for backtracking.

       The depth limit is not relevant, and is ignored, when matching is  done
       using JIT compiled code. However, it is supported by pcre2_dfa_match(),
@ -3051,12 +3047,12 @@ ERROR RETURNS FROM pcre2_match()

         PCRE2_ERROR_NOMEMORY

-       If  a  pattern contains many nested backtracking points, heap memory is
-       used to remember them. This error is given when the  memory  allocation
-       function  (default  or  custom)  fails.  Note  that  a different error,
-       PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed  exceeds
-       the    heap   limit.   PCRE2_ERROR_NOMEMORY   is   also   returned   if
-       PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+       Heap  memory  is  used  to  remember backtracking points. This error is
+       given when the memory allocation function (default  or  custom)  fails.
+       Note  that  a  different  error, PCRE2_ERROR_HEAPLIMIT, is given if the
+       amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
+       also  returned  if PCRE2_COPY_MATCHED_SUBJECT is set and memory alloca-
+       tion fails.

         PCRE2_ERROR_NULL

@ -3860,8 +3856,8 @@ AUTHOR

 REVISION

-       Last updated: 14 December 2021
-       Copyright (c) 1997-2021 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -4118,14 +4114,13 @@ LIMITING PCRE2 RESOURCE USAGE
       pcre2_dfa_match() matching function, and to JIT  matching  (though  the
       counting is done differently).

-       The  pcre2_match() function starts out using a 20KiB vector on the sys-
-       tem stack to record backtracking points. The more  nested  backtracking
-       points there are (that is, the deeper the search tree), the more memory
-       is needed. If the initial vector is not large enough,  heap  memory  is
-       used,  up to a certain limit, which is specified in kibibytes (units of
-       1024 bytes). The limit can be changed at run time, as described in  the
-       pcre2api  documentation.  The default limit (in effect unlimited) is 20
-       million. You can change this by a setting such as
+       The  pcre2_match()  function  uses  heap  memory to record backtracking
+       points. The more nested backtracking points there  are  (that  is,  the
+       deeper  the  search tree), the more memory is needed. There is an upper
+       limit, specified in kibibytes (units of 1024 bytes). This limit can  be
+       changed  at  run  time, as described in the pcre2api documentation. The
+       default limit (in effect unlimited) is 20 million. You can change  this
+       by a setting such as

         --with-heap-limit=500

@ -4450,14 +4445,14 @@ SEE ALSO
 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 08 December 2021
-       Copyright (c) 1997-2021 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -5596,18 +5591,22 @@ SIZE AND OTHER LIMITATIONS
       The maximum length of a string argument to a  callout  is  the  largest
       number a 32-bit unsigned integer can hold.

+       The  maximum  amount  of heap memory used for matching is controlled by
+       the heap limit, which can be set in a pattern or in  a  match  context.
+       The default is a very large number, effectively unlimited.
+

 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 02 February 2019
-       Copyright (c) 1997-2019 University of Cambridge.
+       Last updated: 26 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -9773,12 +9772,29 @@ STACK AND HEAP USAGE AT RUN TIME
       sive function calls could use a great deal of  stack,  and  this  could
       cause  problems, but this usage has been eliminated. Backtracking posi-
       tions are now explicitly remembered in memory frames controlled by  the
-       code.  An  initial  20KiB  vector  of frames is allocated on the system
-       stack (enough for about 100 frames for small patterns), but if this  is
-       insufficient,  heap  memory  is  used. The amount of heap memory can be
-       limited; if the limit is set to zero, only the initial stack vector  is
-       used.  Rewriting patterns to be time-efficient, as described below, may
-       also reduce the memory requirements.
+       code.
+
+       The size of each frame depends on the size of pointer variables and the
+       number of capturing parenthesized groups in the pattern being  matched.
+       On a 64-bit system the frame size for a pattern with no captures is 128
+       bytes. For each capturing group the size increases by 16 bytes.
+
+       Until release 10.41, an initial 20KiB frames vector  was  allocated  on
+       the  system  stack,  but this still caused some issues for multi-thread
+       applications where each thread has a very  small  stack.  From  release
+       10.41  backtracking  memory  frames  are always held in heap memory. An
+       initial heap allocation is obtained the first time any match data block
+       is  passed  to  pcre2_match().  This  is remembered with the match data
+       block and re-used if that block is used for another match. It is  freed
+       when the match data block itself is freed.
+
+       The  size  of the initial block is the larger of 20KiB or ten times the
+       pattern's frame size, unless the heap limit is less than this, in which
+       case  the  heap  limit  is  used. If the initial block proves to be too
+       small during matching, it is replaced by a larger block, subject to the
+       heap  limit.  The  heap limit is checked only when a new block is to be
+       allocated. Reducing the heap limit between calls to pcre2_match()  with
+       the same match data block does not affect the saved block.

       In  contrast  to  pcre2_match(),  pcre2_dfa_match()  does use recursive
       function calls, but only for processing atomic groups,  lookaround  as-
@ -9926,14 +9942,14 @@ PROCESSING TIME
 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 03 February 2019
-       Copyright (c) 1997-2019 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "27 July 2022" "PCRE2 10.41"
+.TH PCRE2GREP 1 "30 July 2022" "PCRE2 10.41"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -43,13 +43,15 @@ For example:
 .sp
  pcre2grep some-pattern file1 - file3
 .sp
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how \fBpcre2grep\fP behaves. In
-particular, the \fB-M\fP option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-\fB-N\fP (\fB--newline\fP) option.
+However, there are options that can change how \fBpcre2grep\fP behaves. For
+example, the \fB-M\fP option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the \fB-N\fP
+(\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
+not file names are shown, and the \fB-Z\fP option changes the file name
+terminator to a zero byte.
 .P
 The amount of memory used for buffering files that are being scanned is
 controlled by parameters that can be set by the \fB--buffer-size\fP and
@ -149,9 +151,11 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of \fInumber\fP
-is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
+context lines (the \fB-Z\fP option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
+\fB-A\fP is ignored.
 .TP
 \fB-a\fP, \fB--text\fP
 Treat binary files as text. This is equivalent to
@ -167,9 +171,10 @@ Output up to \fInumber\fP lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 \fInumber\fP lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of \fInumber\fP is expected to be relatively small. When
+instead of a colon for the context lines (the \fB-Z\fP option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of \fInumber\fP is expected to be relatively small. When
 \fB-c\fP is used, \fB-B\fP is ignored.
 .TP
 \fB--binary-files=\fP\fIword\fP
@ -356,19 +361,21 @@ shown separately. This option is mutually exclusive with \fB--output\fP,
 .TP
 \fB-H\fP, \fB--with-filename\fP
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the \fB-M\fP option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the \fB-M\fP option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
 .TP
 \fB-h\fP, \fB--no-filename\fP
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
+Suppress the output file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The \fB-Z\fP option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
 .TP
 \fB--heap-limit\fP=\fInumber\fP
 See \fB--match-limit\fP below.
@ -417,17 +424,19 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
 \fB-L\fP, \fB--files-without-match\fP
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous \fB-H\fP,
-\fB-h\fP, or \fB-l\fP options.
+output once, on a separate line by default, but if the \fB-Z\fP option is set,
+they are separated by zero bytes instead of newlines. This option overrides any
+previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
 .TP
 \fB-l\fP, \fB--files-with-matches\fP
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the \fB-c\fP (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-\fB-c\fP is a way of suppressing the listing of files with no matches that
+a separate line, but if the \fB-Z\fP option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches that
 occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
 \fB-h\fP, or \fB-L\fP options.
 .TP
@ -729,6 +738,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
 pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-Z\fP, \fB--null\fP
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
 .
 .
 .SH "ENVIRONMENT VARIABLES"
@ -957,6 +972,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 27 July 2022
+Last updated: 30 July 2022
 Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@ -42,13 +42,15 @@ DESCRIPTION

         pcre2grep some-pattern file1 - file3

-       Input files are searched line by  line.  By  default,  each  line  that
+       By default, input files are searched  line  by  line.  Each  line  that
       matches  a  pattern  is  copied to the standard output, and if there is
       more than one file, the file name is output at the start of each  line,
       followed  by  a  colon.  However, there are options that can change how
-       pcre2grep behaves. In particular, the -M option makes  it  possible  to
+       pcre2grep behaves. For example, the -M  option  makes  it  possible  to
       search  for  strings  that  span  line  boundaries. What defines a line
-       boundary is controlled by the -N (--newline) option.
+       boundary is controlled by the -N (--newline) option. The -h and -H  op-
+       tions  control  whether  or not file names are shown, and the -Z option
+       changes the file name terminator to a zero byte.

       The amount of memory used for buffering files that are being scanned is
       controlled  by  parameters  that  can  be  set by the --buffer-size and
@ -149,10 +151,12 @@ OPTIONS
                 the  file  is  reached,  or if the processing buffer size has
                 been set too small. If file names and/or line numbers are be-
                 ing output, a hyphen separator is used instead of a colon for
-                 the context lines. A line containing "--" is  output  between
-                 each  group  of  lines, unless they are in fact contiguous in
-                 the input file. The value of number is expected to  be  rela-
-                 tively small. When -c is used, -A is ignored.
+                 the context lines (the -Z option can be used  to  change  the
+                 file  name terminator to a zero byte). A line containing "--"
+                 is output between each group of lines,  unless  they  are  in
+                 fact contiguous in the input file. The value of number is ex-
+                 pected to be relatively small. When -c is  used,  -A  is  ig-
+                 nored.

       -a, --text
                 Treat  binary  files as text. This is equivalent to --binary-
@ -170,11 +174,12 @@ OPTIONS
                 start  of the file is within number lines, or if the process-
                 ing buffer size has been set too small. If file names  and/or
                 line numbers are being output, a hyphen separator is used in-
-                 stead of a colon for the context  lines.  A  line  containing
-                 "--"  is  output between each group of lines, unless they are
-                 in fact contiguous in the input file. The value of number  is
-                 expected  to  be relatively small. When -c is used, -B is ig-
-                 nored.
+                 stead of a colon for the context lines (the -Z option can  be
+                 used  to  change  the file name terminator to a zero byte). A
+                 line containing "--" is output between each group  of  lines,
+                 unless  they  are  in  fact contiguous in the input file. The
+                 value of number is expected to be relatively small.  When  -c
+                 is used, -B is ignored.

       --binary-files=word
                 Specify  how binary files are to be processed. If the word is
@ -387,22 +392,25 @@ OPTIONS

       -H, --with-filename
                 Force  the  inclusion of the file name at the start of output
-                 lines when searching a single file. By default, the file name
-                 is not shown in this case.  For matching lines, the file name
-                 is followed by a colon; for context lines, a hyphen separator
-                 is used. If a line number is also being  output,  it  follows
-                 the  file  name. When the -M option causes a pattern to match
-                 more than one line, only the first is preceded  by  the  file
-                 name.  This  option  overrides any previous -h, -l, or -L op-
-                 tions.
+                 lines when searching a single file. The file name is not nor-
+                 mally  shown  in  this case.  By default, for matching lines,
+                 the file name is followed by a colon; for  context  lines,  a
+                 hyphen separator is used. The -Z option can be used to change
+                 the terminator to a zero byte. If a line number is also being
+                 output, it follows the file name. When the -M option causes a
+                 pattern to match more than one line, only the first  is  pre-
+                 ceded  by  the  file name. This option overrides any previous
+                 -h, -l, or -L options.

       -h, --no-filename
                 Suppress the output file names when searching multiple files.
-                 By  default,  file  names  are  shown when multiple files are
-                 searched. For matching lines, the file name is followed by  a
-                 colon;  for  context lines, a hyphen separator is used.  If a
-                 line number is also being output, it follows the  file  name.
-                 This option overrides any previous -H, -L, or -l options.
+                 File  names  are  normally  shown  when  multiple  files  are
+                 searched. By default, for matching lines, the  file  name  is
+                 followed by a colon; for context lines, a hyphen separator is
+                 used. The -Z option can be used to change the terminator to a
+                 zero  byte. If a line number is also being output, it follows
+                 the file name.  This option overrides any previous -H, -L, or
+                 -l options.

       --heap-limit=number
                 See --match-limit below.
@ -455,21 +463,23 @@ OPTIONS
                 Instead  of  outputting lines from the files, just output the
                 names of the files that do not contain any lines  that  would
                 have  been  output. Each file name is output once, on a sepa-
-                 rate line. This option overrides any previous -H, -h,  or  -l
-                 options.
+                 rate line by default, but if the -Z option is set,  they  are
+                 separated  by  zero  bytes  instead  of newlines. This option
+                 overrides any previous -H, -h, or -l options.

       -l, --files-with-matches
                 Instead of outputting lines from the files, just  output  the
                 names of the files containing lines that would have been out-
-                 put.  Each  file  name  is  output  once, on a separate line.
-                 Searching normally stops as soon as a matching line is  found
-                 in  a  file.  However, if the -c (count) option is also used,
-                 matching continues in order to obtain the correct count,  and
-                 those  files  that  have  at least one match are listed along
-                 with their counts. Using this option with -c is a way of sup-
-                 pressing  the  listing  of  files with no matches that occurs
-                 with -c on its own. This option overrides  any  previous  -H,
-                 -h, or -L options.
+                 put. Each file name is output once, on a separate  line,  but
+                 if the -Z option is set, they are separated by zero bytes in-
+                 stead of newlines. Searching normally  stops  as  soon  as  a
+                 matching  line is found in a file. However, if the -c (count)
+                 option is also used, matching continues in  order  to  obtain
+                 the  correct  count,  and  those files that have at least one
+                 match are listed along with their counts. Using  this  option
+                 with  -c is a way of suppressing the listing of files with no
+                 matches that occurs with -c on its own. This option overrides
+                 any previous -H, -h, or -L options.

       --label=name
                 This option supplies a name to be used for the standard input
@ -571,11 +581,8 @@ OPTIONS
                 an error occurs.

                 The  --heap-limit  option specifies, as a number of kibibytes
-                 (units of 1024 bytes), the amount of heap memory that may  be
-                 used for matching. Heap memory is needed only if matching the
-                 pattern requires a significant number of nested  backtracking
-                 points to be remembered. This parameter can be set to zero to
-                 forbid the use of heap memory altogether.
+                 (units of 1024 bytes), the maximum amount of heap memory that
+                 may be used for matching.

                 The  --depth-limit  option  limits  the depth of nested back-
                 tracking points, which indirectly limits the amount of memory
@ -812,6 +819,13 @@ OPTIONS
                 does  not apply to patterns specified by any of the --include
                 or --exclude options.

+       -Z, --null
+                 Terminate file names in the regular output with a  zero  byte
+                 (the  NUL  character)  instead of what would normally appear.
+                 This is useful when file  names  contain  unusual  characters
+                 such  as  colons,  hyphens, or even newlines. The option does
+                 not apply to file names in error messages.
+

 ENVIRONMENT VARIABLES

@ -1022,5 +1036,5 @@ AUTHOR

 REVISION

-       Last updated: 31 August 2021
-       Copyright (c) 1997-2021 University of Cambridge.
+       Last updated: 30 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -1111,7 +1111,8 @@ SUBJECT MODIFIERS
             copy=<number or name>      copy captured substring
             depth_limit=<n>            set a depth limit
             dfa                        use pcre2_dfa_match()
-             find_limits                find match and depth limits
+             find_limits                find heap, match and depth limits
+             find_limits_noheap         find match and depth limits
             get=<number or name>       extract captured substring
             getall                     extract all captured substrings
         /g  global                     global matching
@ -1411,7 +1412,7 @@ SUBJECT MODIFIERS

       The heap_limit, match_limit, and depth_limit modifiers set  the  appro-
       priate  limits  in the match context. These values are ignored when the
-       find_limits modifier is specified.
+       find_limits or find_limits_noheap modifier is specified.

   Finding minimum limits

@ -1419,8 +1420,12 @@ SUBJECT MODIFIERS
       calls  the  relevant matching function several times, setting different
       values   in   the    match    context    via    pcre2_set_heap_limit(),
       pcre2_set_match_limit(),  or pcre2_set_depth_limit() until it finds the
-       minimum values for each parameter that allows  the  match  to  complete
-       without error. If JIT is being used, only the match limit is relevant.
+       smallest value for each parameter that allows  the  match  to  complete
+       without a "limit exceeded" error. The match itself may succeed or fail.
+       An alternative modifier, find_limits_noheap, omits the heap limit. This
+       is  used  in  the standard tests, because the minimum heap limit varies
+       between systems. If JIT is being used, only the match  limit  is  rele-
+       vant, and the other two are automatically omitted.

       When using this modifier, the pattern should not contain any limit set-
       tings such as (*LIMIT_MATCH=...)  within  it.  If  such  a  setting  is
@ -1446,9 +1451,7 @@ SUBJECT MODIFIERS

       For  both  kinds  of  matching,  the  heap_limit  number,  which  is in
       kibibytes (units of 1024 bytes), limits the amount of heap memory  used
-       for matching. A value of zero disables the use of any heap memory; many
-       simple pattern matches can be done without using the heap, so  zero  is
-       not an unreasonable setting.
+       for matching.

   Showing MARK names

@ -1463,13 +1466,11 @@ SUBJECT MODIFIERS

       The  memory modifier causes pcre2test to log the sizes of all heap mem-
       ory  allocation  and  freeing  calls  that  occur  during  a  call   to
-       pcre2_match()  or  pcre2_dfa_match(). These occur only when a match re-
-       quires a bigger vector than the default  for  remembering  backtracking
-       points  (pcre2_match())  or for internal workspace (pcre2_dfa_match()).
-       In many cases there will be no heap memory used and therefore no  addi-
-       tional output. No heap memory is allocated during matching with JIT, so
-       in that case the memory modifier never has any effect. For  this  modi-
-       fier  to  work,  the  null_context modifier must not be set on both the
+       pcre2_match()  or pcre2_dfa_match(). In the latter case, heap memory is
+       used only when a match requires more internal workspace  than  the  de-
+       fault  allocation  on the stack, so in many cases there will be no out-
+       put. No heap memory is allocated during matching  with  JIT.  For  this
+       modifier to work, the null_context modifier must not be set on both the
       pattern and the subject, though it can be set on one or the other.

   Setting a starting offset
@ -1518,7 +1519,8 @@ SUBJECT MODIFIERS
       null_context  modifier  is  set,  however,  NULL is passed. This is for
       testing that the matching and substitution functions  behave  correctly
       in  this  case  (they use default values). This modifier cannot be used
-       with the find_limits or substitute_callout modifiers.
+       with the find_limits, find_limits_noheap, or  substitute_callout  modi-
+       fiers.

       Similarly,  for  testing purposes, if the null_subject or null_replace-
       ment modifier is set, the subject or replacement  string  pointers  are
@ -1949,5 +1951,5 @@ AUTHOR

 REVISION

-       Last updated: 12 January 2022
+       Last updated: 27 July 2022
       Copyright (c) 1997-2022 University of Cambridge.
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@ -205,9 +205,6 @@ point. */
 *               Global variables                 *
 *************************************************/

-/* Jeffrey Friedl has some debugging requirements that are not part of the
-regular code. */
-
 static const char *colour_string = "1;31";
 static const char *colour_option = NULL;
 static const char *dee_option = NULL;
@ -220,6 +217,10 @@ static const char *output_text = NULL;

 static char *main_buffer = NULL;

+static const char *printname_nl = STDOUT_NL;  /* Changed to NULL for -Z */
+static int printname_colon = ':';             /* Changed to 0 for -Z */
+static int printname_hyphen = '-';            /* Changed to 0 for -Z */
+
 static int after_context = 0;
 static int before_context = 0;
 static int binary_files = BIN_BINARY;
@ -483,6 +484,7 @@ static option_item optionlist[] = {
  { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
  { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
+  { OP_NODATA,    'Z',      NULL,              "null",          "output 0 byte after file names"  },
  { OP_NODATA,    0,        NULL,               NULL,            NULL }
 };

@ -1773,7 +1775,7 @@ if (after_context > 0 && lastmatchnumber > 0)
    {
    char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
    if (ellength == 0 && pp == main_buffer + bufsize) break;
-    if (printname != NULL) fprintf(stdout, "%s-", printname);
+    if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
    if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
    FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
    lastmatchrestart = pp;
@ -2730,7 +2732,9 @@ while (ptr < endptr)

    else if (filenames == FN_MATCH_ONLY)
      {
-      fprintf(stdout, "%s" STDOUT_NL, printname);
+      fprintf(stdout, "%s", printname);
+      if (printname_nl == NULL) fprintf(stdout, "%c", 0);
+        else fprintf(stdout, "%s", printname_nl);
      return 0;
      }

@ -2749,7 +2753,8 @@ while (ptr < endptr)
        {
        PCRE2_SIZE oldstartoffset;

-        if (printname != NULL) fprintf(stdout, "%s:", printname);
+        if (printname != NULL) fprintf(stdout, "%s%c", printname,
+          printname_colon);
        if (number) fprintf(stdout, "%lu:", linenumber);

        /* Handle --line-offsets */
@ -2871,7 +2876,8 @@ while (ptr < endptr)
        while (lastmatchrestart < p)
          {
          char *pp = lastmatchrestart;
-          if (printname != NULL) fprintf(stdout, "%s-", printname);
+          if (printname != NULL) fprintf(stdout, "%s%c", printname,
+            printname_hyphen);
          if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
          pp = end_of_line(pp, endptr, &ellength);
          FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
@ -2912,7 +2918,8 @@ while (ptr < endptr)
          {
          int ellength;
          char *pp = p;
-          if (printname != NULL) fprintf(stdout, "%s-", printname);
+          if (printname != NULL) fprintf(stdout, "%s%c", printname,
+            printname_hyphen);
          if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
          pp = end_of_line(pp, endptr, &ellength);
          FWRITE_IGNORE(p, 1, pp - p, stdout);
@ -2926,7 +2933,8 @@ while (ptr < endptr)
      if (after_context > 0 || before_context > 0)
        endhyphenpending = TRUE;

-      if (printname != NULL) fprintf(stdout, "%s:", printname);
+      if (printname != NULL) fprintf(stdout, "%s%c", printname,
+        printname_colon);
      if (number) fprintf(stdout, "%lu:", linenumber);

      /* In multiline mode, or if colouring, we have to split the line(s) up
@ -3131,7 +3139,9 @@ were none. If we found a match, we won't have got this far. */

 if (filenames == FN_NOMATCH_ONLY)
  {
-  fprintf(stdout, "%s" STDOUT_NL, printname);
+  fprintf(stdout, "%s", printname);
+  if (printname_nl == NULL) fprintf(stdout, "%c", 0);
+    else fprintf(stdout, "%s", printname_nl);
  return 0;
  }

@ -3142,7 +3152,7 @@ if (count_only && !quiet)
  if (count > 0 || !omit_zero_count)
    {
    if (printname != NULL && filenames != FN_NONE)
-      fprintf(stdout, "%s:", printname);
+      fprintf(stdout, "%s%c", printname, printname_colon);
    fprintf(stdout, "%lu" STDOUT_NL, count);
    counts_printed++;
    }
@ -3528,8 +3538,6 @@ switch(letter)
  case 'u': options |= PCRE2_UTF; utf = TRUE; break;
  case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
  case 'v': invert = TRUE; break;
-  case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
-  case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;

  case 'V':
    {
@ -3540,6 +3548,10 @@ switch(letter)
  pcre2grep_exit(0);
  break;

+  case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
+  case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
+  case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
+
  default:
  fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
  pcre2grep_exit(usage(2));
@ -4259,8 +4271,6 @@ if (DEE_option != NULL)

 (void)pcre2_set_compile_extra_options(compile_context, extra_options);

-/* Check the values for Jeffrey Friedl's debugging options. */
-
 /* If use_jit is set, check whether JIT is available. If not, do not try
 to use JIT. */

--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@ -991,3 +991,22 @@ RC=0
 ---------------------------- Test 134 -----------------------------
 =AB3CD5=
 RC=0
+---------------------------- Test 135 -----------------------------
+./testdata/grepinputv@The word is cat in this line
+RC=0
+./testdata/grepinputv@./testdata/grepinputv@RC=0
+./testdata/grepinputv@This line contains \E and (regex) *meta* [characters].
+./testdata/grepinputv@The word is cat in this line
+./testdata/grepinputv@The caterpillar sat on the mat
+RC=0
+testdata/grepinputM3:start end in between start
+end and following
+testdata/grepinputM7:start end in between start
+end and following start
+end other stuff
+testdata/grepinputM11:start end in between start
+
+end
+testdata/grepinputM16:start end in between start
+end
+RC=0