Implement -Z in pcre2grep and update documentation

2022-07-30 17:41:49 +01:00 · 2022-07-30 17:41:49 +01:00 · 8b133fa0ba
parent cc5e121c8e
commit 8b133fa0ba
16 changed files with 994 additions and 868 deletions
--- a/2
+++ b/2
@ -49,6 +49,8 @@ tests.
 tests run by 'make check', but can be run manually. The current output is from 
 a 64-bit system.

+13. Implemented -Z aka --null in pcre2grep.
+

 Version 10.40 15-April-2022
 ---------------------------
--- a/42
+++ b/42
@ -68,6 +68,22 @@ diff -b  /dev/null /dev/null 2>/dev/null && cf="diff -b"
 diff -u  /dev/null /dev/null 2>/dev/null && cf="diff -u"
 diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"

+# Some tests involve NUL characters. It seems impossible to handle them easily
+# in many operating systems. An earlier version of this script used sed to
+# translate NUL into the string ZERO, but this didn't work on Solaris (aka
+# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
+# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
+# even when using GNU sed. A user suggested using tr instead, which
+# necessitates translating to a single character. However, on (some versions
+# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
+# /usr/xpg4/bin/tr is available, it can do so, so test for that.
+
+if [ -x /usr/xpg4/bin/tr ] ; then
+  tr=/usr/xpg4/bin/tr
+else
+  tr=tr
+fi
+
 # If this test is being run from "make check", $srcdir will be set. If not, set
 # it to the current or parent directory, whichever one contains the test data.
 # Subsequently, we run most of the pcre2grep tests in the source directory so
@ -685,6 +701,16 @@ echo "---------------------------- Test 134 -----------------------------" >>tes
 (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep

+echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
 # Now compare the results.

 $cf $srcdir/testdata/grepoutput testtrygrep
@ -759,22 +785,6 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
 printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep

-# This next test involves NUL characters. It seems impossible to handle them
-# easily in many operating systems. An earlier version of this script used sed
-# to translate NUL into the string ZERO, but this didn't work on Solaris (aka
-# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
-# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
-# even when using GNU sed. A user suggested using tr instead, which
-# necessitates translating to a single character (@). However, on (some
-# versions of?) Solaris, the normal "tr" cannot handle binary zeros, but if
-# /usr/xpg4/bin/tr is available, it can do so, so test for that.
-
-if [ -x /usr/xpg4/bin/tr ] ; then
-  tr=/usr/xpg4/bin/tr
-else
-  tr=tr
-fi
-
 printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
 printf 'abc\0def' >testNinputgrep
 $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@ -121,6 +121,7 @@ environment, for example.
       pcre2_substring.c
       pcre2_tables.c
       pcre2_ucd.c
+       pcre2_ucptables.c
       pcre2_valid_utf.c
       pcre2_xclass.c

@ -373,7 +374,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
   have been created.

-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
   the pcre2 source (wherein which the testdata folder resides), e.g.:

   set srcdir=C:\pcre2\pcre2-10.00
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@ -17,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
 You can access the archives and also subscribe or manage your subscription
 here:

-https://groups.google.com/pcre2-dev
+https://groups.google.com/g/pcre2-dev

 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@ -375,7 +375,8 @@ library. They are also documented in the pcre2build man page.
  necessary to specify something like LIBS="-lncurses" as well. This is
  because, to quote the readline INSTALL, "Readline uses the termcap functions,
  but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
+  applications which link with readline the option to choose an appropriate
+  library."
  If you get error messages about missing functions tgetstr, tgetent, tputs,
  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
  should fix it.
@ -400,10 +401,10 @@ library. They are also documented in the pcre2build man page.
  Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
  be created. This is normally run under valgrind or used when PCRE2 is
  compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.

 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
  which caused pcre2_match() to use individual blocks on the heap for
@ -695,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.

 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.

 Test 16 is run only when JIT support is not available. It checks that an
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@ -1017,7 +1017,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 </P>
 <P>
 A value for the heap limit may also be supplied by an item at the start of a
@ -1030,19 +1030,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The <b>pcre2_match()</b> function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+<b>pcre2_match()</b> uses the heap are given in the
+<a href="pcre2perform.html"><b>pcre2perform</b></a>
+documentation.
 </P>
 <P>
-Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 <br>
 <br>
 <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@ -1089,10 +1087,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
 <br>
 <br>
 This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@ -3148,11 +3146,11 @@ The backtracking match limit was reached.
 <pre>
  PCRE2_ERROR_NOMEMORY
 </pre>
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backgracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 <pre>
  PCRE2_ERROR_NULL
 </pre>
@ -4020,9 +4018,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC42" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 14 December 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@ -284,12 +284,11 @@ to the <b>configure</b> command. This setting also applies to the
 counting is done differently).
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The <b>pcre2_match()</b> function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation. The default limit (in effect unlimited) is 20 million. You can
 change this by a setting such as
@ -609,16 +608,16 @@ give a warning.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC26" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 08 December 2021
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@ -71,13 +71,15 @@ For example:
 <pre>
  pcre2grep some-pattern file1 - file3
 </pre>
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how <b>pcre2grep</b> behaves. In
-particular, the <b>-M</b> option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-<b>-N</b> (<b>--newline</b>) option.
+However, there are options that can change how <b>pcre2grep</b> behaves. For
+example, the <b>-M</b> option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the <b>-N</b>
+(<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
+not file names are shown, and the <b>-Z</b> option changes the file name
+terminator to a zero byte.
 </P>
 <P>
 The amount of memory used for buffering files that are being scanned is
@ -178,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of <i>number</i>
-is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
+context lines (the <b>-Z</b> option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
+<b>-A</b> is ignored.
 </P>
 <P>
 <b>-a</b>, <b>--text</b>
@ -199,9 +203,10 @@ Output up to <i>number</i> lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 <i>number</i> lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of <i>number</i> is expected to be relatively small. When
+instead of a colon for the context lines (the <b>-Z</b> option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of <i>number</i> is expected to be relatively small. When
 <b>-c</b> is used, <b>-B</b> is ignored.
 </P>
 <P>
@ -411,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
 <P>
 <b>-H</b>, <b>--with-filename</b>
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the <b>-M</b> option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the <b>-M</b> option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>-h</b>, <b>--no-filename</b>
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
+Suppress the output file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The <b>-Z</b> option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>--heap-limit</b>=<i>number</i>
@ -481,18 +488,20 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
 <b>-L</b>, <b>--files-without-match</b>
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous <b>-H</b>,
-<b>-h</b>, or <b>-l</b> options.
+output once, on a separate line by default, but if the <b>-Z</b> option is set, 
+they are separated by zero bytes instead of newlines. This option overrides any
+previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>-l</b>, <b>--files-with-matches</b>
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the <b>-c</b> (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches that
+a separate line, but if the <b>-Z</b> option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches that
 occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
 <b>-h</b>, or <b>-L</b> options.
 </P>
@ -592,10 +601,7 @@ value set by <b>--match-limit</b> is reached, an error occurs.
 <br>
 <br>
 The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 <br>
 <br>
 The <b>--depth-limit</b> option limits the depth of nested backtracking points,
@ -839,6 +845,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the <b>--include</b> or <b>--exclude</b> options.
 </P>
+<P>
+<b>-Z</b>, <b>--null</b>
+Terminate files names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
+</P>
 <br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 <P>
 The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
@ -1053,9 +1066,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 31 August 2021
+Last updated: 30 July 2022
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
 </P>
+<P>
+The maximum amount of heap memory used for matching is controlled by the heap 
+limit, which can be set in a pattern or in a match context. The default is a 
+very large number, effectively unlimited.
+</P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@ -86,9 +91,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 02 February 2019
+Last updated: 26 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code. 
+</P>
+<P>
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+</P>
+<P>
+Until release 10.41, an initial 20KiB frames vector was allocated on the system 
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to <b>pcre2_match()</b>. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+</P>
+<P>
+The size of the initial block is the larger of 20KiB or ten times the pattern's 
+frame size, unless the heap limit is less than this, in which case the heap 
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is 
+checked only when a new block is to be allocated. Reducing the heap limit 
+between calls to <b>pcre2_match()</b> with the same match data block does not 
+affect the saved block.
 </P>
 <P>
 In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 February 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@ -1241,7 +1241,8 @@ pattern, but can be overridden by modifiers on the subject.
      copy=&#60;number or name&#62;      copy captured substring
      depth_limit=&#60;n&#62;            set a depth limit
      dfa                        use <b>pcre2_dfa_match()</b>
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
      get=&#60;number or name&#62;       extract captured substring
      getall                     extract all captured substrings
  /g  global                     global matching
@ -1564,7 +1565,7 @@ Setting heap, match, and depth limits
 <P>
 The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
 the appropriate limits in the match context. These values are ignored when the
-<b>find_limits</b> modifier is specified.
+<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
 </P>
 <br><b>
 Finding minimum limits
@ -1574,8 +1575,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
 calls the relevant matching function several times, setting different values in
 the match context via <b>pcre2_set_heap_limit()</b>,
 <b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 </P>
 <P>
 When using this modifier, the pattern should not contain any limit settings
@ -1603,9 +1608,7 @@ overall amount of computing resource that is used.
 </P>
 <P>
 For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 </P>
 <br><b>
 Showing MARK names
@ -1623,12 +1626,10 @@ Showing memory usage
 <P>
 The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the <b>memory</b> modifier never has any effect. For this modifier to work, the
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
+is used only when a match requires more internal workspace that the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 <b>null_context</b> modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 </P>
@ -1690,7 +1691,8 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
 If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-<b>find_limits</b> or <b>substitute_callout</b> modifiers.
+<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
+modifiers.
 </P>
 <P>
 Similarly, for testing purposes, if the <b>null_subject</b> or
@ -2141,7 +2143,7 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 12 January 2022
+Last updated: 27 July 2022
 <br>
 Copyright &copy; 1997-2022 University of Cambridge.
 <br>
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@ -1028,7 +1028,7 @@ PCRE2 CONTEXTS
       pcre2jit  documentation for more details). If the limit is reached, the
       negative error code  PCRE2_ERROR_HEAPLIMIT  is  returned.  The  default
       limit  can be set when PCRE2 is built; if it is not, the default is set
-       very large and is essentially "unlimited".
+       very large and is essentially unlimited.

       A value for the heap limit may also be supplied by an item at the start
       of a pattern of the form
@ -1039,19 +1039,15 @@ PCRE2 CONTEXTS
       less ddd is less than the limit set by the caller of pcre2_match()  or,
       if no such limit is set, less than the default.

-       The  pcre2_match() function starts out using a 20KiB vector on the sys-
-       tem stack for recording backtracking points. The more nested backtrack-
-       ing  points  there  are (that is, the deeper the search tree), the more
-       memory is needed.  Heap memory is used only if the  initial  vector  is
-       too small. If the heap limit is set to a value less than 21 (in partic-
-       ular, zero) no heap memory will be used. In this  case,  only  patterns
-       that  do not have a lot of nested backtracking can be successfully pro-
-       cessed.
+       The  pcre2_match() function always needs some heap memory, so setting a
+       value of zero guarantees a "heap limit exceeded" error. Details of  how
+       pcre2_match()  uses  the  heap are given in the pcre2perform documenta-
+       tion.

-       Similarly, for pcre2_dfa_match(), a vector on the system stack is  used
-       when  processing pattern recursions, lookarounds, or atomic groups, and
-       only if this is not big enough is heap memory used. In this case,  too,
-       setting a value of zero disables the use of the heap.
+       For pcre2_dfa_match(), a vector on the system stack is used  when  pro-
+       cessing  pattern recursions, lookarounds, or atomic groups, and only if
+       this is not big enough is heap memory used. In  this  case,  setting  a
+       value of zero disables the use of the heap.

       int pcre2_set_match_limit(pcre2_match_context *mcontext,
         uint32_t value);
@ -1093,12 +1089,12 @@ PCRE2 CONTEXTS

       This   parameter   limits   the   depth   of   nested  backtracking  in
       pcre2_match().  Each time a nested backtracking point is passed, a  new
-       memory "frame" is used to remember the state of matching at that point.
+       memory  frame  is used to remember the state of matching at that point.
       Thus, this parameter indirectly limits the amount  of  memory  that  is
-       used  in  a match. However, because the size of each memory "frame" de-
-       pends on the number of capturing parentheses, the actual  memory  limit
-       varies  from pattern to pattern. This limit was more useful in versions
-       before 10.30, where function recursion was used for backtracking.
+       used in a match. However, because the size of each memory frame depends
+       on the number of capturing parentheses, the actual memory limit  varies
+       from  pattern to pattern. This limit was more useful in versions before
+       10.30, where function recursion was used for backtracking.

       The depth limit is not relevant, and is ignored, when matching is  done
       using JIT compiled code. However, it is supported by pcre2_dfa_match(),
@ -3051,12 +3047,12 @@ ERROR RETURNS FROM pcre2_match()

         PCRE2_ERROR_NOMEMORY

-       If  a  pattern contains many nested backtracking points, heap memory is
-       used to remember them. This error is given when the  memory  allocation
-       function  (default  or  custom)  fails.  Note  that  a different error,
-       PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed  exceeds
-       the    heap   limit.   PCRE2_ERROR_NOMEMORY   is   also   returned   if
-       PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+       Heap  memory  is  used  to  remember backgracking points. This error is
+       given when the memory allocation function (default  or  custom)  fails.
+       Note  that  a  different  error, PCRE2_ERROR_HEAPLIMIT, is given if the
+       amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
+       also  returned  if PCRE2_COPY_MATCHED_SUBJECT is set and memory alloca-
+       tion fails.

         PCRE2_ERROR_NULL

@ -3860,8 +3856,8 @@ AUTHOR

 REVISION

-       Last updated: 14 December 2021
-       Copyright (c) 1997-2021 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -4118,41 +4114,40 @@ LIMITING PCRE2 RESOURCE USAGE
       pcre2_dfa_match() matching function, and to JIT  matching  (though  the
       counting is done differently).

-       The  pcre2_match() function starts out using a 20KiB vector on the sys-
-       tem stack to record backtracking points. The more  nested  backtracking
-       points there are (that is, the deeper the search tree), the more memory
-       is needed. If the initial vector is not large enough,  heap  memory  is
-       used,  up to a certain limit, which is specified in kibibytes (units of
-       1024 bytes). The limit can be changed at run time, as described in  the
-       pcre2api  documentation.  The default limit (in effect unlimited) is 20
-       million. You can change this by a setting such as
+       The  pcre2_match()  function  uses  heap  memory to record backtracking
+       points. The more nested backtracking points there  are  (that  is,  the
+       deeper  the  search tree), the more memory is needed. There is an upper
+       limit, specified in kibibytes (units of 1024 bytes). This limit can  be
+       changed  at  run  time, as described in the pcre2api documentation. The
+       default limit (in effect unlimited) is 20 million. You can change  this
+       by a setting such as

         --with-heap-limit=500

-       which limits the amount of heap to 500 KiB. This limit applies only  to
+       which  limits the amount of heap to 500 KiB. This limit applies only to
       interpretive matching in pcre2_match() and pcre2_dfa_match(), which may
-       also use the heap for internal workspace  when  processing  complicated
-       patterns.  This limit does not apply when JIT (which has its own memory
+       also  use  the  heap for internal workspace when processing complicated
+       patterns. This limit does not apply when JIT (which has its own  memory
       arrangements) is used.

-       You can also explicitly limit the depth of nested backtracking  in  the
+       You  can  also explicitly limit the depth of nested backtracking in the
       pcre2_match() interpreter. This limit defaults to the value that is set
-       for --with-match-limit. You can set a lower default  limit  by  adding,
+       for  --with-match-limit.  You  can set a lower default limit by adding,
       for example,

         --with-match-limit-depth=10000

-       to  the  configure  command.  This value can be overridden at run time.
-       This depth limit indirectly limits the amount of heap  memory  that  is
-       used,  but because the size of each backtracking "frame" depends on the
-       number of capturing parentheses in a pattern, the amount of  heap  that
-       is  used  before  the  limit is reached varies from pattern to pattern.
+       to the configure command. This value can be  overridden  at  run  time.
+       This  depth  limit  indirectly limits the amount of heap memory that is
+       used, but because the size of each backtracking "frame" depends on  the
+       number  of  capturing parentheses in a pattern, the amount of heap that
+       is used before the limit is reached varies  from  pattern  to  pattern.
       This limit was more useful in versions before 10.30, where function re-
       cursion was used for backtracking.

       As well as applying to pcre2_match(), the depth limit also controls the
-       depth of recursive function calls in pcre2_dfa_match(). These are  used
-       for  lookaround  assertions,  atomic  groups, and recursion within pat-
+       depth  of recursive function calls in pcre2_dfa_match(). These are used
+       for lookaround assertions, atomic groups,  and  recursion  within  pat-
       terns.  The limit does not apply to JIT matching.


@ -4160,67 +4155,67 @@ CREATING CHARACTER TABLES AT BUILD TIME

       PCRE2 uses fixed tables for processing characters whose code points are
       less than 256. By default, PCRE2 is built with a set of tables that are
-       distributed in the file src/pcre2_chartables.c.dist. These  tables  are
+       distributed  in  the file src/pcre2_chartables.c.dist. These tables are
       for ASCII codes only. If you add

         --enable-rebuild-chartables

-       to  the  configure  command, the distributed tables are no longer used.
+       to the configure command, the distributed tables are  no  longer  used.
       Instead, a program called pcre2_dftables is compiled and run. This out-
       puts the source for new set of tables, created in the default locale of
-       your C run-time system. This method of replacing the  tables  does  not
+       your  C  run-time  system. This method of replacing the tables does not
       work if you are cross compiling, because pcre2_dftables needs to be run
       on the local host and therefore not compiled with the cross compiler.

       If you need to create alternative tables when cross compiling, you will
-       have  to  do so "by hand". There may also be other reasons for creating
-       tables manually.  To cause pcre2_dftables to  be  built  on  the  local
+       have to do so "by hand". There may also be other reasons  for  creating
+       tables  manually.   To  cause  pcre2_dftables  to be built on the local
       host, run a normal compiling command, and then run the program with the
       output file as its argument, for example:

         cc src/pcre2_dftables.c -o pcre2_dftables
         ./pcre2_dftables src/pcre2_chartables.c

-       This builds the tables in the default locale of the local host. If  you
+       This  builds the tables in the default locale of the local host. If you
       want to specify a locale, you must use the -L option:

         LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c

       You can also specify -b (with or without -L). This causes the tables to
-       be written in binary instead of as source code. A set of binary  tables
-       can  be  loaded  into memory by an application and passed to pcre2_com-
+       be  written in binary instead of as source code. A set of binary tables
+       can be loaded into memory by an application and  passed  to  pcre2_com-
       pile() in the same way as tables created by calling pcre2_maketables().
-       The  tables are just a string of bytes, independent of hardware charac-
-       teristics such as endianness. This means they can be  bundled  with  an
-       application  that  runs in different environments, to ensure consistent
+       The tables are just a string of bytes, independent of hardware  charac-
+       teristics  such  as  endianness. This means they can be bundled with an
+       application that runs in different environments, to  ensure  consistent
       behaviour.


 USING EBCDIC CODE

-       PCRE2 assumes by default that it will run in an environment  where  the
-       character  code is ASCII or Unicode, which is a superset of ASCII. This
+       PCRE2  assumes  by default that it will run in an environment where the
+       character code is ASCII or Unicode, which is a superset of ASCII.  This
       is the case for most computer operating systems. PCRE2 can, however, be
       compiled to run in an 8-bit EBCDIC environment by adding

         --enable-ebcdic --disable-unicode

       to the configure command. This setting implies --enable-rebuild-charta-
-       bles. You should only use it if you know that you are in an EBCDIC  en-
+       bles.  You should only use it if you know that you are in an EBCDIC en-
       vironment (for example, an IBM mainframe operating system).

-       It  is  not possible to support both EBCDIC and UTF-8 codes in the same
-       version of the library. Consequently,  --enable-unicode  and  --enable-
+       It is not possible to support both EBCDIC and UTF-8 codes in  the  same
+       version  of  the  library. Consequently, --enable-unicode and --enable-
       ebcdic are mutually exclusive.

       The EBCDIC character that corresponds to an ASCII LF is assumed to have
-       the value 0x15 by default. However, in some EBCDIC  environments,  0x25
+       the  value  0x15 by default. However, in some EBCDIC environments, 0x25
       is used. In such an environment you should use

         --enable-ebcdic-nl25

       as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR
-       has the same value as in ASCII, namely, 0x0d.  Whichever  of  0x15  and
+       has  the  same  value  as in ASCII, namely, 0x0d. Whichever of 0x15 and
       0x25 is not chosen as LF is made to correspond to the Unicode NEL char-
       acter (which, in Unicode, is 0x85).

@ -4232,47 +4227,47 @@ USING EBCDIC CODE
 PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS

       By default pcre2grep supports the use of callouts with string arguments
-       within  the patterns it is matching. There are two kinds: one that gen-
+       within the patterns it is matching. There are two kinds: one that  gen-
       erates output using local code, and another that calls an external pro-
-       gram  or  script.   If --disable-pcre2grep-callout-fork is added to the
-       configure command, only the first kind  of  callout  is  supported;  if
-       --disable-pcre2grep-callout  is  used,  all callouts are completely ig-
-       nored. For more details of pcre2grep callouts, see the pcre2grep  docu-
+       gram or script.  If --disable-pcre2grep-callout-fork is  added  to  the
+       configure  command,  only  the  first  kind of callout is supported; if
+       --disable-pcre2grep-callout is used, all callouts  are  completely  ig-
+       nored.  For more details of pcre2grep callouts, see the pcre2grep docu-
       mentation.


 PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT

-       By  default,  pcre2grep reads all files as plain text. You can build it
-       so that it recognizes files whose names end in .gz or .bz2,  and  reads
+       By default, pcre2grep reads all files as plain text. You can  build  it
+       so  that  it recognizes files whose names end in .gz or .bz2, and reads
       them with libz or libbz2, respectively, by adding one or both of

         --enable-pcre2grep-libz
         --enable-pcre2grep-libbz2

       to the configure command. These options naturally require that the rel-
-       evant libraries are installed on your system. Configuration  will  fail
+       evant  libraries  are installed on your system. Configuration will fail
       if they are not.


 PCRE2GREP BUFFER SIZE

-       pcre2grep  uses an internal buffer to hold a "window" on the file it is
+       pcre2grep uses an internal buffer to hold a "window" on the file it  is
       scanning, in order to be able to output "before" and "after" lines when
       it finds a match. The default starting size of the buffer is 20KiB. The
-       buffer itself is three times this size, but because of the  way  it  is
+       buffer  itself  is  three times this size, but because of the way it is
       used for holding "before" lines, the longest line that is guaranteed to
       be processable is the notional buffer size. If a longer line is encoun-
-       tered,  pcre2grep  automatically  expands the buffer, up to a specified
-       maximum size, whose default is 1MiB or the starting size, whichever  is
-       the  larger. You can change the default parameter values by adding, for
+       tered, pcre2grep automatically expands the buffer, up  to  a  specified
+       maximum  size, whose default is 1MiB or the starting size, whichever is
+       the larger. You can change the default parameter values by adding,  for
       example,

         --with-pcre2grep-bufsize=51200
         --with-pcre2grep-max-bufsize=2097152

-       to the configure command. The caller of pcre2grep  can  override  these
-       values  by  using  --buffer-size  and  --max-buffer-size on the command
+       to  the  configure  command. The caller of pcre2grep can override these
+       values by using --buffer-size  and  --max-buffer-size  on  the  command
       line.


@ -4283,26 +4278,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
         --enable-pcre2test-libreadline
         --enable-pcre2test-libedit

-       to the configure command, pcre2test is linked with the libreadline  or-
-       libedit  library,  respectively, and when its input is from a terminal,
-       it reads it using the readline() function. This  provides  line-editing
-       and  history  facilities.  Note that libreadline is GPL-licensed, so if
-       you distribute a binary of pcre2test linked in this way, there  may  be
+       to  the configure command, pcre2test is linked with the libreadline or-
+       libedit library, respectively, and when its input is from  a  terminal,
+       it  reads  it using the readline() function. This provides line-editing
+       and history facilities. Note that libreadline is  GPL-licensed,  so  if
+       you  distribute  a binary of pcre2test linked in this way, there may be
       licensing issues. These can be avoided by linking instead with libedit,
       which has a BSD licence.

-       Setting --enable-pcre2test-libreadline causes the -lreadline option  to
-       be  added to the pcre2test build. In many operating environments with a
-       sytem-installed readline library this is sufficient. However,  in  some
+       Setting  --enable-pcre2test-libreadline causes the -lreadline option to
+       be added to the pcre2test build. In many operating environments with  a
+       sytem-installed  readline  library this is sufficient. However, in some
       environments (e.g. if an unmodified distribution version of readline is
-       in use), some extra configuration may be necessary.  The  INSTALL  file
+       in  use),  some  extra configuration may be necessary. The INSTALL file
       for libreadline says this:

         "Readline uses the termcap functions, but does not link with
         the termcap or curses library itself, allowing applications
         which link with readline the to choose an appropriate library."

-       If  your environment has not been set up so that an appropriate library
+       If your environment has not been set up so that an appropriate  library
       is automatically included, you may need to add something like

         LIBS="-ncurses"
@ -4316,7 +4311,7 @@ INCLUDING DEBUGGING CODE

         --enable-debug

-       to the configure command, additional debugging code is included in  the
+       to  the configure command, additional debugging code is included in the
       build. This feature is intended for use by the PCRE2 maintainers.


@ -4326,14 +4321,14 @@ DEBUGGING WITH VALGRIND SUPPORT

         --enable-valgrind

-       to  the  configure command, PCRE2 will use valgrind annotations to mark
-       certain memory regions as unaddressable. This allows it to  detect  in-
+       to the configure command, PCRE2 will use valgrind annotations  to  mark
+       certain  memory  regions as unaddressable. This allows it to detect in-
       valid memory accesses, and is mostly useful for debugging PCRE2 itself.


 CODE COVERAGE REPORTING

-       If  your  C  compiler is gcc, you can build a version of PCRE2 that can
+       If your C compiler is gcc, you can build a version of  PCRE2  that  can
       generate a code coverage report for its test suite. To enable this, you
       must install lcov version 1.6 or above. Then specify

@ -4342,20 +4337,20 @@ CODE COVERAGE REPORTING
       to the configure command and build PCRE2 in the usual way.

       Note that using ccache (a caching C compiler) is incompatible with code
-       coverage reporting. If you have configured ccache to run  automatically
+       coverage  reporting. If you have configured ccache to run automatically
       on your system, you must set the environment variable

         CCACHE_DISABLE=1

       before running make to build PCRE2, so that ccache is not used.

-       When  --enable-coverage  is  used,  the  following addition targets are
+       When --enable-coverage is used,  the  following  addition  targets  are
       added to the Makefile:

         make coverage

-       This creates a fresh coverage report for the PCRE2 test  suite.  It  is
-       equivalent  to running "make coverage-reset", "make coverage-baseline",
+       This  creates  a  fresh coverage report for the PCRE2 test suite. It is
+       equivalent to running "make coverage-reset", "make  coverage-baseline",
       "make check", and then "make coverage-report".

         make coverage-reset
@ -4372,73 +4367,73 @@ CODE COVERAGE REPORTING

         make coverage-clean-report

-       This removes the generated coverage report without cleaning the  cover-
+       This  removes the generated coverage report without cleaning the cover-
       age data itself.

         make coverage-clean-data

-       This  removes  the captured coverage data without removing the coverage
+       This removes the captured coverage data without removing  the  coverage
       files created at compile time (*.gcno).

         make coverage-clean

-       This cleans all coverage data including the generated coverage  report.
-       For  more  information about code coverage, see the gcov and lcov docu-
+       This  cleans all coverage data including the generated coverage report.
+       For more information about code coverage, see the gcov and  lcov  docu-
       mentation.


 DISABLING THE Z AND T FORMATTING MODIFIERS

-       The C99 standard defines formatting modifiers z and t  for  size_t  and
-       ptrdiff_t  values, respectively. By default, PCRE2 uses these modifiers
+       The  C99  standard  defines formatting modifiers z and t for size_t and
+       ptrdiff_t values, respectively. By default, PCRE2 uses these  modifiers
       in environments other than old versions of Microsoft Visual Studio when
-       __STDC_VERSION__  is  defined  and has a value greater than or equal to
-       199901L (indicating support for C99).  However, there is at  least  one
+       __STDC_VERSION__ is defined and has a value greater than  or  equal  to
+       199901L  (indicating  support for C99).  However, there is at least one
       environment that claims to be C99 but does not support these modifiers.
       If

         --disable-percent-zt

       is specified, no use is made of the z or t modifiers. Instead of %td or
-       %zu,  a  suitable  format is used depending in the size of long for the
+       %zu, a suitable format is used depending in the size of  long  for  the
       platform.


 SUPPORT FOR FUZZERS

-       There is a special option for use by people who  want  to  run  fuzzing
+       There  is  a  special  option for use by people who want to run fuzzing
       tests on PCRE2:

         --enable-fuzz-support

       At present this applies only to the 8-bit library. If set, it causes an
-       extra library called libpcre2-fuzzsupport.a to be built,  but  not  in-
-       stalled.  This  contains  a single function called LLVMFuzzerTestOneIn-
-       put() whose arguments are a pointer to a string and the length  of  the
-       string.  When  called,  this  function tries to compile the string as a
-       pattern, and if that succeeds, to match it.  This is done both with  no
-       options  and  with some random options bits that are generated from the
+       extra  library  called  libpcre2-fuzzsupport.a to be built, but not in-
+       stalled. This contains a single  function  called  LLVMFuzzerTestOneIn-
+       put()  whose  arguments are a pointer to a string and the length of the
+       string. When called, this function tries to compile  the  string  as  a
+       pattern,  and if that succeeds, to match it.  This is done both with no
+       options and with some random options bits that are generated  from  the
       string.

-       Setting --enable-fuzz-support also causes  a  binary  called  pcre2fuz-
-       zcheck  to be created. This is normally run under valgrind or used when
+       Setting  --enable-fuzz-support  also  causes  a binary called pcre2fuz-
+       zcheck to be created. This is normally run under valgrind or used  when
       PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
-       function  and  outputs  information  about  what it is doing. The input
-       strings are specified by arguments: if an argument starts with "="  the
-       rest  of it is a literal input string. Otherwise, it is assumed to be a
+       function and outputs information about what  it  is  doing.  The  input
+       strings  are specified by arguments: if an argument starts with "=" the
+       rest of it is a literal input string. Otherwise, it is assumed to be  a
       file name, and the contents of the file are the test string.


 OBSOLETE OPTION

-       In versions of PCRE2 prior to 10.30, there were two  ways  of  handling
-       backtracking  in the pcre2_match() function. The default was to use the
+       In  versions  of  PCRE2 prior to 10.30, there were two ways of handling
+       backtracking in the pcre2_match() function. The default was to use  the
       system stack, but if

         --disable-stack-for-recursion

-       was set, memory on the heap was used. From release 10.30  onwards  this
-       has  changed  (the  stack  is  no longer used) and this option now does
+       was  set,  memory on the heap was used. From release 10.30 onwards this
+       has changed (the stack is no longer used)  and  this  option  now  does
       nothing except give a warning.


@ -4450,14 +4445,14 @@ SEE ALSO
 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 08 December 2021
-       Copyright (c) 1997-2021 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -5596,18 +5591,22 @@ SIZE AND OTHER LIMITATIONS
       The maximum length of a string argument to a  callout  is  the  largest
       number a 32-bit unsigned integer can hold.

+       The  maximum  amount  of heap memory used for matching is controlled by
+       the heap limit, which can be set in a pattern or in  a  match  context.
+       The default is a very large number, effectively unlimited.
+

 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 02 February 2019
-       Copyright (c) 1997-2019 University of Cambridge.
+       Last updated: 26 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@ -9773,152 +9772,169 @@ STACK AND HEAP USAGE AT RUN TIME
       sive function calls could use a great deal of  stack,  and  this  could
       cause  problems, but this usage has been eliminated. Backtracking posi-
       tions are now explicitly remembered in memory frames controlled by  the
-       code.  An  initial  20KiB  vector  of frames is allocated on the system
-       stack (enough for about 100 frames for small patterns), but if this  is
-       insufficient,  heap  memory  is  used. The amount of heap memory can be
-       limited; if the limit is set to zero, only the initial stack vector  is
-       used.  Rewriting patterns to be time-efficient, as described below, may
-       also reduce the memory requirements.
+       code.

-       In contrast to  pcre2_match(),  pcre2_dfa_match()  does  use  recursive
-       function  calls,  but only for processing atomic groups, lookaround as-
+       The size of each frame depends on the size of pointer variables and the
+       number of capturing parenthesized groups in the pattern being  matched.
+       On a 64-bit system the frame size for a pattern with no captures is 128
+       bytes. For each capturing group the size increases by 16 bytes.
+
+       Until release 10.41, an initial 20KiB frames vector  was  allocated  on
+       the  system  stack,  but this still caused some issues for multi-thread
+       applications where each thread has a very  small  stack.  From  release
+       10.41  backtracking  memory  frames  are always held in heap memory. An
+       initial heap allocation is obtained the first time any match data block
+       is  passed  to  pcre2_match().  This  is remembered with the match data
+       block and re-used if that block is used for another match. It is  freed
+       when the match data block itself is freed.
+
+       The  size  of the initial block is the larger of 20KiB or ten times the
+       pattern's frame size, unless the heap limit is less than this, in which
+       case  the  heap  limit  is  used. If the initial block proves to be too
+       small during matching, it is replaced by a larger block, subject to the
+       heap  limit.  The  heap limit is checked only when a new block is to be
+       allocated. Reducing the heap limit between calls to pcre2_match()  with
+       the same match data block does not affect the saved block.
+
+       In  contrast  to  pcre2_match(),  pcre2_dfa_match()  does use recursive
+       function calls, but only for processing atomic groups,  lookaround  as-
       sertions, and recursion within the pattern. The original version of the
-       code  used  to  allocate  quite large internal workspace vectors on the
-       stack, which caused some problems for  some  patterns  in  environments
-       with  small  stacks.  From release 10.32 the code for pcre2_dfa_match()
-       has been re-factored to use heap memory  when  necessary  for  internal
-       workspace  when  recursing,  though  recursive function calls are still
+       code used to allocate quite large internal  workspace  vectors  on  the
+       stack,  which  caused  some  problems for some patterns in environments
+       with small stacks. From release 10.32 the  code  for  pcre2_dfa_match()
+       has  been  re-factored  to  use heap memory when necessary for internal
+       workspace when recursing, though recursive  function  calls  are  still
       used.

-       The "match depth" parameter can be used to limit the depth of  function
-       recursion,  and  the  "match  heap"  parameter  to limit heap memory in
+       The  "match depth" parameter can be used to limit the depth of function
+       recursion, and the "match heap"  parameter  to  limit  heap  memory  in
       pcre2_dfa_match().


 PROCESSING TIME

-       Certain items in regular expression patterns are processed  more  effi-
+       Certain  items  in regular expression patterns are processed more effi-
       ciently than others. It is more efficient to use a character class like
-       [aeiou]  than  a  set  of   single-character   alternatives   such   as
-       (a|e|i|o|u).  In  general,  the simplest construction that provides the
+       [aeiou]   than   a   set   of  single-character  alternatives  such  as
+       (a|e|i|o|u). In general, the simplest construction  that  provides  the
       required behaviour is usually the most efficient. Jeffrey Friedl's book
-       contains  a  lot  of useful general discussion about optimizing regular
+       contains a lot of useful general discussion  about  optimizing  regular
       expressions for efficient performance. This document contains a few ob-
       servations about PCRE2.

-       Using  Unicode  character  properties  (the  \p, \P, and \X escapes) is
-       slow, because PCRE2 has to use a multi-stage table lookup  whenever  it
-       needs  a  character's  property. If you can find an alternative pattern
+       Using Unicode character properties (the \p,  \P,  and  \X  escapes)  is
+       slow,  because  PCRE2 has to use a multi-stage table lookup whenever it
+       needs a character's property. If you can find  an  alternative  pattern
       that does not use character properties, it will probably be faster.

-       By default, the escape sequences \b, \d, \s,  and  \w,  and  the  POSIX
-       character  classes  such  as  [:alpha:]  do not use Unicode properties,
+       By  default,  the  escape  sequences  \b, \d, \s, and \w, and the POSIX
+       character classes such as [:alpha:]  do  not  use  Unicode  properties,
       partly for backwards compatibility, and partly for performance reasons.
-       However,  you  can  set  the PCRE2_UCP option or start the pattern with
-       (*UCP) if you want Unicode character properties to be  used.  This  can
-       double  the  matching  time  for  items  such  as \d, when matched with
-       pcre2_match(); the performance loss is less with a DFA  matching  func-
+       However, you can set the PCRE2_UCP option or  start  the  pattern  with
+       (*UCP)  if  you  want Unicode character properties to be used. This can
+       double the matching time for  items  such  as  \d,  when  matched  with
+       pcre2_match();  the  performance loss is less with a DFA matching func-
       tion, and in both cases there is not much difference for \b.

-       When  a pattern begins with .* not in atomic parentheses, nor in paren-
-       theses that are the subject of a backreference,  and  the  PCRE2_DOTALL
-       option  is  set,  the pattern is implicitly anchored by PCRE2, since it
-       can match only at the start of a subject string.  If  the  pattern  has
+       When a pattern begins with .* not in atomic parentheses, nor in  paren-
+       theses  that  are  the subject of a backreference, and the PCRE2_DOTALL
+       option is set, the pattern is implicitly anchored by  PCRE2,  since  it
+       can  match  only  at  the start of a subject string. If the pattern has
       multiple top-level branches, they must all be anchorable. The optimiza-
-       tion can be disabled by the PCRE2_NO_DOTSTAR_ANCHOR option, and is  au-
+       tion  can be disabled by the PCRE2_NO_DOTSTAR_ANCHOR option, and is au-
       tomatically disabled if the pattern contains (*PRUNE) or (*SKIP).

-       If  PCRE2_DOTALL  is  not set, PCRE2 cannot make this optimization, be-
-       cause the dot metacharacter does not then match a newline, and  if  the
-       subject  string contains newlines, the pattern may match from the char-
+       If PCRE2_DOTALL is not set, PCRE2 cannot make  this  optimization,  be-
+       cause  the  dot metacharacter does not then match a newline, and if the
+       subject string contains newlines, the pattern may match from the  char-
       acter immediately following one of them instead of from the very start.
       For example, the pattern

         .*second

-       matches  the subject "first\nand second" (where \n stands for a newline
-       character), with the match starting at the seventh character. In  order
-       to  do  this, PCRE2 has to retry the match starting after every newline
+       matches the subject "first\nand second" (where \n stands for a  newline
+       character),  with the match starting at the seventh character. In order
+       to do this, PCRE2 has to retry the match starting after  every  newline
       in the subject.

-       If you are using such a pattern with subject strings that do  not  con-
-       tain   newlines,   the   best   performance   is  obtained  by  setting
-       PCRE2_DOTALL, or starting the pattern with ^.* or ^.*? to indicate  ex-
-       plicit  anchoring.  That saves PCRE2 from having to scan along the sub-
+       If  you  are using such a pattern with subject strings that do not con-
+       tain  newlines,  the  best   performance   is   obtained   by   setting
+       PCRE2_DOTALL,  or starting the pattern with ^.* or ^.*? to indicate ex-
+       plicit anchoring. That saves PCRE2 from having to scan along  the  sub-
       ject looking for a newline to restart at.

-       Beware of patterns that contain nested indefinite  repeats.  These  can
-       take  a  long time to run when applied to a string that does not match.
+       Beware  of  patterns  that contain nested indefinite repeats. These can
+       take a long time to run when applied to a string that does  not  match.
       Consider the pattern fragment

         ^(a+)*

-       This can match "aaaa" in 16 different ways, and this  number  increases
-       very  rapidly  as the string gets longer. (The * repeat can match 0, 1,
-       2, 3, or 4 times, and for each of those cases other than 0 or 4, the  +
-       repeats  can  match  different numbers of times.) When the remainder of
-       the pattern is such that the entire match is going to fail,  PCRE2  has
-       in  principle to try every possible variation, and this can take an ex-
+       This  can  match "aaaa" in 16 different ways, and this number increases
+       very rapidly as the string gets longer. (The * repeat can match  0,  1,
+       2,  3, or 4 times, and for each of those cases other than 0 or 4, the +
+       repeats can match different numbers of times.) When  the  remainder  of
+       the  pattern  is such that the entire match is going to fail, PCRE2 has
+       in principle to try every possible variation, and this can take an  ex-
       tremely long time, even for relatively short strings.

       An optimization catches some of the more simple cases such as

         (a+)*b

-       where a literal character follows. Before  embarking  on  the  standard
-       matching  procedure, PCRE2 checks that there is a "b" later in the sub-
-       ject string, and if there is not, it fails the match immediately.  How-
-       ever,  when  there  is no following literal this optimization cannot be
+       where  a  literal  character  follows. Before embarking on the standard
+       matching procedure, PCRE2 checks that there is a "b" later in the  sub-
+       ject  string, and if there is not, it fails the match immediately. How-
+       ever, when there is no following literal this  optimization  cannot  be
       used. You can see the difference by comparing the behaviour of

         (a+)*\d

-       with the pattern above. The former gives  a  failure  almost  instantly
-       when  applied  to  a  whole  line of "a" characters, whereas the latter
+       with  the  pattern  above.  The former gives a failure almost instantly
+       when applied to a whole line of  "a"  characters,  whereas  the  latter
       takes an appreciable time with strings longer than about 20 characters.

       In many cases, the solution to this kind of performance issue is to use
-       an  atomic group or a possessive quantifier. This can often reduce mem-
+       an atomic group or a possessive quantifier. This can often reduce  mem-
       ory requirements as well. As another example, consider this pattern:

         ([^<]|<(?!inet))+

-       It matches from wherever it starts until it encounters "<inet"  or  the
-       end  of  the  data,  and is the kind of pattern that might be used when
+       It  matches  from wherever it starts until it encounters "<inet" or the
+       end of the data, and is the kind of pattern that  might  be  used  when
       processing an XML file. Each iteration of the outer parentheses matches
-       either  one  character that is not "<" or a "<" that is not followed by
-       "inet". However, each time a parenthesis is processed,  a  backtracking
-       position  is  passed,  so this formulation uses a memory frame for each
+       either one character that is not "<" or a "<" that is not  followed  by
+       "inet".  However,  each time a parenthesis is processed, a backtracking
+       position is passed, so this formulation uses a memory  frame  for  each
       matched character. For a long string, a lot of memory is required. Con-
-       sider  now  this  rewritten  pattern,  which  matches  exactly the same
+       sider now this  rewritten  pattern,  which  matches  exactly  the  same
       strings:

         ([^<]++|<(?!inet))+

       This runs much faster, because sequences of characters that do not con-
       tain "<" are "swallowed" in one item inside the parentheses, and a pos-
-       sessive quantifier is used to stop any backtracking into  the  runs  of
-       non-"<"  characters.  This  version also uses a lot less memory because
-       entry to a new set of parentheses happens only  when  a  "<"  character
-       that  is  not  followed by "inet" is encountered (and we assume this is
+       sessive  quantifier  is  used to stop any backtracking into the runs of
+       non-"<" characters. This version also uses a lot  less  memory  because
+       entry  to  a  new  set of parentheses happens only when a "<" character
+       that is not followed by "inet" is encountered (and we  assume  this  is
       relatively rare).

       This example shows that one way of optimizing performance when matching
-       long  subject strings is to write repeated parenthesized subpatterns to
+       long subject strings is to write repeated parenthesized subpatterns  to
       match more than one character whenever possible.

   SETTING RESOURCE LIMITS

-       You can set limits on the amount of processing that  takes  place  when
-       matching,  and  on  the amount of heap memory that is used. The default
+       You  can  set  limits on the amount of processing that takes place when
+       matching, and on the amount of heap memory that is  used.  The  default
       values of the limits are very large, and unlikely ever to operate. They
-       can  be  changed  when  PCRE2  is  built, and they can also be set when
-       pcre2_match() or pcre2_dfa_match() is called. For details of these  in-
-       terfaces,  see  the  pcre2build  documentation and the section entitled
+       can be changed when PCRE2 is built, and  they  can  also  be  set  when
+       pcre2_match()  or pcre2_dfa_match() is called. For details of these in-
+       terfaces, see the pcre2build documentation  and  the  section  entitled
       "The match context" in the pcre2api documentation.

-       The pcre2test test program has a modifier called  "find_limits"  which,
-       if  applied  to  a  subject line, causes it to find the smallest limits
+       The  pcre2test  test program has a modifier called "find_limits" which,
+       if applied to a subject line, causes it to  find  the  smallest  limits
       that allow a pattern to match. This is done by repeatedly matching with
       different limits.

@ -9926,14 +9942,14 @@ PROCESSING TIME
 AUTHOR

       Philip Hazel
-       University Computing Service
+       Retired from University Computing Service
       Cambridge, England.


 REVISION

-       Last updated: 03 February 2019
-       Copyright (c) 1997-2019 University of Cambridge.
+       Last updated: 27 July 2022
+       Copyright (c) 1997-2022 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "27 July 2022" "PCRE2 10.41"
+.TH PCRE2GREP 1 "30 July 2022" "PCRE2 10.41"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -43,13 +43,15 @@ For example:
 .sp
  pcre2grep some-pattern file1 - file3
 .sp
-Input files are searched line by line. By default, each line that matches a
+By default, input files are searched line by line. Each line that matches a
 pattern is copied to the standard output, and if there is more than one file,
 the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how \fBpcre2grep\fP behaves. In
-particular, the \fB-M\fP option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-\fB-N\fP (\fB--newline\fP) option.
+However, there are options that can change how \fBpcre2grep\fP behaves. For
+example, the \fB-M\fP option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the \fB-N\fP
+(\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
+not file names are shown, and the \fB-Z\fP option changes the file name
+terminator to a zero byte.
 .P
 The amount of memory used for buffering files that are being scanned is
 controlled by parameters that can be set by the \fB--buffer-size\fP and
@ -149,9 +151,11 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of \fInumber\fP
-is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
+context lines (the \fB-Z\fP option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
+\fB-A\fP is ignored.
 .TP
 \fB-a\fP, \fB--text\fP
 Treat binary files as text. This is equivalent to
@ -167,9 +171,10 @@ Output up to \fInumber\fP lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 \fInumber\fP lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of \fInumber\fP is expected to be relatively small. When
+instead of a colon for the context lines (the \fB-Z\fP option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of \fInumber\fP is expected to be relatively small. When
 \fB-c\fP is used, \fB-B\fP is ignored.
 .TP
 \fB--binary-files=\fP\fIword\fP
@ -356,19 +361,21 @@ shown separately. This option is mutually exclusive with \fB--output\fP,
 .TP
 \fB-H\fP, \fB--with-filename\fP
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the \fB-M\fP option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the \fB-M\fP option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
 .TP
 \fB-h\fP, \fB--no-filename\fP
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
+Suppress the output file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The \fB-Z\fP option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
 .TP
 \fB--heap-limit\fP=\fInumber\fP
 See \fB--match-limit\fP below.
@ -417,17 +424,19 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
 \fB-L\fP, \fB--files-without-match\fP
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous \fB-H\fP,
-\fB-h\fP, or \fB-l\fP options.
+output once, on a separate line by default, but if the \fB-Z\fP option is set, 
+they are separated by zero bytes instead of newlines. This option overrides any
+previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
 .TP
 \fB-l\fP, \fB--files-with-matches\fP
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the \fB-c\fP (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-\fB-c\fP is a way of suppressing the listing of files with no matches that
+a separate line, but if the \fB-Z\fP option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches that
 occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
 \fB-h\fP, or \fB-L\fP options.
 .TP
@ -516,7 +525,7 @@ counter that is incremented each time around its main processing loop. If the
 value set by \fB--match-limit\fP is reached, an error occurs.
 .sp
 The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
-1024 bytes), the maximum amount of heap memory that may be used for matching. 
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 .sp
 The \fB--depth-limit\fP option limits the depth of nested backtracking points,
 which indirectly limits the amount of memory that is used. The amount of memory
@ -729,6 +738,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
 pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-Z\fP, \fB--null\fP
+Terminate files names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
 .
 .
 .SH "ENVIRONMENT VARIABLES"
@ -957,6 +972,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 27 July 2022
+Last updated: 30 July 2022
 Copyright (c) 1997-2022 University of Cambridge.
 .fi
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@ -1111,7 +1111,8 @@ SUBJECT MODIFIERS
             copy=<number or name>      copy captured substring
             depth_limit=<n>            set a depth limit
             dfa                        use pcre2_dfa_match()
-             find_limits                find match and depth limits
+             find_limits                find heap, match and depth limits
+             find_limits_noheap         find match and depth limits
             get=<number or name>       extract captured substring
             getall                     extract all captured substrings
         /g  global                     global matching
@ -1411,7 +1412,7 @@ SUBJECT MODIFIERS

       The heap_limit, match_limit, and depth_limit modifiers set  the  appro-
       priate  limits  in the match context. These values are ignored when the
-       find_limits modifier is specified.
+       find_limits or find_limits_noheap modifier is specified.

   Finding minimum limits

@ -1419,8 +1420,12 @@ SUBJECT MODIFIERS
       calls  the  relevant matching function several times, setting different
       values   in   the    match    context    via    pcre2_set_heap_limit(),
       pcre2_set_match_limit(),  or pcre2_set_depth_limit() until it finds the
-       minimum values for each parameter that allows  the  match  to  complete
-       without error. If JIT is being used, only the match limit is relevant.
+       smallest value for each parameter that allows  the  match  to  complete
+       without a "limit exceeded" error. The match itself may succeed or fail.
+       An alternative modifier, find_limits_noheap, omits the heap limit. This
+       is  used  in  the standard tests, because the minimum heap limit varies
+       between systems. If JIT is being used, only the match  limit  is  rele-
+       vant, and the other two are automatically omitted.

       When using this modifier, the pattern should not contain any limit set-
       tings such as (*LIMIT_MATCH=...)  within  it.  If  such  a  setting  is
@ -1446,9 +1451,7 @@ SUBJECT MODIFIERS

       For  both  kinds  of  matching,  the  heap_limit  number,  which  is in
       kibibytes (units of 1024 bytes), limits the amount of heap memory  used
-       for matching. A value of zero disables the use of any heap memory; many
-       simple pattern matches can be done without using the heap, so  zero  is
-       not an unreasonable setting.
+       for matching.

   Showing MARK names

@ -1463,13 +1466,11 @@ SUBJECT MODIFIERS

       The  memory modifier causes pcre2test to log the sizes of all heap mem-
       ory  allocation  and  freeing  calls  that  occur  during  a  call   to
-       pcre2_match()  or  pcre2_dfa_match(). These occur only when a match re-
-       quires a bigger vector than the default  for  remembering  backtracking
-       points  (pcre2_match())  or for internal workspace (pcre2_dfa_match()).
-       In many cases there will be no heap memory used and therefore no  addi-
-       tional output. No heap memory is allocated during matching with JIT, so
-       in that case the memory modifier never has any effect. For  this  modi-
-       fier  to  work,  the  null_context modifier must not be set on both the
+       pcre2_match()  or pcre2_dfa_match(). In the latter case, heap memory is
+       used only when a match requires more internal workspace  that  the  de-
+       fault  allocation  on the stack, so in many cases there will be no out-
+       put. No heap memory is allocated during matching  with  JIT.  For  this
+       modifier to work, the null_context modifier must not be set on both the
       pattern and the subject, though it can be set on one or the other.

   Setting a starting offset
@ -1518,45 +1519,46 @@ SUBJECT MODIFIERS
       null_context  modifier  is  set,  however,  NULL is passed. This is for
       testing that the matching and substitution functions  behave  correctly
       in  this  case  (they use default values). This modifier cannot be used
-       with the find_limits or substitute_callout modifiers.
+       with the find_limits, find_limits_noheap, or  substitute_callout  modi-
+       fiers.

-       Similarly, for testing purposes, if the null_subject  or  null_replace-
-       ment  modifier  is  set, the subject or replacement string pointers are
+       Similarly,  for  testing purposes, if the null_subject or null_replace-
+       ment modifier is set, the subject or replacement  string  pointers  are
       passed as NULL, respectively, to the relevant functions.


 THE ALTERNATIVE MATCHING FUNCTION

-       By default,  pcre2test  uses  the  standard  PCRE2  matching  function,
+       By  default,  pcre2test  uses  the  standard  PCRE2  matching function,
       pcre2_match() to match each subject line. PCRE2 also supports an alter-
-       native matching function, pcre2_dfa_match(), which operates in  a  dif-
-       ferent  way, and has some restrictions. The differences between the two
+       native  matching  function, pcre2_dfa_match(), which operates in a dif-
+       ferent way, and has some restrictions. The differences between the  two
       functions are described in the pcre2matching documentation.

-       If the dfa modifier is set, the alternative matching function is  used.
-       This  function  finds all possible matches at a given point in the sub-
-       ject. If, however, the dfa_shortest modifier is set,  processing  stops
-       after  the  first  match is found. This is always the shortest possible
+       If  the dfa modifier is set, the alternative matching function is used.
+       This function finds all possible matches at a given point in  the  sub-
+       ject.  If,  however, the dfa_shortest modifier is set, processing stops
+       after the first match is found. This is always  the  shortest  possible
       match.


 DEFAULT OUTPUT FROM pcre2test

-       This section describes the output when the  normal  matching  function,
+       This  section  describes  the output when the normal matching function,
       pcre2_match(), is being used.

-       When  a  match  succeeds,  pcre2test  outputs the list of captured sub-
-       strings, starting with number 0 for the string that matched  the  whole
+       When a match succeeds, pcre2test outputs  the  list  of  captured  sub-
+       strings,  starting  with number 0 for the string that matched the whole
       pattern.  Otherwise, it outputs "No match" when the return is PCRE2_ER-
-       ROR_NOMATCH, or "Partial match:" followed  by  the  partially  matching
-       substring  when  the  return is PCRE2_ERROR_PARTIAL. (Note that this is
-       the entire substring that was inspected during the  partial  match;  it
-       may  include  characters  before the actual match start if a lookbehind
+       ROR_NOMATCH,  or  "Partial  match:"  followed by the partially matching
+       substring when the return is PCRE2_ERROR_PARTIAL. (Note  that  this  is
+       the  entire  substring  that was inspected during the partial match; it
+       may include characters before the actual match start  if  a  lookbehind
       assertion, \K, \b, or \B was involved.)

       For any other return, pcre2test outputs the PCRE2 negative error number
-       and  a  short  descriptive  phrase. If the error is a failed UTF string
-       check, the code unit offset of the start of the  failing  character  is
+       and a short descriptive phrase. If the error is  a  failed  UTF  string
+       check,  the  code  unit offset of the start of the failing character is
       also output. Here is an example of an interactive pcre2test run.

         $ pcre2test
@ -1572,8 +1574,8 @@ DEFAULT OUTPUT FROM pcre2test
       Unset capturing substrings that are not followed by one that is set are
       not shown by pcre2test unless the allcaptures modifier is specified. In
       the following example, there are two capturing substrings, but when the
-       first data line is matched, the second, unset substring is  not  shown.
-       An  "internal" unset substring is shown as "<unset>", as for the second
+       first  data  line is matched, the second, unset substring is not shown.
+       An "internal" unset substring is shown as "<unset>", as for the  second
       data line.

           re> /(a)|(b)/
@ -1585,11 +1587,11 @@ DEFAULT OUTPUT FROM pcre2test
          1: <unset>
          2: b

-       If the strings contain any non-printing characters, they are output  as
-       \xhh  escapes  if  the  value is less than 256 and UTF mode is not set.
+       If  the strings contain any non-printing characters, they are output as
+       \xhh escapes if the value is less than 256 and UTF  mode  is  not  set.
       Otherwise they are output as \x{hh...} escapes. See below for the defi-
-       nition  of  non-printing  characters. If the aftertext modifier is set,
-       the output for substring 0 is followed by the the rest of  the  subject
+       nition of non-printing characters. If the aftertext  modifier  is  set,
+       the  output  for substring 0 is followed by the the rest of the subject
       string, identified by "0+" like this:

           re> /cat/aftertext
@ -1609,8 +1611,8 @@ DEFAULT OUTPUT FROM pcre2test
          0: ipp
          1: pp

-       "No match" is output only if the first match attempt fails. Here is  an
-       example  of  a  failure  message (the offset 4 that is specified by the
+       "No  match" is output only if the first match attempt fails. Here is an
+       example of a failure message (the offset 4 that  is  specified  by  the
       offset modifier is past the end of the subject string):

           re> /xyz/
@ -1618,7 +1620,7 @@ DEFAULT OUTPUT FROM pcre2test
         Error -24 (bad offset value)

       Note that whereas patterns can be continued over several lines (a plain
-       ">"  prompt  is used for continuations), subject lines may not. However
+       ">" prompt is used for continuations), subject lines may  not.  However
       newlines can be included in a subject by means of the \n escape (or \r,
       \r\n, etc., depending on the newline sequence setting).

@ -1626,7 +1628,7 @@ DEFAULT OUTPUT FROM pcre2test
 OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION

       When the alternative matching function, pcre2_dfa_match(), is used, the
-       output consists of a list of all the matches that start  at  the  first
+       output  consists  of  a list of all the matches that start at the first
       point in the subject where there is at least one match. For example:

           re> /(tang|tangerine|tan)/
@ -1635,11 +1637,11 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
          1: tang
          2: tan

-       Using  the normal matching function on this data finds only "tang". The
-       longest matching string is always given first (and numbered zero).  Af-
-       ter  a PCRE2_ERROR_PARTIAL return, the output is "Partial match:", fol-
+       Using the normal matching function on this data finds only "tang".  The
+       longest  matching string is always given first (and numbered zero). Af-
+       ter a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",  fol-
       lowed by the partially matching substring. Note that this is the entire
-       substring  that  was inspected during the partial match; it may include
+       substring that was inspected during the partial match; it  may  include
       characters before the actual match start if a lookbehind assertion, \b,
       or \B was involved. (\K is not supported for DFA matching.)

@ -1655,16 +1657,16 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
          1: tan
          0: tan

-       The alternative matching function does not support  substring  capture,
-       so  the  modifiers  that are concerned with captured substrings are not
+       The  alternative  matching function does not support substring capture,
+       so the modifiers that are concerned with captured  substrings  are  not
       relevant.


 RESTARTING AFTER A PARTIAL MATCH

-       When the alternative matching function has given  the  PCRE2_ERROR_PAR-
+       When  the  alternative matching function has given the PCRE2_ERROR_PAR-
       TIAL return, indicating that the subject partially matched the pattern,
-       you can restart the match with additional subject data by means of  the
+       you  can restart the match with additional subject data by means of the
       dfa_restart modifier. For example:

           re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
@ -1673,37 +1675,37 @@ RESTARTING AFTER A PARTIAL MATCH
         data> n05\=dfa,dfa_restart
          0: n05

-       For  further  information  about partial matching, see the pcre2partial
+       For further information about partial matching,  see  the  pcre2partial
       documentation.


 CALLOUTS

       If the pattern contains any callout requests, pcre2test's callout func-
-       tion  is  called during matching unless callout_none is specified. This
+       tion is called during matching unless callout_none is  specified.  This
       works with both matching functions, and with JIT, though there are some
-       differences  in behaviour. The output for callouts with numerical argu-
+       differences in behaviour. The output for callouts with numerical  argu-
       ments and those with string arguments is slightly different.

   Callouts with numerical arguments

       By default, the callout function displays the callout number, the start
-       and  current positions in the subject text at the callout time, and the
+       and current positions in the subject text at the callout time, and  the
       next pattern item to be tested. For example:

         --->pqrabcdef
           0    ^  ^     \d

-       This output indicates that callout number 0 occurred for  a  match  at-
-       tempt  starting at the fourth character of the subject string, when the
-       pointer was at the seventh character, and when the  next  pattern  item
-       was  \d.  Just  one circumflex is output if the start and current posi-
+       This  output  indicates  that callout number 0 occurred for a match at-
+       tempt starting at the fourth character of the subject string, when  the
+       pointer  was  at  the seventh character, and when the next pattern item
+       was \d. Just one circumflex is output if the start  and  current  posi-
       tions are the same, or if the current position precedes the start posi-
       tion, which can happen if the callout is in a lookbehind assertion.

       Callouts numbered 255 are assumed to be automatic callouts, inserted as
       a result of the auto_callout pattern modifier. In this case, instead of
-       showing  the  callout  number, the offset in the pattern, preceded by a
+       showing the callout number, the offset in the pattern,  preceded  by  a
       plus, is output. For example:

           re> /\d?[A-E]\*/auto_callout
@ -1730,17 +1732,17 @@ CALLOUTS
         +12 ^  ^
          0: abc

-       The mark changes between matching "a" and "b", but stays the  same  for
-       the  rest  of  the match, so nothing more is output. If, as a result of
-       backtracking, the mark reverts to being unset, the  text  "<unset>"  is
+       The  mark  changes between matching "a" and "b", but stays the same for
+       the rest of the match, so nothing more is output. If, as  a  result  of
+       backtracking,  the  mark  reverts to being unset, the text "<unset>" is
       output.

   Callouts with string arguments

       The output for a callout with a string argument is similar, except that
-       instead of outputting a callout number before the position  indicators,
-       the  callout string and its offset in the pattern string are output be-
-       fore the reflection of the subject string, and the  subject  string  is
+       instead  of outputting a callout number before the position indicators,
+       the callout string and its offset in the pattern string are output  be-
+       fore  the  reflection  of the subject string, and the subject string is
       reflected for each callout. For example:

           re> /^ab(?C'first')cd(?C"second")ef/
@ -1756,26 +1758,26 @@ CALLOUTS

   Callout modifiers

-       The  callout  function in pcre2test returns zero (carry on matching) by
-       default, but you can use a callout_fail modifier in a subject  line  to
+       The callout function in pcre2test returns zero (carry on  matching)  by
+       default,  but  you can use a callout_fail modifier in a subject line to
       change this and other parameters of the callout (see below).

       If the callout_capture modifier is set, the current captured groups are
       output when a callout occurs. This is useful only for non-DFA matching,
-       as  pcre2_dfa_match()  does  not  support capturing, so no captures are
+       as pcre2_dfa_match() does not support capturing,  so  no  captures  are
       ever shown.

       The normal callout output, showing the callout number or pattern offset
-       (as  described above) is suppressed if the callout_no_where modifier is
+       (as described above) is suppressed if the callout_no_where modifier  is
       set.

-       When using the interpretive  matching  function  pcre2_match()  without
-       JIT,  setting  the callout_extra modifier causes additional output from
-       pcre2test's callout function to be generated. For the first callout  in
-       a  match  attempt at a new starting position in the subject, "New match
-       attempt" is output. If there has been a backtrack since the last  call-
+       When  using  the  interpretive  matching function pcre2_match() without
+       JIT, setting the callout_extra modifier causes additional  output  from
+       pcre2test's  callout function to be generated. For the first callout in
+       a match attempt at a new starting position in the subject,  "New  match
+       attempt"  is output. If there has been a backtrack since the last call-
       out (or start of matching if this is the first callout), "Backtrack" is
-       output, followed by "No other matching paths" if  the  backtrack  ended
+       output,  followed  by  "No other matching paths" if the backtrack ended
       the previous match attempt. For example:

          re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess
@ -1812,86 +1814,86 @@ CALLOUTS
          +1    ^    a+
         No match

-       Notice  that  various  optimizations must be turned off if you want all
-       possible matching paths to be  scanned.  If  no_start_optimize  is  not
-       used,  there  is an immediate "no match", without any callouts, because
-       the starting optimization fails to find "b" in the  subject,  which  it
-       knows  must  be  present for any match. If no_auto_possess is not used,
-       the "a+" item is turned into "a++", which reduces the number  of  back-
+       Notice that various optimizations must be turned off if  you  want  all
+       possible  matching  paths  to  be  scanned. If no_start_optimize is not
+       used, there is an immediate "no match", without any  callouts,  because
+       the  starting  optimization  fails to find "b" in the subject, which it
+       knows must be present for any match. If no_auto_possess  is  not  used,
+       the  "a+"  item is turned into "a++", which reduces the number of back-
       tracks.

-       The  callout_extra modifier has no effect if used with the DFA matching
+       The callout_extra modifier has no effect if used with the DFA  matching
       function, or with JIT.

   Return values from callouts

-       The default return from the callout  function  is  zero,  which  allows
+       The  default  return  from  the  callout function is zero, which allows
       matching to continue. The callout_fail modifier can be given one or two
       numbers. If there is only one number, 1 is returned instead of 0 (caus-
       ing matching to backtrack) when a callout of that number is reached. If
-       two numbers (<n>:<m>) are given, 1 is  returned  when  callout  <n>  is
-       reached  and  there  have been at least <m> callouts. The callout_error
+       two  numbers  (<n>:<m>)  are  given,  1 is returned when callout <n> is
+       reached and there have been at least <m>  callouts.  The  callout_error
       modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus-
-       ing  the entire matching process to be aborted. If both these modifiers
-       are set for the same callout number,  callout_error  takes  precedence.
-       Note  that  callouts  with string arguments are always given the number
+       ing the entire matching process to be aborted. If both these  modifiers
+       are  set  for  the same callout number, callout_error takes precedence.
+       Note that callouts with string arguments are always  given  the  number
       zero.

-       The callout_data modifier can be given an unsigned or a  negative  num-
-       ber.   This  is  set  as the "user data" that is passed to the matching
-       function, and passed back when the callout  function  is  invoked.  Any
-       value  other  than  zero  is  used as a return from pcre2test's callout
+       The  callout_data  modifier can be given an unsigned or a negative num-
+       ber.  This is set as the "user data" that is  passed  to  the  matching
+       function,  and  passed  back  when the callout function is invoked. Any
+       value other than zero is used as  a  return  from  pcre2test's  callout
       function.

       Inserting callouts can be helpful when using pcre2test to check compli-
-       cated  regular expressions. For further information about callouts, see
+       cated regular expressions. For further information about callouts,  see
       the pcre2callout documentation.


 NON-PRINTING CHARACTERS

       When pcre2test is outputting text in the compiled version of a pattern,
-       bytes  other  than 32-126 are always treated as non-printing characters
+       bytes other than 32-126 are always treated as  non-printing  characters
       and are therefore shown as hex escapes.

-       When pcre2test is outputting text that is a matched part of  a  subject
-       string,  it behaves in the same way, unless a different locale has been
-       set for the pattern (using the locale modifier). In this case, the  is-
+       When  pcre2test  is outputting text that is a matched part of a subject
+       string, it behaves in the same way, unless a different locale has  been
+       set  for the pattern (using the locale modifier). In this case, the is-
       print() function is used to distinguish printing and non-printing char-
       acters.


 SAVING AND RESTORING COMPILED PATTERNS

-       It is possible to save compiled patterns  on  disc  or  elsewhere,  and
+       It  is  possible  to  save  compiled patterns on disc or elsewhere, and
       reload them later, subject to a number of restrictions. JIT data cannot
-       be saved. The host on which the patterns are reloaded must  be  running
+       be  saved.  The host on which the patterns are reloaded must be running
       the same version of PCRE2, with the same code unit width, and must also
-       have the same endianness, pointer width  and  PCRE2_SIZE  type.  Before
-       compiled  patterns  can be saved they must be serialized, that is, con-
-       verted to a stream of bytes. A single byte stream may contain any  num-
-       ber  of compiled patterns, but they must all use the same character ta-
-       bles. A single copy of the tables is included in the byte  stream  (its
+       have  the  same  endianness,  pointer width and PCRE2_SIZE type. Before
+       compiled patterns can be saved they must be serialized, that  is,  con-
+       verted  to a stream of bytes. A single byte stream may contain any num-
+       ber of compiled patterns, but they must all use the same character  ta-
+       bles.  A  single copy of the tables is included in the byte stream (its
       size is 1088 bytes).

-       The  functions whose names begin with pcre2_serialize_ are used for se-
-       rializing and de-serializing. They are described in the  pcre2serialize
-       documentation.  In  this  section we describe the features of pcre2test
+       The functions whose names begin with pcre2_serialize_ are used for  se-
+       rializing  and de-serializing. They are described in the pcre2serialize
+       documentation. In this section we describe the  features  of  pcre2test
       that can be used to test these functions.

-       Note that "serialization" in PCRE2 does not convert  compiled  patterns
-       to  an  abstract  format  like Java or .NET. It just makes a reloadable
+       Note  that  "serialization" in PCRE2 does not convert compiled patterns
+       to an abstract format like Java or .NET. It  just  makes  a  reloadable
       byte code stream.  Hence the restrictions on reloading mentioned above.

-       In pcre2test, when a pattern with push modifier  is  successfully  com-
-       piled,  it  is  pushed onto a stack of compiled patterns, and pcre2test
-       expects the next line to contain a new pattern (or command) instead  of
+       In  pcre2test,  when  a pattern with push modifier is successfully com-
+       piled, it is pushed onto a stack of compiled  patterns,  and  pcre2test
+       expects  the next line to contain a new pattern (or command) instead of
       a subject line. By contrast, the pushcopy modifier causes a copy of the
-       compiled pattern to be stacked, leaving the original available for  im-
-       mediate  matching.  By using push and/or pushcopy, a number of patterns
-       can be compiled and retained. These  modifiers  are  incompatible  with
+       compiled  pattern to be stacked, leaving the original available for im-
+       mediate matching. By using push and/or pushcopy, a number  of  patterns
+       can  be  compiled  and  retained. These modifiers are incompatible with
       posix, and control modifiers that act at match time are ignored (with a
-       message) for the stacked patterns. The jitverify modifier applies  only
+       message)  for the stacked patterns. The jitverify modifier applies only
       at compile time.

       The command
@ -1899,21 +1901,21 @@ SAVING AND RESTORING COMPILED PATTERNS
         #save <filename>

       causes all the stacked patterns to be serialized and the result written
-       to the named file. Afterwards, all the stacked patterns are freed.  The
+       to  the named file. Afterwards, all the stacked patterns are freed. The
       command

         #load <filename>

-       reads  the  data in the file, and then arranges for it to be de-serial-
-       ized, with the resulting compiled patterns added to the pattern  stack.
-       The  pattern  on the top of the stack can be retrieved by the #pop com-
-       mand, which must be followed by  lines  of  subjects  that  are  to  be
-       matched  with  the pattern, terminated as usual by an empty line or end
-       of file. This command may be followed by  a  modifier  list  containing
-       only  control  modifiers that act after a pattern has been compiled. In
-       particular, hex, posix, posix_nosub, push, and  pushcopy  are  not  al-
-       lowed,  nor  are  any option-setting modifiers.  The JIT modifiers are,
-       however permitted. Here is an example that saves and reloads  two  pat-
+       reads the data in the file, and then arranges for it to  be  de-serial-
+       ized,  with the resulting compiled patterns added to the pattern stack.
+       The pattern on the top of the stack can be retrieved by the  #pop  com-
+       mand,  which  must  be  followed  by  lines  of subjects that are to be
+       matched with the pattern, terminated as usual by an empty line  or  end
+       of  file.  This  command  may be followed by a modifier list containing
+       only control modifiers that act after a pattern has been  compiled.  In
+       particular,  hex,  posix,  posix_nosub,  push, and pushcopy are not al-
+       lowed, nor are any option-setting modifiers.  The  JIT  modifiers  are,
+       however  permitted.  Here is an example that saves and reloads two pat-
       terns.

         /abc/push
@ -1926,10 +1928,10 @@ SAVING AND RESTORING COMPILED PATTERNS
         #pop jit,bincode
         abc

-       If  jitverify  is  used with #pop, it does not automatically imply jit,
+       If jitverify is used with #pop, it does not  automatically  imply  jit,
       which is different behaviour from when it is used on a pattern.

-       The #popcopy command is analagous to the pushcopy modifier in  that  it
+       The  #popcopy  command is analagous to the pushcopy modifier in that it
       makes current a copy of the topmost stack pattern, leaving the original
       still on the stack.

@ -1949,5 +1951,5 @@ AUTHOR

 REVISION

-       Last updated: 12 January 2022
+       Last updated: 27 July 2022
       Copyright (c) 1997-2022 University of Cambridge.
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@ -205,9 +205,6 @@ point. */
 *               Global variables                 *
 *************************************************/

-/* Jeffrey Friedl has some debugging requirements that are not part of the
-regular code. */
-
 static const char *colour_string = "1;31";
 static const char *colour_option = NULL;
 static const char *dee_option = NULL;
@ -220,6 +217,10 @@ static const char *output_text = NULL;

 static char *main_buffer = NULL;

+static const char *printname_nl = STDOUT_NL;  /* Changed to NULL for -Z */
+static int printname_colon = ':';             /* Changed to 0 for -Z */
+static int printname_hyphen = '-';            /* Changed to 0 for -Z */
+
 static int after_context = 0;
 static int before_context = 0;
 static int binary_files = BIN_BINARY;
@ -483,6 +484,7 @@ static option_item optionlist[] = {
  { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
  { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
+  { OP_NODATA,    'Z',      NULL,              "null",          "output 0 byte after file names"  },
  { OP_NODATA,    0,        NULL,               NULL,            NULL }
 };

@ -1773,7 +1775,7 @@ if (after_context > 0 && lastmatchnumber > 0)
    {
    char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
    if (ellength == 0 && pp == main_buffer + bufsize) break;
-    if (printname != NULL) fprintf(stdout, "%s-", printname);
+    if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
    if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
    FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
    lastmatchrestart = pp;
@ -2439,10 +2441,10 @@ if (pid == 0)
  }
 else if (pid > 0)
  {
-  (void)fflush(stdout); 
+  (void)fflush(stdout);
  (void)waitpid(pid, &result, 0);
-  (void)fflush(stdout); 
-  } 
+  (void)fflush(stdout);
+  }
 #endif  /* End Windows/VMS/other handling */

 free(args);
@ -2730,7 +2732,9 @@ while (ptr < endptr)

    else if (filenames == FN_MATCH_ONLY)
      {
-      fprintf(stdout, "%s" STDOUT_NL, printname);
+      fprintf(stdout, "%s", printname);
+      if (printname_nl == NULL) fprintf(stdout, "%c", 0);
+        else fprintf(stdout, "%s", printname_nl);
      return 0;
      }

@ -2749,7 +2753,8 @@ while (ptr < endptr)
        {
        PCRE2_SIZE oldstartoffset;

-        if (printname != NULL) fprintf(stdout, "%s:", printname);
+        if (printname != NULL) fprintf(stdout, "%s%c", printname,
+          printname_colon);
        if (number) fprintf(stdout, "%lu:", linenumber);

        /* Handle --line-offsets */
@ -2871,7 +2876,8 @@ while (ptr < endptr)
        while (lastmatchrestart < p)
          {
          char *pp = lastmatchrestart;
-          if (printname != NULL) fprintf(stdout, "%s-", printname);
+          if (printname != NULL) fprintf(stdout, "%s%c", printname,
+            printname_hyphen);
          if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
          pp = end_of_line(pp, endptr, &ellength);
          FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
@ -2912,7 +2918,8 @@ while (ptr < endptr)
          {
          int ellength;
          char *pp = p;
-          if (printname != NULL) fprintf(stdout, "%s-", printname);
+          if (printname != NULL) fprintf(stdout, "%s%c", printname,
+            printname_hyphen);
          if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
          pp = end_of_line(pp, endptr, &ellength);
          FWRITE_IGNORE(p, 1, pp - p, stdout);
@ -2926,7 +2933,8 @@ while (ptr < endptr)
      if (after_context > 0 || before_context > 0)
        endhyphenpending = TRUE;

-      if (printname != NULL) fprintf(stdout, "%s:", printname);
+      if (printname != NULL) fprintf(stdout, "%s%c", printname,
+        printname_colon);
      if (number) fprintf(stdout, "%lu:", linenumber);

      /* In multiline mode, or if colouring, we have to split the line(s) up
@ -3131,7 +3139,9 @@ were none. If we found a match, we won't have got this far. */

 if (filenames == FN_NOMATCH_ONLY)
  {
-  fprintf(stdout, "%s" STDOUT_NL, printname);
+  fprintf(stdout, "%s", printname);
+  if (printname_nl == NULL) fprintf(stdout, "%c", 0);
+    else fprintf(stdout, "%s", printname_nl);
  return 0;
  }

@ -3142,7 +3152,7 @@ if (count_only && !quiet)
  if (count > 0 || !omit_zero_count)
    {
    if (printname != NULL && filenames != FN_NONE)
-      fprintf(stdout, "%s:", printname);
+      fprintf(stdout, "%s%c", printname, printname_colon);
    fprintf(stdout, "%lu" STDOUT_NL, count);
    counts_printed++;
    }
@ -3528,8 +3538,6 @@ switch(letter)
  case 'u': options |= PCRE2_UTF; utf = TRUE; break;
  case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
  case 'v': invert = TRUE; break;
-  case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
-  case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;

  case 'V':
    {
@ -3540,6 +3548,10 @@ switch(letter)
  pcre2grep_exit(0);
  break;

+  case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
+  case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
+  case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
+
  default:
  fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
  pcre2grep_exit(usage(2));
@ -4259,8 +4271,6 @@ if (DEE_option != NULL)

 (void)pcre2_set_compile_extra_options(compile_context, extra_options);

-/* Check the values for Jeffrey Friedl's debugging options. */
-
 /* If use_jit is set, check whether JIT is available. If not, do not try
 to use JIT. */

--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@ -991,3 +991,22 @@ RC=0
 ---------------------------- Test 134 -----------------------------
 =AB3CD5=
 RC=0
+---------------------------- Test 135 -----------------------------
+./testdata/grepinputv@The word is cat in this line
+RC=0
+./testdata/grepinputv@./testdata/grepinputv@RC=0
+./testdata/grepinputv@This line contains \E and (regex) *meta* [characters].
+./testdata/grepinputv@The word is cat in this line
+./testdata/grepinputv@The caterpillar sat on the mat
+RC=0
+testdata/grepinputM3:start end in between start
+end and following
+testdata/grepinputM7:start end in between start
+end and following start
+end other stuff
+testdata/grepinputM11:start end in between start
+
+end
+testdata/grepinputM16:start end in between start
+end
+RC=0