Implement -Z in pcre2grep and update documentation

This commit is contained in:
Philip Hazel 2022-07-30 17:41:49 +01:00
parent cc5e121c8e
commit 8b133fa0ba
16 changed files with 994 additions and 868 deletions

View File

@ -49,6 +49,8 @@ tests.
tests run by 'make check', but can be run manually. The current output is from tests run by 'make check', but can be run manually. The current output is from
a 64-bit system. a 64-bit system.
13. Implemented -Z aka --null in pcre2grep.
Version 10.40 15-April-2022 Version 10.40 15-April-2022
--------------------------- ---------------------------

View File

@ -68,6 +68,22 @@ diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b"
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u" diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub" diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
# Some tests involve NUL characters. It seems impossible to handle them easily
# in many operating systems. An earlier version of this script used sed to
# translate NUL into the string ZERO, but this didn't work on Solaris (aka
# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
# even when using GNU sed. A user suggested using tr instead, which
# necessitates translating to a single character. However, on (some versions
# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
# /usr/xpg4/bin/tr is available, it can do so, so test for that.
if [ -x /usr/xpg4/bin/tr ] ; then
tr=/usr/xpg4/bin/tr
else
tr=tr
fi
# If this test is being run from "make check", $srcdir will be set. If not, set # If this test is being run from "make check", $srcdir will be set. If not, set
# it to the current or parent directory, whichever one contains the test data. # it to the current or parent directory, whichever one contains the test data.
# Subsequently, we run most of the pcre2grep tests in the source directory so # Subsequently, we run most of the pcre2grep tests in the source directory so
@ -685,6 +701,16 @@ echo "---------------------------- Test 134 -----------------------------" >>tes
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
echo "RC=$?" >>testtrygrep
# Now compare the results. # Now compare the results.
$cf $srcdir/testdata/grepoutput testtrygrep $cf $srcdir/testdata/grepoutput testtrygrep
@ -759,22 +785,6 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
# This next test involves NUL characters. It seems impossible to handle them
# easily in many operating systems. An earlier version of this script used sed
# to translate NUL into the string ZERO, but this didn't work on Solaris (aka
# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
# even when using GNU sed. A user suggested using tr instead, which
# necessitates translating to a single character (@). However, on (some
# versions of?) Solaris, the normal "tr" cannot handle binary zeros, but if
# /usr/xpg4/bin/tr is available, it can do so, so test for that.
if [ -x /usr/xpg4/bin/tr ] ; then
tr=/usr/xpg4/bin/tr
else
tr=tr
fi
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
printf 'abc\0def' >testNinputgrep printf 'abc\0def' >testNinputgrep
$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep

View File

@ -121,6 +121,7 @@ environment, for example.
pcre2_substring.c pcre2_substring.c
pcre2_tables.c pcre2_tables.c
pcre2_ucd.c pcre2_ucd.c
pcre2_ucptables.c
pcre2_valid_utf.c pcre2_valid_utf.c
pcre2_xclass.c pcre2_xclass.c
@ -373,7 +374,7 @@ Otherwise:
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
have been created. have been created.
2. Edit RunTest.bat to indentify the full or relative location of 2. Edit RunTest.bat to identify the full or relative location of
the pcre2 source (in which the testdata folder resides), e.g.: the pcre2 source (in which the testdata folder resides), e.g.:
set srcdir=C:\pcre2\pcre2-10.00 set srcdir=C:\pcre2\pcre2-10.00

View File

@ -17,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
You can access the archives and also subscribe or manage your subscription You can access the archives and also subscribe or manage your subscription
here: here:
https://groups.google.com/pcre2-dev https://groups.google.com/g/pcre2-dev
Please read the NEWS file if you are upgrading from a previous release. The Please read the NEWS file if you are upgrading from a previous release. The
contents of this README file are: contents of this README file are:
@ -375,7 +375,8 @@ library. They are also documented in the pcre2build man page.
necessary to specify something like LIBS="-lncurses" as well. This is necessary to specify something like LIBS="-lncurses" as well. This is
because, to quote the readline INSTALL, "Readline uses the termcap functions, because, to quote the readline INSTALL, "Readline uses the termcap functions,
but does not link with the termcap or curses library itself, allowing but does not link with the termcap or curses library itself, allowing
applications which link with readline the to choose an appropriate library." applications which link with readline the option to choose an appropriate
library."
If you get error messages about missing functions tgetstr, tgetent, tputs, If you get error messages about missing functions tgetstr, tgetent, tputs,
tgetflag, or tgoto, this is the problem, and linking with the ncurses library tgetflag, or tgoto, this is the problem, and linking with the ncurses library
should fix it. should fix it.
@ -400,10 +401,10 @@ library. They are also documented in the pcre2build man page.
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
be created. This is normally run under valgrind or used when PCRE2 is be created. This is normally run under valgrind or used when PCRE2 is
compiled with address sanitizing enabled. It calls the fuzzing function and compiled with address sanitizing enabled. It calls the fuzzing function and
outputs information about it is doing. The input strings are specified by outputs information about what it is doing. The input strings are specified
arguments: if an argument starts with "=" the rest of it is a literal input by arguments: if an argument starts with "=" the rest of it is a literal
string. Otherwise, it is assumed to be a file name, and the contents of the input string. Otherwise, it is assumed to be a file name, and the contents
file are the test string. of the file are the test string.
. Releases before 10.30 could be compiled with --disable-stack-for-recursion, . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
which caused pcre2_match() to use individual blocks on the heap for which caused pcre2_match() to use individual blocks on the heap for
@ -695,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
different code unit widths. different code unit widths.
Test 15 contains a number of tests that must not be run with JIT. They check, Test 15 contains a number of tests that must not be run with JIT. They check,
among other non-JIT things, the match-limiting features of the intepretive among other non-JIT things, the match-limiting features of the interpretive
matcher. matcher.
Test 16 is run only when JIT support is not available. It checks that an Test 16 is run only when JIT support is not available. It checks that an

View File

@ -1017,7 +1017,7 @@ has its own memory control arrangements (see the
documentation for more details). If the limit is reached, the negative error documentation for more details). If the limit is reached, the negative error
code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
is built; if it is not, the default is set very large and is essentially is built; if it is not, the default is set very large and is essentially
"unlimited". unlimited.
</P> </P>
<P> <P>
A value for the heap limit may also be supplied by an item at the start of a A value for the heap limit may also be supplied by an item at the start of a
@ -1030,19 +1030,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
limit is set, less than the default. limit is set, less than the default.
</P> </P>
<P> <P>
The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system The <b>pcre2_match()</b> function always needs some heap memory, so setting a
stack for recording backtracking points. The more nested backtracking points value of zero guarantees a "heap limit exceeded" error. Details of how
there are (that is, the deeper the search tree), the more memory is needed. <b>pcre2_match()</b> uses the heap are given in the
Heap memory is used only if the initial vector is too small. If the heap limit <a href="pcre2perform.html"><b>pcre2perform</b></a>
is set to a value less than 21 (in particular, zero) no heap memory will be documentation.
used. In this case, only patterns that do not have a lot of nested backtracking
can be successfully processed.
</P> </P>
<P> <P>
Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
when processing pattern recursions, lookarounds, or atomic groups, and only if processing pattern recursions, lookarounds, or atomic groups, and only if this
this is not big enough is heap memory used. In this case, too, setting a value is not big enough is heap memory used. In this case, setting a value of zero
of zero disables the use of the heap. disables the use of the heap.
<br> <br>
<br> <br>
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b> <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@ -1089,10 +1087,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
<br> <br>
<br> <br>
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>. This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
Each time a nested backtracking point is passed, a new memory "frame" is used Each time a nested backtracking point is passed, a new memory frame is used
to remember the state of matching at that point. Thus, this parameter to remember the state of matching at that point. Thus, this parameter
indirectly limits the amount of memory that is used in a match. However, indirectly limits the amount of memory that is used in a match. However,
because the size of each memory "frame" depends on the number of capturing because the size of each memory frame depends on the number of capturing
parentheses, the actual memory limit varies from pattern to pattern. This limit parentheses, the actual memory limit varies from pattern to pattern. This limit
was more useful in versions before 10.30, where function recursion was used for was more useful in versions before 10.30, where function recursion was used for
backtracking. backtracking.
@ -3148,11 +3146,11 @@ The backtracking match limit was reached.
<pre> <pre>
PCRE2_ERROR_NOMEMORY PCRE2_ERROR_NOMEMORY
</pre> </pre>
If a pattern contains many nested backtracking points, heap memory is used to Heap memory is used to remember backtracking points. This error is given when
remember them. This error is given when the memory allocation function (default the memory allocation function (default or custom) fails. Note that a different
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails. PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
<pre> <pre>
PCRE2_ERROR_NULL PCRE2_ERROR_NULL
</pre> </pre>
@ -4020,9 +4018,9 @@ Cambridge, England.
</P> </P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br> <br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 14 December 2021 Last updated: 27 July 2022
<br> <br>
Copyright &copy; 1997-2021 University of Cambridge. Copyright &copy; 1997-2022 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE2 index page</a>. Return to the <a href="index.html">PCRE2 index page</a>.

View File

@ -284,12 +284,11 @@ to the <b>configure</b> command. This setting also applies to the
counting is done differently). counting is done differently).
</P> </P>
<P> <P>
The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system The <b>pcre2_match()</b> function uses heap memory to record backtracking
stack to record backtracking points. The more nested backtracking points there points. The more nested backtracking points there are (that is, the deeper the
are (that is, the deeper the search tree), the more memory is needed. If the search tree), the more memory is needed. There is an upper limit, specified in
initial vector is not large enough, heap memory is used, up to a certain limit, kibibytes (units of 1024 bytes). This limit can be changed at run time, as
which is specified in kibibytes (units of 1024 bytes). The limit can be changed described in the
at run time, as described in the
<a href="pcre2api.html"><b>pcre2api</b></a> <a href="pcre2api.html"><b>pcre2api</b></a>
documentation. The default limit (in effect unlimited) is 20 million. You can documentation. The default limit (in effect unlimited) is 20 million. You can
change this by a setting such as change this by a setting such as
@ -609,16 +608,16 @@ give a warning.
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
University Computing Service Retired from University Computing Service
<br> <br>
Cambridge, England. Cambridge, England.
<br> <br>
</P> </P>
<br><a name="SEC26" href="#TOC1">REVISION</a><br> <br><a name="SEC26" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 08 December 2021 Last updated: 27 July 2022
<br> <br>
Copyright &copy; 1997-2021 University of Cambridge. Copyright &copy; 1997-2022 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE2 index page</a>. Return to the <a href="index.html">PCRE2 index page</a>.

View File

@ -71,13 +71,15 @@ For example:
<pre> <pre>
pcre2grep some-pattern file1 - file3 pcre2grep some-pattern file1 - file3
</pre> </pre>
Input files are searched line by line. By default, each line that matches a By default, input files are searched line by line. Each line that matches a
pattern is copied to the standard output, and if there is more than one file, pattern is copied to the standard output, and if there is more than one file,
the file name is output at the start of each line, followed by a colon. the file name is output at the start of each line, followed by a colon.
However, there are options that can change how <b>pcre2grep</b> behaves. In However, there are options that can change how <b>pcre2grep</b> behaves. For
particular, the <b>-M</b> option makes it possible to search for strings that example, the <b>-M</b> option makes it possible to search for strings that span
span line boundaries. What defines a line boundary is controlled by the line boundaries. What defines a line boundary is controlled by the <b>-N</b>
<b>-N</b> (<b>--newline</b>) option. (<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
not file names are shown, and the <b>-Z</b> option changes the file name
terminator to a zero byte.
</P> </P>
<P> <P>
The amount of memory used for buffering files that are being scanned is The amount of memory used for buffering files that are being scanned is
@ -178,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
lines are output if the next match or the end of the file is reached, or if the lines are output if the next match or the end of the file is reached, or if the
processing buffer size has been set too small. If file names and/or line processing buffer size has been set too small. If file names and/or line
numbers are being output, a hyphen separator is used instead of a colon for the numbers are being output, a hyphen separator is used instead of a colon for the
context lines. A line containing "--" is output between each group of lines, context lines (the <b>-Z</b> option can be used to change the file name
unless they are in fact contiguous in the input file. The value of <i>number</i> terminator to a zero byte). A line containing "--" is output between each group
is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored. of lines, unless they are in fact contiguous in the input file. The value of
<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
<b>-A</b> is ignored.
</P> </P>
<P> <P>
<b>-a</b>, <b>--text</b> <b>-a</b>, <b>--text</b>
@ -199,9 +203,10 @@ Output up to <i>number</i> lines of context before each matching line. Fewer
lines are output if the previous match or the start of the file is within lines are output if the previous match or the start of the file is within
<i>number</i> lines, or if the processing buffer size has been set too small. If <i>number</i> lines, or if the processing buffer size has been set too small. If
file names and/or line numbers are being output, a hyphen separator is used file names and/or line numbers are being output, a hyphen separator is used
instead of a colon for the context lines. A line containing "--" is output instead of a colon for the context lines (the <b>-Z</b> option can be used to
between each group of lines, unless they are in fact contiguous in the input change the file name terminator to a zero byte). A line containing "--" is
file. The value of <i>number</i> is expected to be relatively small. When output between each group of lines, unless they are in fact contiguous in the
input file. The value of <i>number</i> is expected to be relatively small. When
<b>-c</b> is used, <b>-B</b> is ignored. <b>-c</b> is used, <b>-B</b> is ignored.
</P> </P>
<P> <P>
@ -411,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
<P> <P>
<b>-H</b>, <b>--with-filename</b> <b>-H</b>, <b>--with-filename</b>
Force the inclusion of the file name at the start of output lines when Force the inclusion of the file name at the start of output lines when
searching a single file. By default, the file name is not shown in this case. searching a single file. The file name is not normally shown in this case.
For matching lines, the file name is followed by a colon; for context lines, a By default, for matching lines, the file name is followed by a colon; for
hyphen separator is used. If a line number is also being output, it follows the context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
file name. When the <b>-M</b> option causes a pattern to match more than one change the terminator to a zero byte. If a line number is also being output,
line, only the first is preceded by the file name. This option overrides any it follows the file name. When the <b>-M</b> option causes a pattern to match
previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options. more than one line, only the first is preceded by the file name. This option
overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
</P> </P>
<P> <P>
<b>-h</b>, <b>--no-filename</b> <b>-h</b>, <b>--no-filename</b>
Suppress the output file names when searching multiple files. By default, Suppress the output file names when searching multiple files. File names are
file names are shown when multiple files are searched. For matching lines, the normally shown when multiple files are searched. By default, for matching
file name is followed by a colon; for context lines, a hyphen separator is used. lines, the file name is followed by a colon; for context lines, a hyphen
If a line number is also being output, it follows the file name. This option separator is used. The <b>-Z</b> option can be used to change the terminator to
overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options. a zero byte. If a line number is also being output, it follows the file name.
This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
</P> </P>
<P> <P>
<b>--heap-limit</b>=<i>number</i> <b>--heap-limit</b>=<i>number</i>
@ -481,18 +488,20 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
<b>-L</b>, <b>--files-without-match</b> <b>-L</b>, <b>--files-without-match</b>
Instead of outputting lines from the files, just output the names of the files Instead of outputting lines from the files, just output the names of the files
that do not contain any lines that would have been output. Each file name is that do not contain any lines that would have been output. Each file name is
output once, on a separate line. This option overrides any previous <b>-H</b>, output once, on a separate line by default, but if the <b>-Z</b> option is set,
<b>-h</b>, or <b>-l</b> options. they are separated by zero bytes instead of newlines. This option overrides any
previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
</P> </P>
<P> <P>
<b>-l</b>, <b>--files-with-matches</b> <b>-l</b>, <b>--files-with-matches</b>
Instead of outputting lines from the files, just output the names of the files Instead of outputting lines from the files, just output the names of the files
containing lines that would have been output. Each file name is output once, on containing lines that would have been output. Each file name is output once, on
a separate line. Searching normally stops as soon as a matching line is found a separate line, but if the <b>-Z</b> option is set, they are separated by zero
in a file. However, if the <b>-c</b> (count) option is also used, matching bytes instead of newlines. Searching normally stops as soon as a matching line
continues in order to obtain the correct count, and those files that have at is found in a file. However, if the <b>-c</b> (count) option is also used,
least one match are listed along with their counts. Using this option with matching continues in order to obtain the correct count, and those files that
<b>-c</b> is a way of suppressing the listing of files with no matches that have at least one match are listed along with their counts. Using this option
with <b>-c</b> is a way of suppressing the listing of files with no matches that
occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>, occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
<b>-h</b>, or <b>-L</b> options. <b>-h</b>, or <b>-L</b> options.
</P> </P>
@ -592,10 +601,7 @@ value set by <b>--match-limit</b> is reached, an error occurs.
<br> <br>
<br> <br>
The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
1024 bytes), the amount of heap memory that may be used for matching. Heap 1024 bytes), the maximum amount of heap memory that may be used for matching.
memory is needed only if matching the pattern requires a significant number of
nested backtracking points to be remembered. This parameter can be set to zero
to forbid the use of heap memory altogether.
<br> <br>
<br> <br>
The <b>--depth-limit</b> option limits the depth of nested backtracking points, The <b>--depth-limit</b> option limits the depth of nested backtracking points,
@ -839,6 +845,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
matched against the contents of files; it does not apply to patterns specified matched against the contents of files; it does not apply to patterns specified
by any of the <b>--include</b> or <b>--exclude</b> options. by any of the <b>--include</b> or <b>--exclude</b> options.
</P> </P>
<P>
<b>-Z</b>, <b>--null</b>
Terminate file names in the regular output with a zero byte (the NUL
character) instead of what would normally appear. This is useful when file
names contain unusual characters such as colons, hyphens, or even newlines. The
option does not apply to file names in error messages.
</P>
<br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br> <br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
<P> <P>
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
@ -1053,9 +1066,9 @@ Cambridge, England.
</P> </P>
<br><a name="SEC16" href="#TOC1">REVISION</a><br> <br><a name="SEC16" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 31 August 2021 Last updated: 30 July 2022
<br> <br>
Copyright &copy; 1997-2021 University of Cambridge. Copyright &copy; 1997-2022 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE2 index page</a>. Return to the <a href="index.html">PCRE2 index page</a>.

View File

@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
The maximum length of a string argument to a callout is the largest number a The maximum length of a string argument to a callout is the largest number a
32-bit unsigned integer can hold. 32-bit unsigned integer can hold.
</P> </P>
<P>
The maximum amount of heap memory used for matching is controlled by the heap
limit, which can be set in a pattern or in a match context. The default is a
very large number, effectively unlimited.
</P>
<br><b> <br><b>
AUTHOR AUTHOR
</b><br> </b><br>
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
University Computing Service Retired from University Computing Service
<br> <br>
Cambridge, England. Cambridge, England.
<br> <br>
@ -86,9 +91,9 @@ Cambridge, England.
REVISION REVISION
</b><br> </b><br>
<P> <P>
Last updated: 02 February 2019 Last updated: 26 July 2022
<br> <br>
Copyright &copy; 1997-2019 University of Cambridge. Copyright &copy; 1997-2022 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE2 index page</a>. Return to the <a href="index.html">PCRE2 index page</a>.

View File

@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
uses very little system stack at run time. In earlier releases recursive uses very little system stack at run time. In earlier releases recursive
function calls could use a great deal of stack, and this could cause problems, function calls could use a great deal of stack, and this could cause problems,
but this usage has been eliminated. Backtracking positions are now explicitly but this usage has been eliminated. Backtracking positions are now explicitly
remembered in memory frames controlled by the code. An initial 20KiB vector of remembered in memory frames controlled by the code.
frames is allocated on the system stack (enough for about 100 frames for small </P>
patterns), but if this is insufficient, heap memory is used. The amount of heap <P>
memory can be limited; if the limit is set to zero, only the initial stack The size of each frame depends on the size of pointer variables and the number
vector is used. Rewriting patterns to be time-efficient, as described below, of capturing parenthesized groups in the pattern being matched. On a 64-bit
may also reduce the memory requirements. system the frame size for a pattern with no captures is 128 bytes. For each
capturing group the size increases by 16 bytes.
</P>
<P>
Until release 10.41, an initial 20KiB frames vector was allocated on the system
stack, but this still caused some issues for multi-thread applications where
each thread has a very small stack. From release 10.41 backtracking memory
frames are always held in heap memory. An initial heap allocation is obtained
the first time any match data block is passed to <b>pcre2_match()</b>. This is
remembered with the match data block and re-used if that block is used for
another match. It is freed when the match data block itself is freed.
</P>
<P>
The size of the initial block is the larger of 20KiB or ten times the pattern's
frame size, unless the heap limit is less than this, in which case the heap
limit is used. If the initial block proves to be too small during matching, it
is replaced by a larger block, subject to the heap limit. The heap limit is
checked only when a new block is to be allocated. Reducing the heap limit
between calls to <b>pcre2_match()</b> with the same match data block does not
affect the saved block.
</P> </P>
<P> <P>
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
University Computing Service Retired from University Computing Service
<br> <br>
Cambridge, England. Cambridge, England.
<br> <br>
</P> </P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br> <br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 03 February 2019 Last updated: 27 July 2022
<br> <br>
Copyright &copy; 1997-2019 University of Cambridge. Copyright &copy; 1997-2022 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE2 index page</a>. Return to the <a href="index.html">PCRE2 index page</a>.

View File

@ -1241,7 +1241,8 @@ pattern, but can be overridden by modifiers on the subject.
copy=&#60;number or name&#62; copy captured substring copy=&#60;number or name&#62; copy captured substring
depth_limit=&#60;n&#62; set a depth limit depth_limit=&#60;n&#62; set a depth limit
dfa use <b>pcre2_dfa_match()</b> dfa use <b>pcre2_dfa_match()</b>
find_limits find match and depth limits find_limits find heap, match and depth limits
find_limits_noheap find match and depth limits
get=&#60;number or name&#62; extract captured substring get=&#60;number or name&#62; extract captured substring
getall extract all captured substrings getall extract all captured substrings
/g global global matching /g global global matching
@ -1564,7 +1565,7 @@ Setting heap, match, and depth limits
<P> <P>
The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
the appropriate limits in the match context. These values are ignored when the the appropriate limits in the match context. These values are ignored when the
<b>find_limits</b> modifier is specified. <b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
</P> </P>
<br><b> <br><b>
Finding minimum limits Finding minimum limits
@ -1574,8 +1575,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
calls the relevant matching function several times, setting different values in calls the relevant matching function several times, setting different values in
the match context via <b>pcre2_set_heap_limit()</b>, the match context via <b>pcre2_set_heap_limit()</b>,
<b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds <b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
the minimum values for each parameter that allows the match to complete without the smallest value for each parameter that allows the match to complete without
error. If JIT is being used, only the match limit is relevant. a "limit exceeded" error. The match itself may succeed or fail. An alternative
modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
standard tests, because the minimum heap limit varies between systems. If JIT
is being used, only the match limit is relevant, and the other two are
automatically omitted.
</P> </P>
<P> <P>
When using this modifier, the pattern should not contain any limit settings When using this modifier, the pattern should not contain any limit settings
@ -1603,9 +1608,7 @@ overall amount of computing resource that is used.
</P> </P>
<P> <P>
For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
(units of 1024 bytes), limits the amount of heap memory used for matching. A (units of 1024 bytes), limits the amount of heap memory used for matching.
value of zero disables the use of any heap memory; many simple pattern matches
can be done without using the heap, so zero is not an unreasonable setting.
</P> </P>
<br><b> <br><b>
Showing MARK names Showing MARK names
@ -1623,12 +1626,10 @@ Showing memory usage
<P> <P>
The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
memory allocation and freeing calls that occur during a call to memory allocation and freeing calls that occur during a call to
<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
requires a bigger vector than the default for remembering backtracking points is used only when a match requires more internal workspace than the default
(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In allocation on the stack, so in many cases there will be no output. No heap
many cases there will be no heap memory used and therefore no additional memory is allocated during matching with JIT. For this modifier to work, the
output. No heap memory is allocated during matching with JIT, so in that case
the <b>memory</b> modifier never has any effect. For this modifier to work, the
<b>null_context</b> modifier must not be set on both the pattern and the <b>null_context</b> modifier must not be set on both the pattern and the
subject, though it can be set on one or the other. subject, though it can be set on one or the other.
</P> </P>
@ -1690,7 +1691,8 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
If the <b>null_context</b> modifier is set, however, NULL is passed. This is for If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
testing that the matching and substitution functions behave correctly in this testing that the matching and substitution functions behave correctly in this
case (they use default values). This modifier cannot be used with the case (they use default values). This modifier cannot be used with the
<b>find_limits</b> or <b>substitute_callout</b> modifiers. <b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
modifiers.
</P> </P>
<P> <P>
Similarly, for testing purposes, if the <b>null_subject</b> or Similarly, for testing purposes, if the <b>null_subject</b> or
@ -2141,7 +2143,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br> <br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 12 January 2022 Last updated: 27 July 2022
<br> <br>
Copyright &copy; 1997-2022 University of Cambridge. Copyright &copy; 1997-2022 University of Cambridge.
<br> <br>

View File

@ -1028,7 +1028,7 @@ PCRE2 CONTEXTS
pcre2jit documentation for more details). If the limit is reached, the pcre2jit documentation for more details). If the limit is reached, the
negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default
limit can be set when PCRE2 is built; if it is not, the default is set limit can be set when PCRE2 is built; if it is not, the default is set
very large and is essentially "unlimited". very large and is essentially unlimited.
A value for the heap limit may also be supplied by an item at the start A value for the heap limit may also be supplied by an item at the start
of a pattern of the form of a pattern of the form
@ -1039,19 +1039,15 @@ PCRE2 CONTEXTS
less ddd is less than the limit set by the caller of pcre2_match() or, less ddd is less than the limit set by the caller of pcre2_match() or,
if no such limit is set, less than the default. if no such limit is set, less than the default.
The pcre2_match() function starts out using a 20KiB vector on the sys- The pcre2_match() function always needs some heap memory, so setting a
tem stack for recording backtracking points. The more nested backtrack- value of zero guarantees a "heap limit exceeded" error. Details of how
ing points there are (that is, the deeper the search tree), the more pcre2_match() uses the heap are given in the pcre2perform documenta-
memory is needed. Heap memory is used only if the initial vector is tion.
too small. If the heap limit is set to a value less than 21 (in partic-
ular, zero) no heap memory will be used. In this case, only patterns
that do not have a lot of nested backtracking can be successfully pro-
cessed.
Similarly, for pcre2_dfa_match(), a vector on the system stack is used For pcre2_dfa_match(), a vector on the system stack is used when pro-
when processing pattern recursions, lookarounds, or atomic groups, and cessing pattern recursions, lookarounds, or atomic groups, and only if
only if this is not big enough is heap memory used. In this case, too, this is not big enough is heap memory used. In this case, setting a
setting a value of zero disables the use of the heap. value of zero disables the use of the heap.
int pcre2_set_match_limit(pcre2_match_context *mcontext, int pcre2_set_match_limit(pcre2_match_context *mcontext,
uint32_t value); uint32_t value);
@ -1093,12 +1089,12 @@ PCRE2 CONTEXTS
This parameter limits the depth of nested backtracking in This parameter limits the depth of nested backtracking in
pcre2_match(). Each time a nested backtracking point is passed, a new pcre2_match(). Each time a nested backtracking point is passed, a new
memory "frame" is used to remember the state of matching at that point. memory frame is used to remember the state of matching at that point.
Thus, this parameter indirectly limits the amount of memory that is Thus, this parameter indirectly limits the amount of memory that is
used in a match. However, because the size of each memory "frame" de- used in a match. However, because the size of each memory frame depends
pends on the number of capturing parentheses, the actual memory limit on the number of capturing parentheses, the actual memory limit varies
varies from pattern to pattern. This limit was more useful in versions from pattern to pattern. This limit was more useful in versions before
before 10.30, where function recursion was used for backtracking. 10.30, where function recursion was used for backtracking.
The depth limit is not relevant, and is ignored, when matching is done The depth limit is not relevant, and is ignored, when matching is done
using JIT compiled code. However, it is supported by pcre2_dfa_match(), using JIT compiled code. However, it is supported by pcre2_dfa_match(),
@ -3051,12 +3047,12 @@ ERROR RETURNS FROM pcre2_match()
PCRE2_ERROR_NOMEMORY PCRE2_ERROR_NOMEMORY
If a pattern contains many nested backtracking points, heap memory is Heap memory is used to remember backtracking points. This error is
used to remember them. This error is given when the memory allocation given when the memory allocation function (default or custom) fails.
function (default or custom) fails. Note that a different error, Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given if the
PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
the heap limit. PCRE2_ERROR_NOMEMORY is also returned if also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory alloca-
PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails. tion fails.
PCRE2_ERROR_NULL PCRE2_ERROR_NULL
@ -3860,8 +3856,8 @@ AUTHOR
REVISION REVISION
Last updated: 14 December 2021 Last updated: 27 July 2022
Copyright (c) 1997-2021 University of Cambridge. Copyright (c) 1997-2022 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -4118,14 +4114,13 @@ LIMITING PCRE2 RESOURCE USAGE
pcre2_dfa_match() matching function, and to JIT matching (though the pcre2_dfa_match() matching function, and to JIT matching (though the
counting is done differently). counting is done differently).
The pcre2_match() function starts out using a 20KiB vector on the sys- The pcre2_match() function uses heap memory to record backtracking
tem stack to record backtracking points. The more nested backtracking points. The more nested backtracking points there are (that is, the
points there are (that is, the deeper the search tree), the more memory deeper the search tree), the more memory is needed. There is an upper
is needed. If the initial vector is not large enough, heap memory is limit, specified in kibibytes (units of 1024 bytes). This limit can be
used, up to a certain limit, which is specified in kibibytes (units of changed at run time, as described in the pcre2api documentation. The
1024 bytes). The limit can be changed at run time, as described in the default limit (in effect unlimited) is 20 million. You can change this
pcre2api documentation. The default limit (in effect unlimited) is 20 by a setting such as
million. You can change this by a setting such as
--with-heap-limit=500 --with-heap-limit=500
@ -4450,14 +4445,14 @@ SEE ALSO
AUTHOR AUTHOR
Philip Hazel Philip Hazel
University Computing Service Retired from University Computing Service
Cambridge, England. Cambridge, England.
REVISION REVISION
Last updated: 08 December 2021 Last updated: 27 July 2022
Copyright (c) 1997-2021 University of Cambridge. Copyright (c) 1997-2022 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -5596,18 +5591,22 @@ SIZE AND OTHER LIMITATIONS
The maximum length of a string argument to a callout is the largest The maximum length of a string argument to a callout is the largest
number a 32-bit unsigned integer can hold. number a 32-bit unsigned integer can hold.
The maximum amount of heap memory used for matching is controlled by
the heap limit, which can be set in a pattern or in a match context.
The default is a very large number, effectively unlimited.
AUTHOR AUTHOR
Philip Hazel Philip Hazel
University Computing Service Retired from University Computing Service
Cambridge, England. Cambridge, England.
REVISION REVISION
Last updated: 02 February 2019 Last updated: 26 July 2022
Copyright (c) 1997-2019 University of Cambridge. Copyright (c) 1997-2022 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -9773,12 +9772,29 @@ STACK AND HEAP USAGE AT RUN TIME
sive function calls could use a great deal of stack, and this could sive function calls could use a great deal of stack, and this could
cause problems, but this usage has been eliminated. Backtracking posi- cause problems, but this usage has been eliminated. Backtracking posi-
tions are now explicitly remembered in memory frames controlled by the tions are now explicitly remembered in memory frames controlled by the
code. An initial 20KiB vector of frames is allocated on the system code.
stack (enough for about 100 frames for small patterns), but if this is
insufficient, heap memory is used. The amount of heap memory can be The size of each frame depends on the size of pointer variables and the
limited; if the limit is set to zero, only the initial stack vector is number of capturing parenthesized groups in the pattern being matched.
used. Rewriting patterns to be time-efficient, as described below, may On a 64-bit system the frame size for a pattern with no captures is 128
also reduce the memory requirements. bytes. For each capturing group the size increases by 16 bytes.
Until release 10.41, an initial 20KiB frames vector was allocated on
the system stack, but this still caused some issues for multi-thread
applications where each thread has a very small stack. From release
10.41 backtracking memory frames are always held in heap memory. An
initial heap allocation is obtained the first time any match data block
is passed to pcre2_match(). This is remembered with the match data
block and re-used if that block is used for another match. It is freed
when the match data block itself is freed.
The size of the initial block is the larger of 20KiB or ten times the
pattern's frame size, unless the heap limit is less than this, in which
case the heap limit is used. If the initial block proves to be too
small during matching, it is replaced by a larger block, subject to the
heap limit. The heap limit is checked only when a new block is to be
allocated. Reducing the heap limit between calls to pcre2_match() with
the same match data block does not affect the saved block.
In contrast to pcre2_match(), pcre2_dfa_match() does use recursive In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
function calls, but only for processing atomic groups, lookaround as- function calls, but only for processing atomic groups, lookaround as-
@ -9926,14 +9942,14 @@ PROCESSING TIME
AUTHOR AUTHOR
Philip Hazel Philip Hazel
University Computing Service Retired from University Computing Service
Cambridge, England. Cambridge, England.
REVISION REVISION
Last updated: 03 February 2019 Last updated: 27 July 2022
Copyright (c) 1997-2019 University of Cambridge. Copyright (c) 1997-2022 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "27 July 2022" "PCRE2 10.41" .TH PCRE2GREP 1 "30 July 2022" "PCRE2 10.41"
.SH NAME .SH NAME
pcre2grep - a grep with Perl-compatible regular expressions. pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -43,13 +43,15 @@ For example:
.sp .sp
pcre2grep some-pattern file1 - file3 pcre2grep some-pattern file1 - file3
.sp .sp
Input files are searched line by line. By default, each line that matches a By default, input files are searched line by line. Each line that matches a
pattern is copied to the standard output, and if there is more than one file, pattern is copied to the standard output, and if there is more than one file,
the file name is output at the start of each line, followed by a colon. the file name is output at the start of each line, followed by a colon.
However, there are options that can change how \fBpcre2grep\fP behaves. In However, there are options that can change how \fBpcre2grep\fP behaves. For
particular, the \fB-M\fP option makes it possible to search for strings that example, the \fB-M\fP option makes it possible to search for strings that span
span line boundaries. What defines a line boundary is controlled by the line boundaries. What defines a line boundary is controlled by the \fB-N\fP
\fB-N\fP (\fB--newline\fP) option. (\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
not file names are shown, and the \fB-Z\fP option changes the file name
terminator to a zero byte.
.P .P
The amount of memory used for buffering files that are being scanned is The amount of memory used for buffering files that are being scanned is
controlled by parameters that can be set by the \fB--buffer-size\fP and controlled by parameters that can be set by the \fB--buffer-size\fP and
@ -149,9 +151,11 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
lines are output if the next match or the end of the file is reached, or if the lines are output if the next match or the end of the file is reached, or if the
processing buffer size has been set too small. If file names and/or line processing buffer size has been set too small. If file names and/or line
numbers are being output, a hyphen separator is used instead of a colon for the numbers are being output, a hyphen separator is used instead of a colon for the
context lines. A line containing "--" is output between each group of lines, context lines (the \fB-Z\fP option can be used to change the file name
unless they are in fact contiguous in the input file. The value of \fInumber\fP terminator to a zero byte). A line containing "--" is output between each group
is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored. of lines, unless they are in fact contiguous in the input file. The value of
\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
\fB-A\fP is ignored.
.TP .TP
\fB-a\fP, \fB--text\fP \fB-a\fP, \fB--text\fP
Treat binary files as text. This is equivalent to Treat binary files as text. This is equivalent to
@ -167,9 +171,10 @@ Output up to \fInumber\fP lines of context before each matching line. Fewer
lines are output if the previous match or the start of the file is within lines are output if the previous match or the start of the file is within
\fInumber\fP lines, or if the processing buffer size has been set too small. If \fInumber\fP lines, or if the processing buffer size has been set too small. If
file names and/or line numbers are being output, a hyphen separator is used file names and/or line numbers are being output, a hyphen separator is used
instead of a colon for the context lines. A line containing "--" is output instead of a colon for the context lines (the \fB-Z\fP option can be used to
between each group of lines, unless they are in fact contiguous in the input change the file name terminator to a zero byte). A line containing "--" is
file. The value of \fInumber\fP is expected to be relatively small. When output between each group of lines, unless they are in fact contiguous in the
input file. The value of \fInumber\fP is expected to be relatively small. When
\fB-c\fP is used, \fB-B\fP is ignored. \fB-c\fP is used, \fB-B\fP is ignored.
.TP .TP
\fB--binary-files=\fP\fIword\fP \fB--binary-files=\fP\fIword\fP
@ -356,19 +361,21 @@ shown separately. This option is mutually exclusive with \fB--output\fP,
.TP .TP
\fB-H\fP, \fB--with-filename\fP \fB-H\fP, \fB--with-filename\fP
Force the inclusion of the file name at the start of output lines when Force the inclusion of the file name at the start of output lines when
searching a single file. By default, the file name is not shown in this case. searching a single file. The file name is not normally shown in this case.
For matching lines, the file name is followed by a colon; for context lines, a By default, for matching lines, the file name is followed by a colon; for
hyphen separator is used. If a line number is also being output, it follows the context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
file name. When the \fB-M\fP option causes a pattern to match more than one change the terminator to a zero byte. If a line number is also being output,
line, only the first is preceded by the file name. This option overrides any it follows the file name. When the \fB-M\fP option causes a pattern to match
previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options. more than one line, only the first is preceded by the file name. This option
overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
.TP .TP
\fB-h\fP, \fB--no-filename\fP \fB-h\fP, \fB--no-filename\fP
Suppress the output of file names when searching multiple files. By default, Suppress the output of file names when searching multiple files. File names are
file names are shown when multiple files are searched. For matching lines, the normally shown when multiple files are searched. By default, for matching
file name is followed by a colon; for context lines, a hyphen separator is used. lines, the file name is followed by a colon; for context lines, a hyphen
If a line number is also being output, it follows the file name. This option separator is used. The \fB-Z\fP option can be used to change the terminator to
overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options. a zero byte. If a line number is also being output, it follows the file name.
This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
.TP .TP
\fB--heap-limit\fP=\fInumber\fP \fB--heap-limit\fP=\fInumber\fP
See \fB--match-limit\fP below. See \fB--match-limit\fP below.
@ -417,17 +424,19 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
\fB-L\fP, \fB--files-without-match\fP \fB-L\fP, \fB--files-without-match\fP
Instead of outputting lines from the files, just output the names of the files Instead of outputting lines from the files, just output the names of the files
that do not contain any lines that would have been output. Each file name is that do not contain any lines that would have been output. Each file name is
output once, on a separate line. This option overrides any previous \fB-H\fP, output once, on a separate line by default, but if the \fB-Z\fP option is set,
\fB-h\fP, or \fB-l\fP options. they are separated by zero bytes instead of newlines. This option overrides any
previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
.TP .TP
\fB-l\fP, \fB--files-with-matches\fP \fB-l\fP, \fB--files-with-matches\fP
Instead of outputting lines from the files, just output the names of the files Instead of outputting lines from the files, just output the names of the files
containing lines that would have been output. Each file name is output once, on containing lines that would have been output. Each file name is output once, on
a separate line. Searching normally stops as soon as a matching line is found a separate line, but if the \fB-Z\fP option is set, they are separated by zero
in a file. However, if the \fB-c\fP (count) option is also used, matching bytes instead of newlines. Searching normally stops as soon as a matching line
continues in order to obtain the correct count, and those files that have at is found in a file. However, if the \fB-c\fP (count) option is also used,
least one match are listed along with their counts. Using this option with matching continues in order to obtain the correct count, and those files that
\fB-c\fP is a way of suppressing the listing of files with no matches that have at least one match are listed along with their counts. Using this option
with \fB-c\fP is a way of suppressing the listing of files with no matches that
occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP, occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
\fB-h\fP, or \fB-L\fP options. \fB-h\fP, or \fB-L\fP options.
.TP .TP
@ -729,6 +738,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
pattern and ")$" at the end. This option applies only to the patterns that are pattern and ")$" at the end. This option applies only to the patterns that are
matched against the contents of files; it does not apply to patterns specified matched against the contents of files; it does not apply to patterns specified
by any of the \fB--include\fP or \fB--exclude\fP options. by any of the \fB--include\fP or \fB--exclude\fP options.
.TP
\fB-Z\fP, \fB--null\fP
Terminate file names in the regular output with a zero byte (the NUL
character) instead of what would normally appear. This is useful when file
names contain unusual characters such as colons, hyphens, or even newlines. The
option does not apply to file names in error messages.
. .
. .
.SH "ENVIRONMENT VARIABLES" .SH "ENVIRONMENT VARIABLES"
@ -957,6 +972,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 27 July 2022 Last updated: 30 July 2022
Copyright (c) 1997-2022 University of Cambridge. Copyright (c) 1997-2022 University of Cambridge.
.fi .fi

View File

@ -42,13 +42,15 @@ DESCRIPTION
pcre2grep some-pattern file1 - file3 pcre2grep some-pattern file1 - file3
Input files are searched line by line. By default, each line that By default, input files are searched line by line. Each line that
matches a pattern is copied to the standard output, and if there is matches a pattern is copied to the standard output, and if there is
more than one file, the file name is output at the start of each line, more than one file, the file name is output at the start of each line,
followed by a colon. However, there are options that can change how followed by a colon. However, there are options that can change how
pcre2grep behaves. In particular, the -M option makes it possible to pcre2grep behaves. For example, the -M option makes it possible to
search for strings that span line boundaries. What defines a line search for strings that span line boundaries. What defines a line
boundary is controlled by the -N (--newline) option. boundary is controlled by the -N (--newline) option. The -h and -H op-
tions control whether or not file names are shown, and the -Z option
changes the file name terminator to a zero byte.
The amount of memory used for buffering files that are being scanned is The amount of memory used for buffering files that are being scanned is
controlled by parameters that can be set by the --buffer-size and controlled by parameters that can be set by the --buffer-size and
@ -149,10 +151,12 @@ OPTIONS
the file is reached, or if the processing buffer size has the file is reached, or if the processing buffer size has
been set too small. If file names and/or line numbers are be- been set too small. If file names and/or line numbers are be-
ing output, a hyphen separator is used instead of a colon for ing output, a hyphen separator is used instead of a colon for
the context lines. A line containing "--" is output between the context lines (the -Z option can be used to change the
each group of lines, unless they are in fact contiguous in file name terminator to a zero byte). A line containing "--"
the input file. The value of number is expected to be rela- is output between each group of lines, unless they are in
tively small. When -c is used, -A is ignored. fact contiguous in the input file. The value of number is ex-
pected to be relatively small. When -c is used, -A is ig-
nored.
-a, --text -a, --text
Treat binary files as text. This is equivalent to --binary- Treat binary files as text. This is equivalent to --binary-
@ -170,11 +174,12 @@ OPTIONS
start of the file is within number lines, or if the process- start of the file is within number lines, or if the process-
ing buffer size has been set too small. If file names and/or ing buffer size has been set too small. If file names and/or
line numbers are being output, a hyphen separator is used in- line numbers are being output, a hyphen separator is used in-
stead of a colon for the context lines. A line containing stead of a colon for the context lines (the -Z option can be
"--" is output between each group of lines, unless they are used to change the file name terminator to a zero byte). A
in fact contiguous in the input file. The value of number is line containing "--" is output between each group of lines,
expected to be relatively small. When -c is used, -B is ig- unless they are in fact contiguous in the input file. The
nored. value of number is expected to be relatively small. When -c
is used, -B is ignored.
--binary-files=word --binary-files=word
Specify how binary files are to be processed. If the word is Specify how binary files are to be processed. If the word is
@ -387,22 +392,25 @@ OPTIONS
-H, --with-filename -H, --with-filename
Force the inclusion of the file name at the start of output Force the inclusion of the file name at the start of output
lines when searching a single file. By default, the file name lines when searching a single file. The file name is not nor-
is not shown in this case. For matching lines, the file name mally shown in this case. By default, for matching lines,
is followed by a colon; for context lines, a hyphen separator the file name is followed by a colon; for context lines, a
is used. If a line number is also being output, it follows hyphen separator is used. The -Z option can be used to change
the file name. When the -M option causes a pattern to match the terminator to a zero byte. If a line number is also being
more than one line, only the first is preceded by the file output, it follows the file name. When the -M option causes a
name. This option overrides any previous -h, -l, or -L op- pattern to match more than one line, only the first is pre-
tions. ceded by the file name. This option overrides any previous
-h, -l, or -L options.
-h, --no-filename -h, --no-filename
Suppress the output file names when searching multiple files. Suppress the output file names when searching multiple files.
By default, file names are shown when multiple files are File names are normally shown when multiple files are
searched. For matching lines, the file name is followed by a searched. By default, for matching lines, the file name is
colon; for context lines, a hyphen separator is used. If a followed by a colon; for context lines, a hyphen separator is
line number is also being output, it follows the file name. used. The -Z option can be used to change the terminator to a
This option overrides any previous -H, -L, or -l options. zero byte. If a line number is also being output, it follows
the file name. This option overrides any previous -H, -L, or
-l options.
--heap-limit=number --heap-limit=number
See --match-limit below. See --match-limit below.
@ -455,21 +463,23 @@ OPTIONS
Instead of outputting lines from the files, just output the Instead of outputting lines from the files, just output the
names of the files that do not contain any lines that would names of the files that do not contain any lines that would
have been output. Each file name is output once, on a sepa- have been output. Each file name is output once, on a sepa-
rate line. This option overrides any previous -H, -h, or -l rate line by default, but if the -Z option is set, they are
options. separated by zero bytes instead of newlines. This option
overrides any previous -H, -h, or -l options.
-l, --files-with-matches -l, --files-with-matches
Instead of outputting lines from the files, just output the Instead of outputting lines from the files, just output the
names of the files containing lines that would have been out- names of the files containing lines that would have been out-
put. Each file name is output once, on a separate line. put. Each file name is output once, on a separate line, but
Searching normally stops as soon as a matching line is found if the -Z option is set, they are separated by zero bytes in-
in a file. However, if the -c (count) option is also used, stead of newlines. Searching normally stops as soon as a
matching continues in order to obtain the correct count, and matching line is found in a file. However, if the -c (count)
those files that have at least one match are listed along option is also used, matching continues in order to obtain
with their counts. Using this option with -c is a way of sup- the correct count, and those files that have at least one
pressing the listing of files with no matches that occurs match are listed along with their counts. Using this option
with -c on its own. This option overrides any previous -H, with -c is a way of suppressing the listing of files with no
-h, or -L options. matches that occurs with -c on its own. This option overrides
any previous -H, -h, or -L options.
--label=name --label=name
This option supplies a name to be used for the standard input This option supplies a name to be used for the standard input
@ -571,11 +581,8 @@ OPTIONS
an error occurs. an error occurs.
The --heap-limit option specifies, as a number of kibibytes The --heap-limit option specifies, as a number of kibibytes
(units of 1024 bytes), the amount of heap memory that may be (units of 1024 bytes), the maximum amount of heap memory that
used for matching. Heap memory is needed only if matching the may be used for matching.
pattern requires a significant number of nested backtracking
points to be remembered. This parameter can be set to zero to
forbid the use of heap memory altogether.
The --depth-limit option limits the depth of nested back- The --depth-limit option limits the depth of nested back-
tracking points, which indirectly limits the amount of memory tracking points, which indirectly limits the amount of memory
@ -812,6 +819,13 @@ OPTIONS
does not apply to patterns specified by any of the --include does not apply to patterns specified by any of the --include
or --exclude options. or --exclude options.
-Z, --null
Terminate file names in the regular output with a zero byte
(the NUL character) instead of what would normally appear.
This is useful when file names contain unusual characters
such as colons, hyphens, or even newlines. The option does
not apply to file names in error messages.
ENVIRONMENT VARIABLES ENVIRONMENT VARIABLES
@ -1022,5 +1036,5 @@ AUTHOR
REVISION REVISION
Last updated: 31 August 2021 Last updated: 30 July 2022
Copyright (c) 1997-2021 University of Cambridge. Copyright (c) 1997-2022 University of Cambridge.

View File

@ -1111,7 +1111,8 @@ SUBJECT MODIFIERS
copy=<number or name> copy captured substring copy=<number or name> copy captured substring
depth_limit=<n> set a depth limit depth_limit=<n> set a depth limit
dfa use pcre2_dfa_match() dfa use pcre2_dfa_match()
find_limits find match and depth limits find_limits find heap, match and depth limits
find_limits_noheap find match and depth limits
get=<number or name> extract captured substring get=<number or name> extract captured substring
getall extract all captured substrings getall extract all captured substrings
/g global global matching /g global global matching
@ -1411,7 +1412,7 @@ SUBJECT MODIFIERS
The heap_limit, match_limit, and depth_limit modifiers set the appro- The heap_limit, match_limit, and depth_limit modifiers set the appro-
priate limits in the match context. These values are ignored when the priate limits in the match context. These values are ignored when the
find_limits modifier is specified. find_limits or find_limits_noheap modifier is specified.
Finding minimum limits Finding minimum limits
@ -1419,8 +1420,12 @@ SUBJECT MODIFIERS
calls the relevant matching function several times, setting different calls the relevant matching function several times, setting different
values in the match context via pcre2_set_heap_limit(), values in the match context via pcre2_set_heap_limit(),
pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
minimum values for each parameter that allows the match to complete smallest value for each parameter that allows the match to complete
without error. If JIT is being used, only the match limit is relevant. without a "limit exceeded" error. The match itself may succeed or fail.
An alternative modifier, find_limits_noheap, omits the heap limit. This
is used in the standard tests, because the minimum heap limit varies
between systems. If JIT is being used, only the match limit is rele-
vant, and the other two are automatically omitted.
When using this modifier, the pattern should not contain any limit set- When using this modifier, the pattern should not contain any limit set-
tings such as (*LIMIT_MATCH=...) within it. If such a setting is tings such as (*LIMIT_MATCH=...) within it. If such a setting is
@ -1446,9 +1451,7 @@ SUBJECT MODIFIERS
For both kinds of matching, the heap_limit number, which is in For both kinds of matching, the heap_limit number, which is in
kibibytes (units of 1024 bytes), limits the amount of heap memory used kibibytes (units of 1024 bytes), limits the amount of heap memory used
for matching. A value of zero disables the use of any heap memory; many for matching.
simple pattern matches can be done without using the heap, so zero is
not an unreasonable setting.
Showing MARK names Showing MARK names
@ -1463,13 +1466,11 @@ SUBJECT MODIFIERS
The memory modifier causes pcre2test to log the sizes of all heap mem- The memory modifier causes pcre2test to log the sizes of all heap mem-
ory allocation and freeing calls that occur during a call to ory allocation and freeing calls that occur during a call to
pcre2_match() or pcre2_dfa_match(). These occur only when a match re- pcre2_match() or pcre2_dfa_match(). In the latter case, heap memory is
quires a bigger vector than the default for remembering backtracking used only when a match requires more internal workspace than the de-
points (pcre2_match()) or for internal workspace (pcre2_dfa_match()). fault allocation on the stack, so in many cases there will be no out-
In many cases there will be no heap memory used and therefore no addi- put. No heap memory is allocated during matching with JIT. For this
tional output. No heap memory is allocated during matching with JIT, so modifier to work, the null_context modifier must not be set on both the
in that case the memory modifier never has any effect. For this modi-
fier to work, the null_context modifier must not be set on both the
pattern and the subject, though it can be set on one or the other. pattern and the subject, though it can be set on one or the other.
Setting a starting offset Setting a starting offset
@ -1518,7 +1519,8 @@ SUBJECT MODIFIERS
null_context modifier is set, however, NULL is passed. This is for null_context modifier is set, however, NULL is passed. This is for
testing that the matching and substitution functions behave correctly testing that the matching and substitution functions behave correctly
in this case (they use default values). This modifier cannot be used in this case (they use default values). This modifier cannot be used
with the find_limits or substitute_callout modifiers. with the find_limits, find_limits_noheap, or substitute_callout modi-
fiers.
Similarly, for testing purposes, if the null_subject or null_replace- Similarly, for testing purposes, if the null_subject or null_replace-
ment modifier is set, the subject or replacement string pointers are ment modifier is set, the subject or replacement string pointers are
@ -1949,5 +1951,5 @@ AUTHOR
REVISION REVISION
Last updated: 12 January 2022 Last updated: 27 July 2022
Copyright (c) 1997-2022 University of Cambridge. Copyright (c) 1997-2022 University of Cambridge.

View File

@ -205,9 +205,6 @@ point. */
* Global variables * * Global variables *
*************************************************/ *************************************************/
/* Jeffrey Friedl has some debugging requirements that are not part of the
regular code. */
static const char *colour_string = "1;31"; static const char *colour_string = "1;31";
static const char *colour_option = NULL; static const char *colour_option = NULL;
static const char *dee_option = NULL; static const char *dee_option = NULL;
@ -220,6 +217,10 @@ static const char *output_text = NULL;
static char *main_buffer = NULL; static char *main_buffer = NULL;
static const char *printname_nl = STDOUT_NL; /* Changed to NULL for -Z */
static int printname_colon = ':'; /* Changed to 0 for -Z */
static int printname_hyphen = '-'; /* Changed to 0 for -Z */
static int after_context = 0; static int after_context = 0;
static int before_context = 0; static int before_context = 0;
static int binary_files = BIN_BINARY; static int binary_files = BIN_BINARY;
@ -483,6 +484,7 @@ static option_item optionlist[] = {
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
{ OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" }, { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
{ OP_NODATA, 'Z', NULL, "null", "output 0 byte after file names" },
{ OP_NODATA, 0, NULL, NULL, NULL } { OP_NODATA, 0, NULL, NULL, NULL }
}; };
@ -1773,7 +1775,7 @@ if (after_context > 0 && lastmatchnumber > 0)
{ {
char *pp = end_of_line(lastmatchrestart, endptr, &ellength); char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
if (ellength == 0 && pp == main_buffer + bufsize) break; if (ellength == 0 && pp == main_buffer + bufsize) break;
if (printname != NULL) fprintf(stdout, "%s-", printname); if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
if (number) fprintf(stdout, "%lu-", lastmatchnumber++); if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
lastmatchrestart = pp; lastmatchrestart = pp;
@ -2730,7 +2732,9 @@ while (ptr < endptr)
else if (filenames == FN_MATCH_ONLY) else if (filenames == FN_MATCH_ONLY)
{ {
fprintf(stdout, "%s" STDOUT_NL, printname); fprintf(stdout, "%s", printname);
if (printname_nl == NULL) fprintf(stdout, "%c", 0);
else fprintf(stdout, "%s", printname_nl);
return 0; return 0;
} }
@ -2749,7 +2753,8 @@ while (ptr < endptr)
{ {
PCRE2_SIZE oldstartoffset; PCRE2_SIZE oldstartoffset;
if (printname != NULL) fprintf(stdout, "%s:", printname); if (printname != NULL) fprintf(stdout, "%s%c", printname,
printname_colon);
if (number) fprintf(stdout, "%lu:", linenumber); if (number) fprintf(stdout, "%lu:", linenumber);
/* Handle --line-offsets */ /* Handle --line-offsets */
@ -2871,7 +2876,8 @@ while (ptr < endptr)
while (lastmatchrestart < p) while (lastmatchrestart < p)
{ {
char *pp = lastmatchrestart; char *pp = lastmatchrestart;
if (printname != NULL) fprintf(stdout, "%s-", printname); if (printname != NULL) fprintf(stdout, "%s%c", printname,
printname_hyphen);
if (number) fprintf(stdout, "%lu-", lastmatchnumber++); if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
pp = end_of_line(pp, endptr, &ellength); pp = end_of_line(pp, endptr, &ellength);
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
@ -2912,7 +2918,8 @@ while (ptr < endptr)
{ {
int ellength; int ellength;
char *pp = p; char *pp = p;
if (printname != NULL) fprintf(stdout, "%s-", printname); if (printname != NULL) fprintf(stdout, "%s%c", printname,
printname_hyphen);
if (number) fprintf(stdout, "%lu-", linenumber - linecount--); if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
pp = end_of_line(pp, endptr, &ellength); pp = end_of_line(pp, endptr, &ellength);
FWRITE_IGNORE(p, 1, pp - p, stdout); FWRITE_IGNORE(p, 1, pp - p, stdout);
@ -2926,7 +2933,8 @@ while (ptr < endptr)
if (after_context > 0 || before_context > 0) if (after_context > 0 || before_context > 0)
endhyphenpending = TRUE; endhyphenpending = TRUE;
if (printname != NULL) fprintf(stdout, "%s:", printname); if (printname != NULL) fprintf(stdout, "%s%c", printname,
printname_colon);
if (number) fprintf(stdout, "%lu:", linenumber); if (number) fprintf(stdout, "%lu:", linenumber);
/* In multiline mode, or if colouring, we have to split the line(s) up /* In multiline mode, or if colouring, we have to split the line(s) up
@ -3131,7 +3139,9 @@ were none. If we found a match, we won't have got this far. */
if (filenames == FN_NOMATCH_ONLY) if (filenames == FN_NOMATCH_ONLY)
{ {
fprintf(stdout, "%s" STDOUT_NL, printname); fprintf(stdout, "%s", printname);
if (printname_nl == NULL) fprintf(stdout, "%c", 0);
else fprintf(stdout, "%s", printname_nl);
return 0; return 0;
} }
@ -3142,7 +3152,7 @@ if (count_only && !quiet)
if (count > 0 || !omit_zero_count) if (count > 0 || !omit_zero_count)
{ {
if (printname != NULL && filenames != FN_NONE) if (printname != NULL && filenames != FN_NONE)
fprintf(stdout, "%s:", printname); fprintf(stdout, "%s%c", printname, printname_colon);
fprintf(stdout, "%lu" STDOUT_NL, count); fprintf(stdout, "%lu" STDOUT_NL, count);
counts_printed++; counts_printed++;
} }
@ -3528,8 +3538,6 @@ switch(letter)
case 'u': options |= PCRE2_UTF; utf = TRUE; break; case 'u': options |= PCRE2_UTF; utf = TRUE; break;
case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break; case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
case 'v': invert = TRUE; break; case 'v': invert = TRUE; break;
case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
case 'V': case 'V':
{ {
@ -3540,6 +3548,10 @@ switch(letter)
pcre2grep_exit(0); pcre2grep_exit(0);
break; break;
case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
default: default:
fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter); fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
pcre2grep_exit(usage(2)); pcre2grep_exit(usage(2));
@ -4259,8 +4271,6 @@ if (DEE_option != NULL)
(void)pcre2_set_compile_extra_options(compile_context, extra_options); (void)pcre2_set_compile_extra_options(compile_context, extra_options);
/* Check the values for Jeffrey Friedl's debugging options. */
/* If use_jit is set, check whether JIT is available. If not, do not try /* If use_jit is set, check whether JIT is available. If not, do not try
to use JIT. */ to use JIT. */

19
testdata/grepoutput vendored
View File

@ -991,3 +991,22 @@ RC=0
---------------------------- Test 134 ----------------------------- ---------------------------- Test 134 -----------------------------
=AB3CD5= =AB3CD5=
RC=0 RC=0
---------------------------- Test 135 -----------------------------
./testdata/grepinputv@The word is cat in this line
RC=0
./testdata/grepinputv@./testdata/grepinputv@RC=0
./testdata/grepinputv@This line contains \E and (regex) *meta* [characters].
./testdata/grepinputv@The word is cat in this line
./testdata/grepinputv@The caterpillar sat on the mat
RC=0
testdata/grepinputM3:start end in between start
end and following
testdata/grepinputM7:start end in between start
end and following start
end other stuff
testdata/grepinputM11:start end in between start
end
testdata/grepinputM16:start end in between start
end
RC=0