Implement -Z in pcre2grep and update documentation
This commit is contained in:
parent
cc5e121c8e
commit
8b133fa0ba
|
@ -49,6 +49,8 @@ tests.
|
||||||
tests run by 'make check', but can be run manually. The current output is from
|
tests run by 'make check', but can be run manually. The current output is from
|
||||||
a 64-bit system.
|
a 64-bit system.
|
||||||
|
|
||||||
|
13. Implemented -Z aka --null in pcre2grep.
|
||||||
|
|
||||||
|
|
||||||
Version 10.40 15-April-2022
|
Version 10.40 15-April-2022
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
42
RunGrepTest
42
RunGrepTest
|
@ -68,6 +68,22 @@ diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b"
|
||||||
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
|
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
|
||||||
diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
|
diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
|
||||||
|
|
||||||
|
# Some tests involve NUL characters. It seems impossible to handle them easily
|
||||||
|
# in many operating systems. An earlier version of this script used sed to
|
||||||
|
# translate NUL into the string ZERO, but this didn't work on Solaris (aka
|
||||||
|
# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
|
||||||
|
# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
|
||||||
|
# even when using GNU sed. A user suggested using tr instead, which
|
||||||
|
# necessitates translating to a single character. However, on (some versions
|
||||||
|
# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
|
||||||
|
# /usr/xpg4/bin/tr is available, it can do so, so test for that.
|
||||||
|
|
||||||
|
if [ -x /usr/xpg4/bin/tr ] ; then
|
||||||
|
tr=/usr/xpg4/bin/tr
|
||||||
|
else
|
||||||
|
tr=tr
|
||||||
|
fi
|
||||||
|
|
||||||
# If this test is being run from "make check", $srcdir will be set. If not, set
|
# If this test is being run from "make check", $srcdir will be set. If not, set
|
||||||
# it to the current or parent directory, whichever one contains the test data.
|
# it to the current or parent directory, whichever one contains the test data.
|
||||||
# Subsequently, we run most of the pcre2grep tests in the source directory so
|
# Subsequently, we run most of the pcre2grep tests in the source directory so
|
||||||
|
@ -685,6 +701,16 @@ echo "---------------------------- Test 134 -----------------------------" >>tes
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
|
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
# Now compare the results.
|
# Now compare the results.
|
||||||
|
|
||||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||||
|
@ -759,22 +785,6 @@ $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >
|
||||||
printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
|
printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
|
||||||
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||||
|
|
||||||
# This next test involves NUL characters. It seems impossible to handle them
|
|
||||||
# easily in many operating systems. An earlier version of this script used sed
|
|
||||||
# to translate NUL into the string ZERO, but this didn't work on Solaris (aka
|
|
||||||
# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
|
|
||||||
# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
|
|
||||||
# even when using GNU sed. A user suggested using tr instead, which
|
|
||||||
# necessitates translating to a single character (@). However, on (some
|
|
||||||
# versions of?) Solaris, the normal "tr" cannot handle binary zeros, but if
|
|
||||||
# /usr/xpg4/bin/tr is available, it can do so, so test for that.
|
|
||||||
|
|
||||||
if [ -x /usr/xpg4/bin/tr ] ; then
|
|
||||||
tr=/usr/xpg4/bin/tr
|
|
||||||
else
|
|
||||||
tr=tr
|
|
||||||
fi
|
|
||||||
|
|
||||||
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
|
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
|
||||||
printf 'abc\0def' >testNinputgrep
|
printf 'abc\0def' >testNinputgrep
|
||||||
$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
|
$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
|
||||||
|
|
|
@ -121,6 +121,7 @@ environment, for example.
|
||||||
pcre2_substring.c
|
pcre2_substring.c
|
||||||
pcre2_tables.c
|
pcre2_tables.c
|
||||||
pcre2_ucd.c
|
pcre2_ucd.c
|
||||||
|
pcre2_ucptables.c
|
||||||
pcre2_valid_utf.c
|
pcre2_valid_utf.c
|
||||||
pcre2_xclass.c
|
pcre2_xclass.c
|
||||||
|
|
||||||
|
@ -373,7 +374,7 @@ Otherwise:
|
||||||
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
|
1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
|
||||||
have been created.
|
have been created.
|
||||||
|
|
||||||
2. Edit RunTest.bat to indentify the full or relative location of
|
2. Edit RunTest.bat to identify the full or relative location of
|
||||||
the pcre2 source (wherein which the testdata folder resides), e.g.:
|
the pcre2 source (wherein which the testdata folder resides), e.g.:
|
||||||
|
|
||||||
set srcdir=C:\pcre2\pcre2-10.00
|
set srcdir=C:\pcre2\pcre2-10.00
|
||||||
|
|
|
@ -17,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
|
||||||
You can access the archives and also subscribe or manage your subscription
|
You can access the archives and also subscribe or manage your subscription
|
||||||
here:
|
here:
|
||||||
|
|
||||||
https://groups.google.com/pcre2-dev
|
https://groups.google.com/g/pcre2-dev
|
||||||
|
|
||||||
Please read the NEWS file if you are upgrading from a previous release. The
|
Please read the NEWS file if you are upgrading from a previous release. The
|
||||||
contents of this README file are:
|
contents of this README file are:
|
||||||
|
@ -375,7 +375,8 @@ library. They are also documented in the pcre2build man page.
|
||||||
necessary to specify something like LIBS="-lncurses" as well. This is
|
necessary to specify something like LIBS="-lncurses" as well. This is
|
||||||
because, to quote the readline INSTALL, "Readline uses the termcap functions,
|
because, to quote the readline INSTALL, "Readline uses the termcap functions,
|
||||||
but does not link with the termcap or curses library itself, allowing
|
but does not link with the termcap or curses library itself, allowing
|
||||||
applications which link with readline the to choose an appropriate library."
|
applications which link with readline the option to choose an appropriate
|
||||||
|
library."
|
||||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||||
should fix it.
|
should fix it.
|
||||||
|
@ -400,10 +401,10 @@ library. They are also documented in the pcre2build man page.
|
||||||
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||||
be created. This is normally run under valgrind or used when PCRE2 is
|
be created. This is normally run under valgrind or used when PCRE2 is
|
||||||
compiled with address sanitizing enabled. It calls the fuzzing function and
|
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||||
outputs information about it is doing. The input strings are specified by
|
outputs information about what it is doing. The input strings are specified
|
||||||
arguments: if an argument starts with "=" the rest of it is a literal input
|
by arguments: if an argument starts with "=" the rest of it is a literal
|
||||||
string. Otherwise, it is assumed to be a file name, and the contents of the
|
input string. Otherwise, it is assumed to be a file name, and the contents
|
||||||
file are the test string.
|
of the file are the test string.
|
||||||
|
|
||||||
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
|
. Releases before 10.30 could be compiled with --disable-stack-for-recursion,
|
||||||
which caused pcre2_match() to use individual blocks on the heap for
|
which caused pcre2_match() to use individual blocks on the heap for
|
||||||
|
@ -695,7 +696,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
|
||||||
different code unit widths.
|
different code unit widths.
|
||||||
|
|
||||||
Test 15 contains a number of tests that must not be run with JIT. They check,
|
Test 15 contains a number of tests that must not be run with JIT. They check,
|
||||||
among other non-JIT things, the match-limiting features of the intepretive
|
among other non-JIT things, the match-limiting features of the interpretive
|
||||||
matcher.
|
matcher.
|
||||||
|
|
||||||
Test 16 is run only when JIT support is not available. It checks that an
|
Test 16 is run only when JIT support is not available. It checks that an
|
||||||
|
|
|
@ -1017,7 +1017,7 @@ has its own memory control arrangements (see the
|
||||||
documentation for more details). If the limit is reached, the negative error
|
documentation for more details). If the limit is reached, the negative error
|
||||||
code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
|
code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
|
||||||
is built; if it is not, the default is set very large and is essentially
|
is built; if it is not, the default is set very large and is essentially
|
||||||
"unlimited".
|
unlimited.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A value for the heap limit may also be supplied by an item at the start of a
|
A value for the heap limit may also be supplied by an item at the start of a
|
||||||
|
@ -1030,19 +1030,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
||||||
limit is set, less than the default.
|
limit is set, less than the default.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
|
The <b>pcre2_match()</b> function always needs some heap memory, so setting a
|
||||||
stack for recording backtracking points. The more nested backtracking points
|
value of zero guarantees a "heap limit exceeded" error. Details of how
|
||||||
there are (that is, the deeper the search tree), the more memory is needed.
|
<b>pcre2_match()</b> uses the heap are given in the
|
||||||
Heap memory is used only if the initial vector is too small. If the heap limit
|
<a href="pcre2perform.html"><b>pcre2perform</b></a>
|
||||||
is set to a value less than 21 (in particular, zero) no heap memory will be
|
documentation.
|
||||||
used. In this case, only patterns that do not have a lot of nested backtracking
|
|
||||||
can be successfully processed.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
|
For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
|
||||||
when processing pattern recursions, lookarounds, or atomic groups, and only if
|
processing pattern recursions, lookarounds, or atomic groups, and only if this
|
||||||
this is not big enough is heap memory used. In this case, too, setting a value
|
is not big enough is heap memory used. In this case, setting a value of zero
|
||||||
of zero disables the use of the heap.
|
disables the use of the heap.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
|
@ -1089,10 +1087,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
|
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
|
||||||
Each time a nested backtracking point is passed, a new memory "frame" is used
|
Each time a nested backtracking point is passed, a new memory frame is used
|
||||||
to remember the state of matching at that point. Thus, this parameter
|
to remember the state of matching at that point. Thus, this parameter
|
||||||
indirectly limits the amount of memory that is used in a match. However,
|
indirectly limits the amount of memory that is used in a match. However,
|
||||||
because the size of each memory "frame" depends on the number of capturing
|
because the size of each memory frame depends on the number of capturing
|
||||||
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
||||||
was more useful in versions before 10.30, where function recursion was used for
|
was more useful in versions before 10.30, where function recursion was used for
|
||||||
backtracking.
|
backtracking.
|
||||||
|
@ -3148,11 +3146,11 @@ The backtracking match limit was reached.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NOMEMORY
|
PCRE2_ERROR_NOMEMORY
|
||||||
</pre>
|
</pre>
|
||||||
If a pattern contains many nested backtracking points, heap memory is used to
|
Heap memory is used to remember backtracking points. This error is given when
|
||||||
remember them. This error is given when the memory allocation function (default
|
the memory allocation function (default or custom) fails. Note that a different
|
||||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
|
||||||
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
|
||||||
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NULL
|
PCRE2_ERROR_NULL
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -4020,9 +4018,9 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 14 December 2021
|
Last updated: 27 July 2022
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2021 University of Cambridge.
|
Copyright © 1997-2022 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -284,12 +284,11 @@ to the <b>configure</b> command. This setting also applies to the
|
||||||
counting is done differently).
|
counting is done differently).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
|
The <b>pcre2_match()</b> function uses heap memory to record backtracking
|
||||||
stack to record backtracking points. The more nested backtracking points there
|
points. The more nested backtracking points there are (that is, the deeper the
|
||||||
are (that is, the deeper the search tree), the more memory is needed. If the
|
search tree), the more memory is needed. There is an upper limit, specified in
|
||||||
initial vector is not large enough, heap memory is used, up to a certain limit,
|
kibibytes (units of 1024 bytes). This limit can be changed at run time, as
|
||||||
which is specified in kibibytes (units of 1024 bytes). The limit can be changed
|
described in the
|
||||||
at run time, as described in the
|
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
documentation. The default limit (in effect unlimited) is 20 million. You can
|
documentation. The default limit (in effect unlimited) is 20 million. You can
|
||||||
change this by a setting such as
|
change this by a setting such as
|
||||||
|
@ -609,16 +608,16 @@ give a warning.
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
University Computing Service
|
Retired from University Computing Service
|
||||||
<br>
|
<br>
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 08 December 2021
|
Last updated: 27 July 2022
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2021 University of Cambridge.
|
Copyright © 1997-2022 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -71,13 +71,15 @@ For example:
|
||||||
<pre>
|
<pre>
|
||||||
pcre2grep some-pattern file1 - file3
|
pcre2grep some-pattern file1 - file3
|
||||||
</pre>
|
</pre>
|
||||||
Input files are searched line by line. By default, each line that matches a
|
By default, input files are searched line by line. Each line that matches a
|
||||||
pattern is copied to the standard output, and if there is more than one file,
|
pattern is copied to the standard output, and if there is more than one file,
|
||||||
the file name is output at the start of each line, followed by a colon.
|
the file name is output at the start of each line, followed by a colon.
|
||||||
However, there are options that can change how <b>pcre2grep</b> behaves. In
|
However, there are options that can change how <b>pcre2grep</b> behaves. For
|
||||||
particular, the <b>-M</b> option makes it possible to search for strings that
|
example, the <b>-M</b> option makes it possible to search for strings that span
|
||||||
span line boundaries. What defines a line boundary is controlled by the
|
line boundaries. What defines a line boundary is controlled by the <b>-N</b>
|
||||||
<b>-N</b> (<b>--newline</b>) option.
|
(<b>--newline</b>) option. The <b>-h</b> and <b>-H</b> options control whether or
|
||||||
|
not file names are shown, and the <b>-Z</b> option changes the file name
|
||||||
|
terminator to a zero byte.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The amount of memory used for buffering files that are being scanned is
|
The amount of memory used for buffering files that are being scanned is
|
||||||
|
@ -178,9 +180,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
|
||||||
lines are output if the next match or the end of the file is reached, or if the
|
lines are output if the next match or the end of the file is reached, or if the
|
||||||
processing buffer size has been set too small. If file names and/or line
|
processing buffer size has been set too small. If file names and/or line
|
||||||
numbers are being output, a hyphen separator is used instead of a colon for the
|
numbers are being output, a hyphen separator is used instead of a colon for the
|
||||||
context lines. A line containing "--" is output between each group of lines,
|
context lines (the <b>-Z</b> option can be used to change the file name
|
||||||
unless they are in fact contiguous in the input file. The value of <i>number</i>
|
terminator to a zero byte). A line containing "--" is output between each group
|
||||||
is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
|
of lines, unless they are in fact contiguous in the input file. The value of
|
||||||
|
<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
|
||||||
|
<b>-A</b> is ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-a</b>, <b>--text</b>
|
<b>-a</b>, <b>--text</b>
|
||||||
|
@ -199,9 +203,10 @@ Output up to <i>number</i> lines of context before each matching line. Fewer
|
||||||
lines are output if the previous match or the start of the file is within
|
lines are output if the previous match or the start of the file is within
|
||||||
<i>number</i> lines, or if the processing buffer size has been set too small. If
|
<i>number</i> lines, or if the processing buffer size has been set too small. If
|
||||||
file names and/or line numbers are being output, a hyphen separator is used
|
file names and/or line numbers are being output, a hyphen separator is used
|
||||||
instead of a colon for the context lines. A line containing "--" is output
|
instead of a colon for the context lines (the <b>-Z</b> option can be used to
|
||||||
between each group of lines, unless they are in fact contiguous in the input
|
change the file name terminator to a zero byte). A line containing "--" is
|
||||||
file. The value of <i>number</i> is expected to be relatively small. When
|
output between each group of lines, unless they are in fact contiguous in the
|
||||||
|
input file. The value of <i>number</i> is expected to be relatively small. When
|
||||||
<b>-c</b> is used, <b>-B</b> is ignored.
|
<b>-c</b> is used, <b>-B</b> is ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -411,20 +416,22 @@ shown separately. This option is mutually exclusive with <b>--output</b>,
|
||||||
<P>
|
<P>
|
||||||
<b>-H</b>, <b>--with-filename</b>
|
<b>-H</b>, <b>--with-filename</b>
|
||||||
Force the inclusion of the file name at the start of output lines when
|
Force the inclusion of the file name at the start of output lines when
|
||||||
searching a single file. By default, the file name is not shown in this case.
|
searching a single file. The file name is not normally shown in this case.
|
||||||
For matching lines, the file name is followed by a colon; for context lines, a
|
By default, for matching lines, the file name is followed by a colon; for
|
||||||
hyphen separator is used. If a line number is also being output, it follows the
|
context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
|
||||||
file name. When the <b>-M</b> option causes a pattern to match more than one
|
change the terminator to a zero byte. If a line number is also being output,
|
||||||
line, only the first is preceded by the file name. This option overrides any
|
it follows the file name. When the <b>-M</b> option causes a pattern to match
|
||||||
previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
|
more than one line, only the first is preceded by the file name. This option
|
||||||
|
overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-h</b>, <b>--no-filename</b>
|
<b>-h</b>, <b>--no-filename</b>
|
||||||
Suppress the output file names when searching multiple files. By default,
|
Suppress the output of file names when searching multiple files. File names are
|
||||||
file names are shown when multiple files are searched. For matching lines, the
|
normally shown when multiple files are searched. By default, for matching
|
||||||
file name is followed by a colon; for context lines, a hyphen separator is used.
|
lines, the file name is followed by a colon; for context lines, a hyphen
|
||||||
If a line number is also being output, it follows the file name. This option
|
separator is used. The <b>-Z</b> option can be used to change the terminator to
|
||||||
overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
|
a zero byte. If a line number is also being output, it follows the file name.
|
||||||
|
This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--heap-limit</b>=<i>number</i>
|
<b>--heap-limit</b>=<i>number</i>
|
||||||
|
@ -481,18 +488,20 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
|
||||||
<b>-L</b>, <b>--files-without-match</b>
|
<b>-L</b>, <b>--files-without-match</b>
|
||||||
Instead of outputting lines from the files, just output the names of the files
|
Instead of outputting lines from the files, just output the names of the files
|
||||||
that do not contain any lines that would have been output. Each file name is
|
that do not contain any lines that would have been output. Each file name is
|
||||||
output once, on a separate line. This option overrides any previous <b>-H</b>,
|
output once, on a separate line by default, but if the <b>-Z</b> option is set,
|
||||||
<b>-h</b>, or <b>-l</b> options.
|
they are separated by zero bytes instead of newlines. This option overrides any
|
||||||
|
previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-l</b>, <b>--files-with-matches</b>
|
<b>-l</b>, <b>--files-with-matches</b>
|
||||||
Instead of outputting lines from the files, just output the names of the files
|
Instead of outputting lines from the files, just output the names of the files
|
||||||
containing lines that would have been output. Each file name is output once, on
|
containing lines that would have been output. Each file name is output once, on
|
||||||
a separate line. Searching normally stops as soon as a matching line is found
|
a separate line, but if the <b>-Z</b> option is set, they are separated by zero
|
||||||
in a file. However, if the <b>-c</b> (count) option is also used, matching
|
bytes instead of newlines. Searching normally stops as soon as a matching line
|
||||||
continues in order to obtain the correct count, and those files that have at
|
is found in a file. However, if the <b>-c</b> (count) option is also used,
|
||||||
least one match are listed along with their counts. Using this option with
|
matching continues in order to obtain the correct count, and those files that
|
||||||
<b>-c</b> is a way of suppressing the listing of files with no matches that
|
have at least one match are listed along with their counts. Using this option
|
||||||
|
with <b>-c</b> is a way of suppressing the listing of files with no matches that
|
||||||
occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
|
occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
|
||||||
<b>-h</b>, or <b>-L</b> options.
|
<b>-h</b>, or <b>-L</b> options.
|
||||||
</P>
|
</P>
|
||||||
|
@ -592,10 +601,7 @@ value set by <b>--match-limit</b> is reached, an error occurs.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
|
The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
|
||||||
1024 bytes), the amount of heap memory that may be used for matching. Heap
|
1024 bytes), the maximum amount of heap memory that may be used for matching.
|
||||||
memory is needed only if matching the pattern requires a significant number of
|
|
||||||
nested backtracking points to be remembered. This parameter can be set to zero
|
|
||||||
to forbid the use of heap memory altogether.
|
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
|
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
|
||||||
|
@ -839,6 +845,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
|
||||||
matched against the contents of files; it does not apply to patterns specified
|
matched against the contents of files; it does not apply to patterns specified
|
||||||
by any of the <b>--include</b> or <b>--exclude</b> options.
|
by any of the <b>--include</b> or <b>--exclude</b> options.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>-Z</b>, <b>--null</b>
|
||||||
|
Terminate file names in the regular output with a zero byte (the NUL
|
||||||
|
character) instead of what would normally appear. This is useful when file
|
||||||
|
names contain unusual characters such as colons, hyphens, or even newlines. The
|
||||||
|
option does not apply to file names in error messages.
|
||||||
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
<br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
||||||
<P>
|
<P>
|
||||||
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
|
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
|
||||||
|
@ -1053,9 +1066,9 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 31 August 2021
|
Last updated: 30 July 2022
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2021 University of Cambridge.
|
Copyright © 1997-2022 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -71,13 +71,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
||||||
The maximum length of a string argument to a callout is the largest number a
|
The maximum length of a string argument to a callout is the largest number a
|
||||||
32-bit unsigned integer can hold.
|
32-bit unsigned integer can hold.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
The maximum amount of heap memory used for matching is controlled by the heap
|
||||||
|
limit, which can be set in a pattern or in a match context. The default is a
|
||||||
|
very large number, effectively unlimited.
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
AUTHOR
|
AUTHOR
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
University Computing Service
|
Retired from University Computing Service
|
||||||
<br>
|
<br>
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
|
@ -86,9 +91,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 02 February 2019
|
Last updated: 26 July 2022
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2019 University of Cambridge.
|
Copyright © 1997-2022 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
|
||||||
uses very little system stack at run time. In earlier releases recursive
|
uses very little system stack at run time. In earlier releases recursive
|
||||||
function calls could use a great deal of stack, and this could cause problems,
|
function calls could use a great deal of stack, and this could cause problems,
|
||||||
but this usage has been eliminated. Backtracking positions are now explicitly
|
but this usage has been eliminated. Backtracking positions are now explicitly
|
||||||
remembered in memory frames controlled by the code. An initial 20KiB vector of
|
remembered in memory frames controlled by the code.
|
||||||
frames is allocated on the system stack (enough for about 100 frames for small
|
</P>
|
||||||
patterns), but if this is insufficient, heap memory is used. The amount of heap
|
<P>
|
||||||
memory can be limited; if the limit is set to zero, only the initial stack
|
The size of each frame depends on the size of pointer variables and the number
|
||||||
vector is used. Rewriting patterns to be time-efficient, as described below,
|
of capturing parenthesized groups in the pattern being matched. On a 64-bit
|
||||||
may also reduce the memory requirements.
|
system the frame size for a pattern with no captures is 128 bytes. For each
|
||||||
|
capturing group the size increases by 16 bytes.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Until release 10.41, an initial 20KiB frames vector was allocated on the system
|
||||||
|
stack, but this still caused some issues for multi-thread applications where
|
||||||
|
each thread has a very small stack. From release 10.41 backtracking memory
|
||||||
|
frames are always held in heap memory. An initial heap allocation is obtained
|
||||||
|
the first time any match data block is passed to <b>pcre2_match()</b>. This is
|
||||||
|
remembered with the match data block and re-used if that block is used for
|
||||||
|
another match. It is freed when the match data block itself is freed.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The size of the initial block is the larger of 20KiB or ten times the pattern's
|
||||||
|
frame size, unless the heap limit is less than this, in which case the heap
|
||||||
|
limit is used. If the initial block proves to be too small during matching, it
|
||||||
|
is replaced by a larger block, subject to the heap limit. The heap limit is
|
||||||
|
checked only when a new block is to be allocated. Reducing the heap limit
|
||||||
|
between calls to <b>pcre2_match()</b> with the same match data block does not
|
||||||
|
affect the saved block.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
|
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
|
||||||
|
@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
University Computing Service
|
Retired from University Computing Service
|
||||||
<br>
|
<br>
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 03 February 2019
|
Last updated: 27 July 2022
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2019 University of Cambridge.
|
Copyright © 1997-2022 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -1241,7 +1241,8 @@ pattern, but can be overridden by modifiers on the subject.
|
||||||
copy=<number or name> copy captured substring
|
copy=<number or name> copy captured substring
|
||||||
depth_limit=<n> set a depth limit
|
depth_limit=<n> set a depth limit
|
||||||
dfa use <b>pcre2_dfa_match()</b>
|
dfa use <b>pcre2_dfa_match()</b>
|
||||||
find_limits find match and depth limits
|
find_limits find heap, match and depth limits
|
||||||
|
find_limits_noheap find match and depth limits
|
||||||
get=<number or name> extract captured substring
|
get=<number or name> extract captured substring
|
||||||
getall extract all captured substrings
|
getall extract all captured substrings
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
@ -1564,7 +1565,7 @@ Setting heap, match, and depth limits
|
||||||
<P>
|
<P>
|
||||||
The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
|
The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
|
||||||
the appropriate limits in the match context. These values are ignored when the
|
the appropriate limits in the match context. These values are ignored when the
|
||||||
<b>find_limits</b> modifier is specified.
|
<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Finding minimum limits
|
Finding minimum limits
|
||||||
|
@ -1574,8 +1575,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
|
||||||
calls the relevant matching function several times, setting different values in
|
calls the relevant matching function several times, setting different values in
|
||||||
the match context via <b>pcre2_set_heap_limit()</b>,
|
the match context via <b>pcre2_set_heap_limit()</b>,
|
||||||
<b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
|
<b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
|
||||||
the minimum values for each parameter that allows the match to complete without
|
the smallest value for each parameter that allows the match to complete without
|
||||||
error. If JIT is being used, only the match limit is relevant.
|
a "limit exceeded" error. The match itself may succeed or fail. An alternative
|
||||||
|
modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
|
||||||
|
standard tests, because the minimum heap limit varies between systems. If JIT
|
||||||
|
is being used, only the match limit is relevant, and the other two are
|
||||||
|
automatically omitted.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When using this modifier, the pattern should not contain any limit settings
|
When using this modifier, the pattern should not contain any limit settings
|
||||||
|
@ -1603,9 +1608,7 @@ overall amount of computing resource that is used.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
|
For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
|
||||||
(units of 1024 bytes), limits the amount of heap memory used for matching. A
|
(units of 1024 bytes), limits the amount of heap memory used for matching.
|
||||||
value of zero disables the use of any heap memory; many simple pattern matches
|
|
||||||
can be done without using the heap, so zero is not an unreasonable setting.
|
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Showing MARK names
|
Showing MARK names
|
||||||
|
@ -1623,12 +1626,10 @@ Showing memory usage
|
||||||
<P>
|
<P>
|
||||||
The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
|
The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
|
||||||
memory allocation and freeing calls that occur during a call to
|
memory allocation and freeing calls that occur during a call to
|
||||||
<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
|
<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
|
||||||
requires a bigger vector than the default for remembering backtracking points
|
is used only when a match requires more internal workspace than the default
|
||||||
(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
|
allocation on the stack, so in many cases there will be no output. No heap
|
||||||
many cases there will be no heap memory used and therefore no additional
|
memory is allocated during matching with JIT. For this modifier to work, the
|
||||||
output. No heap memory is allocated during matching with JIT, so in that case
|
|
||||||
the <b>memory</b> modifier never has any effect. For this modifier to work, the
|
|
||||||
<b>null_context</b> modifier must not be set on both the pattern and the
|
<b>null_context</b> modifier must not be set on both the pattern and the
|
||||||
subject, though it can be set on one or the other.
|
subject, though it can be set on one or the other.
|
||||||
</P>
|
</P>
|
||||||
|
@ -1690,7 +1691,8 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
|
||||||
If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
|
If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
|
||||||
testing that the matching and substitution functions behave correctly in this
|
testing that the matching and substitution functions behave correctly in this
|
||||||
case (they use default values). This modifier cannot be used with the
|
case (they use default values). This modifier cannot be used with the
|
||||||
<b>find_limits</b> or <b>substitute_callout</b> modifiers.
|
<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
|
||||||
|
modifiers.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Similarly, for testing purposes, if the <b>null_subject</b> or
|
Similarly, for testing purposes, if the <b>null_subject</b> or
|
||||||
|
@ -2141,7 +2143,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 12 January 2022
|
Last updated: 27 July 2022
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2022 University of Cambridge.
|
Copyright © 1997-2022 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
114
doc/pcre2.txt
114
doc/pcre2.txt
|
@ -1028,7 +1028,7 @@ PCRE2 CONTEXTS
|
||||||
pcre2jit documentation for more details). If the limit is reached, the
|
pcre2jit documentation for more details). If the limit is reached, the
|
||||||
negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default
|
negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default
|
||||||
limit can be set when PCRE2 is built; if it is not, the default is set
|
limit can be set when PCRE2 is built; if it is not, the default is set
|
||||||
very large and is essentially "unlimited".
|
very large and is essentially unlimited.
|
||||||
|
|
||||||
A value for the heap limit may also be supplied by an item at the start
|
A value for the heap limit may also be supplied by an item at the start
|
||||||
of a pattern of the form
|
of a pattern of the form
|
||||||
|
@ -1039,19 +1039,15 @@ PCRE2 CONTEXTS
|
||||||
less ddd is less than the limit set by the caller of pcre2_match() or,
|
less ddd is less than the limit set by the caller of pcre2_match() or,
|
||||||
if no such limit is set, less than the default.
|
if no such limit is set, less than the default.
|
||||||
|
|
||||||
The pcre2_match() function starts out using a 20KiB vector on the sys-
|
The pcre2_match() function always needs some heap memory, so setting a
|
||||||
tem stack for recording backtracking points. The more nested backtrack-
|
value of zero guarantees a "heap limit exceeded" error. Details of how
|
||||||
ing points there are (that is, the deeper the search tree), the more
|
pcre2_match() uses the heap are given in the pcre2perform documenta-
|
||||||
memory is needed. Heap memory is used only if the initial vector is
|
tion.
|
||||||
too small. If the heap limit is set to a value less than 21 (in partic-
|
|
||||||
ular, zero) no heap memory will be used. In this case, only patterns
|
|
||||||
that do not have a lot of nested backtracking can be successfully pro-
|
|
||||||
cessed.
|
|
||||||
|
|
||||||
Similarly, for pcre2_dfa_match(), a vector on the system stack is used
|
For pcre2_dfa_match(), a vector on the system stack is used when pro-
|
||||||
when processing pattern recursions, lookarounds, or atomic groups, and
|
cessing pattern recursions, lookarounds, or atomic groups, and only if
|
||||||
only if this is not big enough is heap memory used. In this case, too,
|
this is not big enough is heap memory used. In this case, setting a
|
||||||
setting a value of zero disables the use of the heap.
|
value of zero disables the use of the heap.
|
||||||
|
|
||||||
int pcre2_set_match_limit(pcre2_match_context *mcontext,
|
int pcre2_set_match_limit(pcre2_match_context *mcontext,
|
||||||
uint32_t value);
|
uint32_t value);
|
||||||
|
@ -1093,12 +1089,12 @@ PCRE2 CONTEXTS
|
||||||
|
|
||||||
This parameter limits the depth of nested backtracking in
|
This parameter limits the depth of nested backtracking in
|
||||||
pcre2_match(). Each time a nested backtracking point is passed, a new
|
pcre2_match(). Each time a nested backtracking point is passed, a new
|
||||||
memory "frame" is used to remember the state of matching at that point.
|
memory frame is used to remember the state of matching at that point.
|
||||||
Thus, this parameter indirectly limits the amount of memory that is
|
Thus, this parameter indirectly limits the amount of memory that is
|
||||||
used in a match. However, because the size of each memory "frame" de-
|
used in a match. However, because the size of each memory frame depends
|
||||||
pends on the number of capturing parentheses, the actual memory limit
|
on the number of capturing parentheses, the actual memory limit varies
|
||||||
varies from pattern to pattern. This limit was more useful in versions
|
from pattern to pattern. This limit was more useful in versions before
|
||||||
before 10.30, where function recursion was used for backtracking.
|
10.30, where function recursion was used for backtracking.
|
||||||
|
|
||||||
The depth limit is not relevant, and is ignored, when matching is done
|
The depth limit is not relevant, and is ignored, when matching is done
|
||||||
using JIT compiled code. However, it is supported by pcre2_dfa_match(),
|
using JIT compiled code. However, it is supported by pcre2_dfa_match(),
|
||||||
|
@ -3051,12 +3047,12 @@ ERROR RETURNS FROM pcre2_match()
|
||||||
|
|
||||||
PCRE2_ERROR_NOMEMORY
|
PCRE2_ERROR_NOMEMORY
|
||||||
|
|
||||||
If a pattern contains many nested backtracking points, heap memory is
|
Heap memory is used to remember backtracking points. This error is
|
||||||
used to remember them. This error is given when the memory allocation
|
given when the memory allocation function (default or custom) fails.
|
||||||
function (default or custom) fails. Note that a different error,
|
Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given if the
|
||||||
PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
|
amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
||||||
the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
|
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory alloca-
|
||||||
PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
tion fails.
|
||||||
|
|
||||||
PCRE2_ERROR_NULL
|
PCRE2_ERROR_NULL
|
||||||
|
|
||||||
|
@ -3860,8 +3856,8 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 14 December 2021
|
Last updated: 27 July 2022
|
||||||
Copyright (c) 1997-2021 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@ -4118,14 +4114,13 @@ LIMITING PCRE2 RESOURCE USAGE
|
||||||
pcre2_dfa_match() matching function, and to JIT matching (though the
|
pcre2_dfa_match() matching function, and to JIT matching (though the
|
||||||
counting is done differently).
|
counting is done differently).
|
||||||
|
|
||||||
The pcre2_match() function starts out using a 20KiB vector on the sys-
|
The pcre2_match() function uses heap memory to record backtracking
|
||||||
tem stack to record backtracking points. The more nested backtracking
|
points. The more nested backtracking points there are (that is, the
|
||||||
points there are (that is, the deeper the search tree), the more memory
|
deeper the search tree), the more memory is needed. There is an upper
|
||||||
is needed. If the initial vector is not large enough, heap memory is
|
limit, specified in kibibytes (units of 1024 bytes). This limit can be
|
||||||
used, up to a certain limit, which is specified in kibibytes (units of
|
changed at run time, as described in the pcre2api documentation. The
|
||||||
1024 bytes). The limit can be changed at run time, as described in the
|
default limit (in effect unlimited) is 20 million. You can change this
|
||||||
pcre2api documentation. The default limit (in effect unlimited) is 20
|
by a setting such as
|
||||||
million. You can change this by a setting such as
|
|
||||||
|
|
||||||
--with-heap-limit=500
|
--with-heap-limit=500
|
||||||
|
|
||||||
|
@ -4450,14 +4445,14 @@ SEE ALSO
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
University Computing Service
|
Retired from University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 08 December 2021
|
Last updated: 27 July 2022
|
||||||
Copyright (c) 1997-2021 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@ -5596,18 +5591,22 @@ SIZE AND OTHER LIMITATIONS
|
||||||
The maximum length of a string argument to a callout is the largest
|
The maximum length of a string argument to a callout is the largest
|
||||||
number a 32-bit unsigned integer can hold.
|
number a 32-bit unsigned integer can hold.
|
||||||
|
|
||||||
|
The maximum amount of heap memory used for matching is controlled by
|
||||||
|
the heap limit, which can be set in a pattern or in a match context.
|
||||||
|
The default is a very large number, effectively unlimited.
|
||||||
|
|
||||||
|
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
University Computing Service
|
Retired from University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 02 February 2019
|
Last updated: 26 July 2022
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@ -9773,12 +9772,29 @@ STACK AND HEAP USAGE AT RUN TIME
|
||||||
sive function calls could use a great deal of stack, and this could
|
sive function calls could use a great deal of stack, and this could
|
||||||
cause problems, but this usage has been eliminated. Backtracking posi-
|
cause problems, but this usage has been eliminated. Backtracking posi-
|
||||||
tions are now explicitly remembered in memory frames controlled by the
|
tions are now explicitly remembered in memory frames controlled by the
|
||||||
code. An initial 20KiB vector of frames is allocated on the system
|
code.
|
||||||
stack (enough for about 100 frames for small patterns), but if this is
|
|
||||||
insufficient, heap memory is used. The amount of heap memory can be
|
The size of each frame depends on the size of pointer variables and the
|
||||||
limited; if the limit is set to zero, only the initial stack vector is
|
number of capturing parenthesized groups in the pattern being matched.
|
||||||
used. Rewriting patterns to be time-efficient, as described below, may
|
On a 64-bit system the frame size for a pattern with no captures is 128
|
||||||
also reduce the memory requirements.
|
bytes. For each capturing group the size increases by 16 bytes.
|
||||||
|
|
||||||
|
Until release 10.41, an initial 20KiB frames vector was allocated on
|
||||||
|
the system stack, but this still caused some issues for multi-thread
|
||||||
|
applications where each thread has a very small stack. From release
|
||||||
|
10.41 backtracking memory frames are always held in heap memory. An
|
||||||
|
initial heap allocation is obtained the first time any match data block
|
||||||
|
is passed to pcre2_match(). This is remembered with the match data
|
||||||
|
block and re-used if that block is used for another match. It is freed
|
||||||
|
when the match data block itself is freed.
|
||||||
|
|
||||||
|
The size of the initial block is the larger of 20KiB or ten times the
|
||||||
|
pattern's frame size, unless the heap limit is less than this, in which
|
||||||
|
case the heap limit is used. If the initial block proves to be too
|
||||||
|
small during matching, it is replaced by a larger block, subject to the
|
||||||
|
heap limit. The heap limit is checked only when a new block is to be
|
||||||
|
allocated. Reducing the heap limit between calls to pcre2_match() with
|
||||||
|
the same match data block does not affect the saved block.
|
||||||
|
|
||||||
In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
|
In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
|
||||||
function calls, but only for processing atomic groups, lookaround as-
|
function calls, but only for processing atomic groups, lookaround as-
|
||||||
|
@ -9926,14 +9942,14 @@ PROCESSING TIME
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
University Computing Service
|
Retired from University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 03 February 2019
|
Last updated: 27 July 2022
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2GREP 1 "27 July 2022" "PCRE2 10.41"
|
.TH PCRE2GREP 1 "30 July 2022" "PCRE2 10.41"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -43,13 +43,15 @@ For example:
|
||||||
.sp
|
.sp
|
||||||
pcre2grep some-pattern file1 - file3
|
pcre2grep some-pattern file1 - file3
|
||||||
.sp
|
.sp
|
||||||
Input files are searched line by line. By default, each line that matches a
|
By default, input files are searched line by line. Each line that matches a
|
||||||
pattern is copied to the standard output, and if there is more than one file,
|
pattern is copied to the standard output, and if there is more than one file,
|
||||||
the file name is output at the start of each line, followed by a colon.
|
the file name is output at the start of each line, followed by a colon.
|
||||||
However, there are options that can change how \fBpcre2grep\fP behaves. In
|
However, there are options that can change how \fBpcre2grep\fP behaves. For
|
||||||
particular, the \fB-M\fP option makes it possible to search for strings that
|
example, the \fB-M\fP option makes it possible to search for strings that span
|
||||||
span line boundaries. What defines a line boundary is controlled by the
|
line boundaries. What defines a line boundary is controlled by the \fB-N\fP
|
||||||
\fB-N\fP (\fB--newline\fP) option.
|
(\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
|
||||||
|
not file names are shown, and the \fB-Z\fP option changes the file name
|
||||||
|
terminator to a zero byte.
|
||||||
.P
|
.P
|
||||||
The amount of memory used for buffering files that are being scanned is
|
The amount of memory used for buffering files that are being scanned is
|
||||||
controlled by parameters that can be set by the \fB--buffer-size\fP and
|
controlled by parameters that can be set by the \fB--buffer-size\fP and
|
||||||
|
@ -149,9 +151,11 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
|
||||||
lines are output if the next match or the end of the file is reached, or if the
|
lines are output if the next match or the end of the file is reached, or if the
|
||||||
processing buffer size has been set too small. If file names and/or line
|
processing buffer size has been set too small. If file names and/or line
|
||||||
numbers are being output, a hyphen separator is used instead of a colon for the
|
numbers are being output, a hyphen separator is used instead of a colon for the
|
||||||
context lines. A line containing "--" is output between each group of lines,
|
context lines (the \fB-Z\fP option can be used to change the file name
|
||||||
unless they are in fact contiguous in the input file. The value of \fInumber\fP
|
terminator to a zero byte). A line containing "--" is output between each group
|
||||||
is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
|
of lines, unless they are in fact contiguous in the input file. The value of
|
||||||
|
\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
|
||||||
|
\fB-A\fP is ignored.
|
||||||
.TP
|
.TP
|
||||||
\fB-a\fP, \fB--text\fP
|
\fB-a\fP, \fB--text\fP
|
||||||
Treat binary files as text. This is equivalent to
|
Treat binary files as text. This is equivalent to
|
||||||
|
@ -167,9 +171,10 @@ Output up to \fInumber\fP lines of context before each matching line. Fewer
|
||||||
lines are output if the previous match or the start of the file is within
|
lines are output if the previous match or the start of the file is within
|
||||||
\fInumber\fP lines, or if the processing buffer size has been set too small. If
|
\fInumber\fP lines, or if the processing buffer size has been set too small. If
|
||||||
file names and/or line numbers are being output, a hyphen separator is used
|
file names and/or line numbers are being output, a hyphen separator is used
|
||||||
instead of a colon for the context lines. A line containing "--" is output
|
instead of a colon for the context lines (the \fB-Z\fP option can be used to
|
||||||
between each group of lines, unless they are in fact contiguous in the input
|
change the file name terminator to a zero byte). A line containing "--" is
|
||||||
file. The value of \fInumber\fP is expected to be relatively small. When
|
output between each group of lines, unless they are in fact contiguous in the
|
||||||
|
input file. The value of \fInumber\fP is expected to be relatively small. When
|
||||||
\fB-c\fP is used, \fB-B\fP is ignored.
|
\fB-c\fP is used, \fB-B\fP is ignored.
|
||||||
.TP
|
.TP
|
||||||
\fB--binary-files=\fP\fIword\fP
|
\fB--binary-files=\fP\fIword\fP
|
||||||
|
@ -356,19 +361,21 @@ shown separately. This option is mutually exclusive with \fB--output\fP,
|
||||||
.TP
|
.TP
|
||||||
\fB-H\fP, \fB--with-filename\fP
|
\fB-H\fP, \fB--with-filename\fP
|
||||||
Force the inclusion of the file name at the start of output lines when
|
Force the inclusion of the file name at the start of output lines when
|
||||||
searching a single file. By default, the file name is not shown in this case.
|
searching a single file. The file name is not normally shown in this case.
|
||||||
For matching lines, the file name is followed by a colon; for context lines, a
|
By default, for matching lines, the file name is followed by a colon; for
|
||||||
hyphen separator is used. If a line number is also being output, it follows the
|
context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
|
||||||
file name. When the \fB-M\fP option causes a pattern to match more than one
|
change the terminator to a zero byte. If a line number is also being output,
|
||||||
line, only the first is preceded by the file name. This option overrides any
|
it follows the file name. When the \fB-M\fP option causes a pattern to match
|
||||||
previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
|
more than one line, only the first is preceded by the file name. This option
|
||||||
|
overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
|
||||||
.TP
|
.TP
|
||||||
\fB-h\fP, \fB--no-filename\fP
|
\fB-h\fP, \fB--no-filename\fP
|
||||||
Suppress the output file names when searching multiple files. By default,
|
Suppress the output file names when searching multiple files. File names are
|
||||||
file names are shown when multiple files are searched. For matching lines, the
|
normally shown when multiple files are searched. By default, for matching
|
||||||
file name is followed by a colon; for context lines, a hyphen separator is used.
|
lines, the file name is followed by a colon; for context lines, a hyphen
|
||||||
If a line number is also being output, it follows the file name. This option
|
separator is used. The \fB-Z\fP option can be used to change the terminator to
|
||||||
overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
|
a zero byte. If a line number is also being output, it follows the file name.
|
||||||
|
This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
|
||||||
.TP
|
.TP
|
||||||
\fB--heap-limit\fP=\fInumber\fP
|
\fB--heap-limit\fP=\fInumber\fP
|
||||||
See \fB--match-limit\fP below.
|
See \fB--match-limit\fP below.
|
||||||
|
@ -417,17 +424,19 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
|
||||||
\fB-L\fP, \fB--files-without-match\fP
|
\fB-L\fP, \fB--files-without-match\fP
|
||||||
Instead of outputting lines from the files, just output the names of the files
|
Instead of outputting lines from the files, just output the names of the files
|
||||||
that do not contain any lines that would have been output. Each file name is
|
that do not contain any lines that would have been output. Each file name is
|
||||||
output once, on a separate line. This option overrides any previous \fB-H\fP,
|
output once, on a separate line by default, but if the \fB-Z\fP option is set,
|
||||||
\fB-h\fP, or \fB-l\fP options.
|
they are separated by zero bytes instead of newlines. This option overrides any
|
||||||
|
previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
|
||||||
.TP
|
.TP
|
||||||
\fB-l\fP, \fB--files-with-matches\fP
|
\fB-l\fP, \fB--files-with-matches\fP
|
||||||
Instead of outputting lines from the files, just output the names of the files
|
Instead of outputting lines from the files, just output the names of the files
|
||||||
containing lines that would have been output. Each file name is output once, on
|
containing lines that would have been output. Each file name is output once, on
|
||||||
a separate line. Searching normally stops as soon as a matching line is found
|
a separate line, but if the \fB-Z\fP option is set, they are separated by zero
|
||||||
in a file. However, if the \fB-c\fP (count) option is also used, matching
|
bytes instead of newlines. Searching normally stops as soon as a matching line
|
||||||
continues in order to obtain the correct count, and those files that have at
|
is found in a file. However, if the \fB-c\fP (count) option is also used,
|
||||||
least one match are listed along with their counts. Using this option with
|
matching continues in order to obtain the correct count, and those files that
|
||||||
\fB-c\fP is a way of suppressing the listing of files with no matches that
|
have at least one match are listed along with their counts. Using this option
|
||||||
|
with \fB-c\fP is a way of suppressing the listing of files with no matches that
|
||||||
occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
|
occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
|
||||||
\fB-h\fP, or \fB-L\fP options.
|
\fB-h\fP, or \fB-L\fP options.
|
||||||
.TP
|
.TP
|
||||||
|
@ -729,6 +738,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
|
||||||
pattern and ")$" at the end. This option applies only to the patterns that are
|
pattern and ")$" at the end. This option applies only to the patterns that are
|
||||||
matched against the contents of files; it does not apply to patterns specified
|
matched against the contents of files; it does not apply to patterns specified
|
||||||
by any of the \fB--include\fP or \fB--exclude\fP options.
|
by any of the \fB--include\fP or \fB--exclude\fP options.
|
||||||
|
.TP
|
||||||
|
\fB-Z\fP, \fB--null\fP
|
||||||
|
Terminate file names in the regular output with a zero byte (the NUL
|
||||||
|
character) instead of what would normally appear. This is useful when file
|
||||||
|
names contain unusual characters such as colons, hyphens, or even newlines. The
|
||||||
|
option does not apply to file names in error messages.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "ENVIRONMENT VARIABLES"
|
.SH "ENVIRONMENT VARIABLES"
|
||||||
|
@ -957,6 +972,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 27 July 2022
|
Last updated: 30 July 2022
|
||||||
Copyright (c) 1997-2022 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -42,13 +42,15 @@ DESCRIPTION
|
||||||
|
|
||||||
pcre2grep some-pattern file1 - file3
|
pcre2grep some-pattern file1 - file3
|
||||||
|
|
||||||
Input files are searched line by line. By default, each line that
|
By default, input files are searched line by line. Each line that
|
||||||
matches a pattern is copied to the standard output, and if there is
|
matches a pattern is copied to the standard output, and if there is
|
||||||
more than one file, the file name is output at the start of each line,
|
more than one file, the file name is output at the start of each line,
|
||||||
followed by a colon. However, there are options that can change how
|
followed by a colon. However, there are options that can change how
|
||||||
pcre2grep behaves. In particular, the -M option makes it possible to
|
pcre2grep behaves. For example, the -M option makes it possible to
|
||||||
search for strings that span line boundaries. What defines a line
|
search for strings that span line boundaries. What defines a line
|
||||||
boundary is controlled by the -N (--newline) option.
|
boundary is controlled by the -N (--newline) option. The -h and -H op-
|
||||||
|
tions control whether or not file names are shown, and the -Z option
|
||||||
|
changes the file name terminator to a zero byte.
|
||||||
|
|
||||||
The amount of memory used for buffering files that are being scanned is
|
The amount of memory used for buffering files that are being scanned is
|
||||||
controlled by parameters that can be set by the --buffer-size and
|
controlled by parameters that can be set by the --buffer-size and
|
||||||
|
@ -149,10 +151,12 @@ OPTIONS
|
||||||
the file is reached, or if the processing buffer size has
|
the file is reached, or if the processing buffer size has
|
||||||
been set too small. If file names and/or line numbers are be-
|
been set too small. If file names and/or line numbers are be-
|
||||||
ing output, a hyphen separator is used instead of a colon for
|
ing output, a hyphen separator is used instead of a colon for
|
||||||
the context lines. A line containing "--" is output between
|
the context lines (the -Z option can be used to change the
|
||||||
each group of lines, unless they are in fact contiguous in
|
file name terminator to a zero byte). A line containing "--"
|
||||||
the input file. The value of number is expected to be rela-
|
is output between each group of lines, unless they are in
|
||||||
tively small. When -c is used, -A is ignored.
|
fact contiguous in the input file. The value of number is ex-
|
||||||
|
pected to be relatively small. When -c is used, -A is ig-
|
||||||
|
nored.
|
||||||
|
|
||||||
-a, --text
|
-a, --text
|
||||||
Treat binary files as text. This is equivalent to --binary-
|
Treat binary files as text. This is equivalent to --binary-
|
||||||
|
@ -170,11 +174,12 @@ OPTIONS
|
||||||
start of the file is within number lines, or if the process-
|
start of the file is within number lines, or if the process-
|
||||||
ing buffer size has been set too small. If file names and/or
|
ing buffer size has been set too small. If file names and/or
|
||||||
line numbers are being output, a hyphen separator is used in-
|
line numbers are being output, a hyphen separator is used in-
|
||||||
stead of a colon for the context lines. A line containing
|
stead of a colon for the context lines (the -Z option can be
|
||||||
"--" is output between each group of lines, unless they are
|
used to change the file name terminator to a zero byte). A
|
||||||
in fact contiguous in the input file. The value of number is
|
line containing "--" is output between each group of lines,
|
||||||
expected to be relatively small. When -c is used, -B is ig-
|
unless they are in fact contiguous in the input file. The
|
||||||
nored.
|
value of number is expected to be relatively small. When -c
|
||||||
|
is used, -B is ignored.
|
||||||
|
|
||||||
--binary-files=word
|
--binary-files=word
|
||||||
Specify how binary files are to be processed. If the word is
|
Specify how binary files are to be processed. If the word is
|
||||||
|
@ -387,22 +392,25 @@ OPTIONS
|
||||||
|
|
||||||
-H, --with-filename
|
-H, --with-filename
|
||||||
Force the inclusion of the file name at the start of output
|
Force the inclusion of the file name at the start of output
|
||||||
lines when searching a single file. By default, the file name
|
lines when searching a single file. The file name is not nor-
|
||||||
is not shown in this case. For matching lines, the file name
|
mally shown in this case. By default, for matching lines,
|
||||||
is followed by a colon; for context lines, a hyphen separator
|
the file name is followed by a colon; for context lines, a
|
||||||
is used. If a line number is also being output, it follows
|
hyphen separator is used. The -Z option can be used to change
|
||||||
the file name. When the -M option causes a pattern to match
|
the terminator to a zero byte. If a line number is also being
|
||||||
more than one line, only the first is preceded by the file
|
output, it follows the file name. When the -M option causes a
|
||||||
name. This option overrides any previous -h, -l, or -L op-
|
pattern to match more than one line, only the first is pre-
|
||||||
tions.
|
ceded by the file name. This option overrides any previous
|
||||||
|
-h, -l, or -L options.
|
||||||
|
|
||||||
-h, --no-filename
|
-h, --no-filename
|
||||||
Suppress the output file names when searching multiple files.
|
Suppress the output file names when searching multiple files.
|
||||||
By default, file names are shown when multiple files are
|
File names are normally shown when multiple files are
|
||||||
searched. For matching lines, the file name is followed by a
|
searched. By default, for matching lines, the file name is
|
||||||
colon; for context lines, a hyphen separator is used. If a
|
followed by a colon; for context lines, a hyphen separator is
|
||||||
line number is also being output, it follows the file name.
|
used. The -Z option can be used to change the terminator to a
|
||||||
This option overrides any previous -H, -L, or -l options.
|
zero byte. If a line number is also being output, it follows
|
||||||
|
the file name. This option overrides any previous -H, -L, or
|
||||||
|
-l options.
|
||||||
|
|
||||||
--heap-limit=number
|
--heap-limit=number
|
||||||
See --match-limit below.
|
See --match-limit below.
|
||||||
|
@ -455,21 +463,23 @@ OPTIONS
|
||||||
Instead of outputting lines from the files, just output the
|
Instead of outputting lines from the files, just output the
|
||||||
names of the files that do not contain any lines that would
|
names of the files that do not contain any lines that would
|
||||||
have been output. Each file name is output once, on a sepa-
|
have been output. Each file name is output once, on a sepa-
|
||||||
rate line. This option overrides any previous -H, -h, or -l
|
rate line by default, but if the -Z option is set, they are
|
||||||
options.
|
separated by zero bytes instead of newlines. This option
|
||||||
|
overrides any previous -H, -h, or -l options.
|
||||||
|
|
||||||
-l, --files-with-matches
|
-l, --files-with-matches
|
||||||
Instead of outputting lines from the files, just output the
|
Instead of outputting lines from the files, just output the
|
||||||
names of the files containing lines that would have been out-
|
names of the files containing lines that would have been out-
|
||||||
put. Each file name is output once, on a separate line.
|
put. Each file name is output once, on a separate line, but
|
||||||
Searching normally stops as soon as a matching line is found
|
if the -Z option is set, they are separated by zero bytes in-
|
||||||
in a file. However, if the -c (count) option is also used,
|
stead of newlines. Searching normally stops as soon as a
|
||||||
matching continues in order to obtain the correct count, and
|
matching line is found in a file. However, if the -c (count)
|
||||||
those files that have at least one match are listed along
|
option is also used, matching continues in order to obtain
|
||||||
with their counts. Using this option with -c is a way of sup-
|
the correct count, and those files that have at least one
|
||||||
pressing the listing of files with no matches that occurs
|
match are listed along with their counts. Using this option
|
||||||
with -c on its own. This option overrides any previous -H,
|
with -c is a way of suppressing the listing of files with no
|
||||||
-h, or -L options.
|
matches that occurs with -c on its own. This option overrides
|
||||||
|
any previous -H, -h, or -L options.
|
||||||
|
|
||||||
--label=name
|
--label=name
|
||||||
This option supplies a name to be used for the standard input
|
This option supplies a name to be used for the standard input
|
||||||
|
@ -571,11 +581,8 @@ OPTIONS
|
||||||
an error occurs.
|
an error occurs.
|
||||||
|
|
||||||
The --heap-limit option specifies, as a number of kibibytes
|
The --heap-limit option specifies, as a number of kibibytes
|
||||||
(units of 1024 bytes), the amount of heap memory that may be
|
(units of 1024 bytes), the maximum amount of heap memory that
|
||||||
used for matching. Heap memory is needed only if matching the
|
may be used for matching.
|
||||||
pattern requires a significant number of nested backtracking
|
|
||||||
points to be remembered. This parameter can be set to zero to
|
|
||||||
forbid the use of heap memory altogether.
|
|
||||||
|
|
||||||
The --depth-limit option limits the depth of nested back-
|
The --depth-limit option limits the depth of nested back-
|
||||||
tracking points, which indirectly limits the amount of memory
|
tracking points, which indirectly limits the amount of memory
|
||||||
|
@ -812,6 +819,13 @@ OPTIONS
|
||||||
does not apply to patterns specified by any of the --include
|
does not apply to patterns specified by any of the --include
|
||||||
or --exclude options.
|
or --exclude options.
|
||||||
|
|
||||||
|
-Z, --null
|
||||||
|
Terminate file names in the regular output with a zero byte
|
||||||
|
(the NUL character) instead of what would normally appear.
|
||||||
|
This is useful when file names contain unusual characters
|
||||||
|
such as colons, hyphens, or even newlines. The option does
|
||||||
|
not apply to file names in error messages.
|
||||||
|
|
||||||
|
|
||||||
ENVIRONMENT VARIABLES
|
ENVIRONMENT VARIABLES
|
||||||
|
|
||||||
|
@ -1022,5 +1036,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 31 August 2021
|
Last updated: 30 July 2022
|
||||||
Copyright (c) 1997-2021 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
|
|
|
@ -1111,7 +1111,8 @@ SUBJECT MODIFIERS
|
||||||
copy=<number or name> copy captured substring
|
copy=<number or name> copy captured substring
|
||||||
depth_limit=<n> set a depth limit
|
depth_limit=<n> set a depth limit
|
||||||
dfa use pcre2_dfa_match()
|
dfa use pcre2_dfa_match()
|
||||||
find_limits find match and depth limits
|
find_limits find heap, match and depth limits
|
||||||
|
find_limits_noheap find match and depth limits
|
||||||
get=<number or name> extract captured substring
|
get=<number or name> extract captured substring
|
||||||
getall extract all captured substrings
|
getall extract all captured substrings
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
@ -1411,7 +1412,7 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
The heap_limit, match_limit, and depth_limit modifiers set the appro-
|
The heap_limit, match_limit, and depth_limit modifiers set the appro-
|
||||||
priate limits in the match context. These values are ignored when the
|
priate limits in the match context. These values are ignored when the
|
||||||
find_limits modifier is specified.
|
find_limits or find_limits_noheap modifier is specified.
|
||||||
|
|
||||||
Finding minimum limits
|
Finding minimum limits
|
||||||
|
|
||||||
|
@ -1419,8 +1420,12 @@ SUBJECT MODIFIERS
|
||||||
calls the relevant matching function several times, setting different
|
calls the relevant matching function several times, setting different
|
||||||
values in the match context via pcre2_set_heap_limit(),
|
values in the match context via pcre2_set_heap_limit(),
|
||||||
pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
|
pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
|
||||||
minimum values for each parameter that allows the match to complete
|
smallest value for each parameter that allows the match to complete
|
||||||
without error. If JIT is being used, only the match limit is relevant.
|
without a "limit exceeded" error. The match itself may succeed or fail.
|
||||||
|
An alternative modifier, find_limits_noheap, omits the heap limit. This
|
||||||
|
is used in the standard tests, because the minimum heap limit varies
|
||||||
|
between systems. If JIT is being used, only the match limit is rele-
|
||||||
|
vant, and the other two are automatically omitted.
|
||||||
|
|
||||||
When using this modifier, the pattern should not contain any limit set-
|
When using this modifier, the pattern should not contain any limit set-
|
||||||
tings such as (*LIMIT_MATCH=...) within it. If such a setting is
|
tings such as (*LIMIT_MATCH=...) within it. If such a setting is
|
||||||
|
@ -1446,9 +1451,7 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
For both kinds of matching, the heap_limit number, which is in
|
For both kinds of matching, the heap_limit number, which is in
|
||||||
kibibytes (units of 1024 bytes), limits the amount of heap memory used
|
kibibytes (units of 1024 bytes), limits the amount of heap memory used
|
||||||
for matching. A value of zero disables the use of any heap memory; many
|
for matching.
|
||||||
simple pattern matches can be done without using the heap, so zero is
|
|
||||||
not an unreasonable setting.
|
|
||||||
|
|
||||||
Showing MARK names
|
Showing MARK names
|
||||||
|
|
||||||
|
@ -1463,13 +1466,11 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
The memory modifier causes pcre2test to log the sizes of all heap mem-
|
The memory modifier causes pcre2test to log the sizes of all heap mem-
|
||||||
ory allocation and freeing calls that occur during a call to
|
ory allocation and freeing calls that occur during a call to
|
||||||
pcre2_match() or pcre2_dfa_match(). These occur only when a match re-
|
pcre2_match() or pcre2_dfa_match(). In the latter case, heap memory is
|
||||||
quires a bigger vector than the default for remembering backtracking
|
used only when a match requires more internal workspace than the de-
|
||||||
points (pcre2_match()) or for internal workspace (pcre2_dfa_match()).
|
fault allocation on the stack, so in many cases there will be no out-
|
||||||
In many cases there will be no heap memory used and therefore no addi-
|
put. No heap memory is allocated during matching with JIT. For this
|
||||||
tional output. No heap memory is allocated during matching with JIT, so
|
modifier to work, the null_context modifier must not be set on both the
|
||||||
in that case the memory modifier never has any effect. For this modi-
|
|
||||||
fier to work, the null_context modifier must not be set on both the
|
|
||||||
pattern and the subject, though it can be set on one or the other.
|
pattern and the subject, though it can be set on one or the other.
|
||||||
|
|
||||||
Setting a starting offset
|
Setting a starting offset
|
||||||
|
@ -1518,7 +1519,8 @@ SUBJECT MODIFIERS
|
||||||
null_context modifier is set, however, NULL is passed. This is for
|
null_context modifier is set, however, NULL is passed. This is for
|
||||||
testing that the matching and substitution functions behave correctly
|
testing that the matching and substitution functions behave correctly
|
||||||
in this case (they use default values). This modifier cannot be used
|
in this case (they use default values). This modifier cannot be used
|
||||||
with the find_limits or substitute_callout modifiers.
|
with the find_limits, find_limits_noheap, or substitute_callout modi-
|
||||||
|
fiers.
|
||||||
|
|
||||||
Similarly, for testing purposes, if the null_subject or null_replace-
|
Similarly, for testing purposes, if the null_subject or null_replace-
|
||||||
ment modifier is set, the subject or replacement string pointers are
|
ment modifier is set, the subject or replacement string pointers are
|
||||||
|
@ -1949,5 +1951,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 12 January 2022
|
Last updated: 27 July 2022
|
||||||
Copyright (c) 1997-2022 University of Cambridge.
|
Copyright (c) 1997-2022 University of Cambridge.
|
||||||
|
|
|
@ -205,9 +205,6 @@ point. */
|
||||||
* Global variables *
|
* Global variables *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* Jeffrey Friedl has some debugging requirements that are not part of the
|
|
||||||
regular code. */
|
|
||||||
|
|
||||||
static const char *colour_string = "1;31";
|
static const char *colour_string = "1;31";
|
||||||
static const char *colour_option = NULL;
|
static const char *colour_option = NULL;
|
||||||
static const char *dee_option = NULL;
|
static const char *dee_option = NULL;
|
||||||
|
@ -220,6 +217,10 @@ static const char *output_text = NULL;
|
||||||
|
|
||||||
static char *main_buffer = NULL;
|
static char *main_buffer = NULL;
|
||||||
|
|
||||||
|
static const char *printname_nl = STDOUT_NL; /* Changed to NULL for -Z */
|
||||||
|
static int printname_colon = ':'; /* Changed to 0 for -Z */
|
||||||
|
static int printname_hyphen = '-'; /* Changed to 0 for -Z */
|
||||||
|
|
||||||
static int after_context = 0;
|
static int after_context = 0;
|
||||||
static int before_context = 0;
|
static int before_context = 0;
|
||||||
static int binary_files = BIN_BINARY;
|
static int binary_files = BIN_BINARY;
|
||||||
|
@ -483,6 +484,7 @@ static option_item optionlist[] = {
|
||||||
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
|
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
|
||||||
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
|
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
|
||||||
{ OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
|
{ OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
|
||||||
|
{ OP_NODATA, 'Z', NULL, "null", "output 0 byte after file names" },
|
||||||
{ OP_NODATA, 0, NULL, NULL, NULL }
|
{ OP_NODATA, 0, NULL, NULL, NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1773,7 +1775,7 @@ if (after_context > 0 && lastmatchnumber > 0)
|
||||||
{
|
{
|
||||||
char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
|
char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
|
||||||
if (ellength == 0 && pp == main_buffer + bufsize) break;
|
if (ellength == 0 && pp == main_buffer + bufsize) break;
|
||||||
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
|
||||||
if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
|
if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
|
||||||
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
||||||
lastmatchrestart = pp;
|
lastmatchrestart = pp;
|
||||||
|
@ -2730,7 +2732,9 @@ while (ptr < endptr)
|
||||||
|
|
||||||
else if (filenames == FN_MATCH_ONLY)
|
else if (filenames == FN_MATCH_ONLY)
|
||||||
{
|
{
|
||||||
fprintf(stdout, "%s" STDOUT_NL, printname);
|
fprintf(stdout, "%s", printname);
|
||||||
|
if (printname_nl == NULL) fprintf(stdout, "%c", 0);
|
||||||
|
else fprintf(stdout, "%s", printname_nl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2749,7 +2753,8 @@ while (ptr < endptr)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE oldstartoffset;
|
PCRE2_SIZE oldstartoffset;
|
||||||
|
|
||||||
if (printname != NULL) fprintf(stdout, "%s:", printname);
|
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||||
|
printname_colon);
|
||||||
if (number) fprintf(stdout, "%lu:", linenumber);
|
if (number) fprintf(stdout, "%lu:", linenumber);
|
||||||
|
|
||||||
/* Handle --line-offsets */
|
/* Handle --line-offsets */
|
||||||
|
@ -2871,7 +2876,8 @@ while (ptr < endptr)
|
||||||
while (lastmatchrestart < p)
|
while (lastmatchrestart < p)
|
||||||
{
|
{
|
||||||
char *pp = lastmatchrestart;
|
char *pp = lastmatchrestart;
|
||||||
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||||
|
printname_hyphen);
|
||||||
if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
|
if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
|
||||||
pp = end_of_line(pp, endptr, &ellength);
|
pp = end_of_line(pp, endptr, &ellength);
|
||||||
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
||||||
|
@ -2912,7 +2918,8 @@ while (ptr < endptr)
|
||||||
{
|
{
|
||||||
int ellength;
|
int ellength;
|
||||||
char *pp = p;
|
char *pp = p;
|
||||||
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||||
|
printname_hyphen);
|
||||||
if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
|
if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
|
||||||
pp = end_of_line(pp, endptr, &ellength);
|
pp = end_of_line(pp, endptr, &ellength);
|
||||||
FWRITE_IGNORE(p, 1, pp - p, stdout);
|
FWRITE_IGNORE(p, 1, pp - p, stdout);
|
||||||
|
@ -2926,7 +2933,8 @@ while (ptr < endptr)
|
||||||
if (after_context > 0 || before_context > 0)
|
if (after_context > 0 || before_context > 0)
|
||||||
endhyphenpending = TRUE;
|
endhyphenpending = TRUE;
|
||||||
|
|
||||||
if (printname != NULL) fprintf(stdout, "%s:", printname);
|
if (printname != NULL) fprintf(stdout, "%s%c", printname,
|
||||||
|
printname_colon);
|
||||||
if (number) fprintf(stdout, "%lu:", linenumber);
|
if (number) fprintf(stdout, "%lu:", linenumber);
|
||||||
|
|
||||||
/* In multiline mode, or if colouring, we have to split the line(s) up
|
/* In multiline mode, or if colouring, we have to split the line(s) up
|
||||||
|
@ -3131,7 +3139,9 @@ were none. If we found a match, we won't have got this far. */
|
||||||
|
|
||||||
if (filenames == FN_NOMATCH_ONLY)
|
if (filenames == FN_NOMATCH_ONLY)
|
||||||
{
|
{
|
||||||
fprintf(stdout, "%s" STDOUT_NL, printname);
|
fprintf(stdout, "%s", printname);
|
||||||
|
if (printname_nl == NULL) fprintf(stdout, "%c", 0);
|
||||||
|
else fprintf(stdout, "%s", printname_nl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3142,7 +3152,7 @@ if (count_only && !quiet)
|
||||||
if (count > 0 || !omit_zero_count)
|
if (count > 0 || !omit_zero_count)
|
||||||
{
|
{
|
||||||
if (printname != NULL && filenames != FN_NONE)
|
if (printname != NULL && filenames != FN_NONE)
|
||||||
fprintf(stdout, "%s:", printname);
|
fprintf(stdout, "%s%c", printname, printname_colon);
|
||||||
fprintf(stdout, "%lu" STDOUT_NL, count);
|
fprintf(stdout, "%lu" STDOUT_NL, count);
|
||||||
counts_printed++;
|
counts_printed++;
|
||||||
}
|
}
|
||||||
|
@ -3528,8 +3538,6 @@ switch(letter)
|
||||||
case 'u': options |= PCRE2_UTF; utf = TRUE; break;
|
case 'u': options |= PCRE2_UTF; utf = TRUE; break;
|
||||||
case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
|
case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
|
||||||
case 'v': invert = TRUE; break;
|
case 'v': invert = TRUE; break;
|
||||||
case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
|
|
||||||
case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
|
|
||||||
|
|
||||||
case 'V':
|
case 'V':
|
||||||
{
|
{
|
||||||
|
@ -3540,6 +3548,10 @@ switch(letter)
|
||||||
pcre2grep_exit(0);
|
pcre2grep_exit(0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
|
||||||
|
case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
|
||||||
|
case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
|
fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
|
||||||
pcre2grep_exit(usage(2));
|
pcre2grep_exit(usage(2));
|
||||||
|
@ -4259,8 +4271,6 @@ if (DEE_option != NULL)
|
||||||
|
|
||||||
(void)pcre2_set_compile_extra_options(compile_context, extra_options);
|
(void)pcre2_set_compile_extra_options(compile_context, extra_options);
|
||||||
|
|
||||||
/* Check the values for Jeffrey Friedl's debugging options. */
|
|
||||||
|
|
||||||
/* If use_jit is set, check whether JIT is available. If not, do not try
|
/* If use_jit is set, check whether JIT is available. If not, do not try
|
||||||
to use JIT. */
|
to use JIT. */
|
||||||
|
|
||||||
|
|
|
@ -991,3 +991,22 @@ RC=0
|
||||||
---------------------------- Test 134 -----------------------------
|
---------------------------- Test 134 -----------------------------
|
||||||
=AB3CD5=
|
=AB3CD5=
|
||||||
RC=0
|
RC=0
|
||||||
|
---------------------------- Test 135 -----------------------------
|
||||||
|
./testdata/grepinputv@The word is cat in this line
|
||||||
|
RC=0
|
||||||
|
./testdata/grepinputv@./testdata/grepinputv@RC=0
|
||||||
|
./testdata/grepinputv@This line contains \E and (regex) *meta* [characters].
|
||||||
|
./testdata/grepinputv@The word is cat in this line
|
||||||
|
./testdata/grepinputv@The caterpillar sat on the mat
|
||||||
|
RC=0
|
||||||
|
testdata/grepinputM |