Jason Hood's pcre2grep patches (modified a bit) to add --output to pcre2grep,

and also an inbuilt callout echo.
This commit is contained in:
Philip.Hazel 2017-04-06 18:02:40 +00:00
parent 2b36600b2b
commit 88abc14e42
17 changed files with 1106 additions and 589 deletions

View File

@ -113,6 +113,9 @@ a message, and abandon the run (this would have detected #13 above).
19. Implemented PCRE2_ENDANCHORED.
20. Applied Jason Hood's patches (slightly modified) to pcre2grep, to implement
the --output=text (-O) option and the inbuilt callout echo.
Version 10.23 14-February-2017
------------------------------

View File

@ -598,6 +598,10 @@ printf "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtrygrep
@ -667,6 +671,9 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scri
echo "Testing pcre2grep script callouts"
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
# The above has no newline, which 'diff -ub' ignores, so add one.
$cf $srcdir/testdata/grepoutputC testtrygrep
if [ $? != 0 ] ; then exit 1; fi
else

View File

@ -585,6 +585,10 @@ echo ---------------------------- Test 119 ----------------------------->>testtr
%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 120 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
:: Now compare the results.
%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout%
@ -654,6 +658,10 @@ if %ERRORLEVEL% equ 0 (
echo Testing pcre2grep script callouts
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$0:$1')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% --om-separator / "(T)(?C'|$1')" %srcdir%/testdata/grepinputv >>testtrygrep
:: The above has no newline, which 'diff -ub' ignores, so add one.
echo />>testtrygrep
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
if ERRORLEVEL 1 exit /b 1
) else (

View File

@ -60,6 +60,7 @@ The option bits are:
PCRE2_DOLLAR_ENDONLY $ not to match newline at end
PCRE2_DOTALL . matches anything including NL
PCRE2_DUPNAMES Allow duplicate names for subpatterns
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_EXTENDED Ignore white space and # comments
PCRE2_FIRSTLINE Force matching to be before newline
PCRE2_MATCH_UNSET_BACKREF Match unset back references

View File

@ -50,6 +50,7 @@ up a callout function or specify the recursion depth limit. The <i>length</i>
and <i>startoffset</i> values are code units, not characters. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match

View File

@ -53,6 +53,7 @@ units, not characters. The length may be given as PCRE2_ZERO_TERMINATED for a
subject that is terminated by a binary zero code unit. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match

View File

@ -64,6 +64,7 @@ The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
zero-terminated strings. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match

View File

@ -1123,8 +1123,8 @@ documentation).
<P>
For those options that can be different in different parts of the pattern, the
contents of the <i>options</i> argument specifies their settings at the start of
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
the time of matching as well as at compile time.
compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK
options can be set at the time of matching as well as at compile time.
</P>
<P>
Other, less frequently required compile-time parameters (for example, the
@ -1279,6 +1279,13 @@ only one instance of the named subpattern can ever be matched. There are more
details of named subpatterns below; see also the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation.
<pre>
PCRE2_ENDANCHORED
</pre>
If this bit is set, the end of any pattern match must be right at the end of
the string being searched (the "subject string"). This effect can also be
achieved by appropriate constructs in the pattern itself, which is the only way
to do it in Perl.
<pre>
PCRE2_EXTENDED
</pre>
@ -2141,16 +2148,16 @@ Option bits for <b>pcre2_match()</b>
</b><br>
<P>
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT,
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is
described below.
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
Their action is described below.
</P>
<P>
Setting PCRE2_ANCHORED at match time is not supported by the just-in-time (JIT)
compiler. If it is set, JIT matching is disabled and the interpretive code in
<b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT (obviously), the remaining
options are supported for JIT matching.
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
the just-in-time (JIT) compiler. If either is set, JIT matching is disabled and the
interpretive code in <b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT
(obviously), the remaining options are supported for JIT matching.
<pre>
PCRE2_ANCHORED
</pre>
@ -2159,6 +2166,12 @@ matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at
matching time. Note that setting the option at match time disables JIT
matching.
<pre>
PCRE2_ENDANCHORED
</pre>
If the PCRE2_ENDANCHORED option is set, any string that <b>pcre2_match()</b>
matches must be right at the end of the subject string. Note that setting the
option at match time disables JIT matching.
<pre>
PCRE2_NOTBOL
</pre>
@ -3100,11 +3113,11 @@ Option bits for <b>pcre2_dfa_match()</b>
</b><br>
<P>
The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and
PCRE2_DFA_RESTART. All but the last four of these are exactly the same as for
<b>pcre2_match()</b>, so their description is not repeated here.
be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST,
and PCRE2_DFA_RESTART. All but the last four of these are exactly the same as
for <b>pcre2_match()</b>, so their description is not repeated here.
<pre>
PCRE2_PARTIAL_HARD
PCRE2_PARTIAL_SOFT
@ -3258,7 +3271,7 @@ Cambridge, England.
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P>
Last updated: 01 April 2017
Last updated: 04 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -22,7 +22,7 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
<li><a name="TOC10" href="#SEC10">CALLING EXTERNAL SCRIPTS</a>
<li><a name="TOC10" href="#SEC10">USING PCRE2'S CALLOUT FACILITY</a>
<li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a>
<li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a>
<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
@ -384,8 +384,8 @@ Instead of showing lines or parts of lines that match, show each match as an
offset from the start of the file and a length, separated by a comma. In this
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
options are ignored. If there is more than one match in a line, each of them is
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
and <b>--only-matching</b>.
shown separately. This option is mutually exclusive with <b>--output</b>,
<b>--line-offsets</b>, and <b>--only-matching</b>.
</P>
<P>
<b>-H</b>, <b>--with-filename</b>
@ -491,7 +491,8 @@ number is terminated by a colon (as usual; see the <b>-n</b> option), and the
offset and length are separated by a comma. In this mode, no context is shown.
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
more than one match in a line, each of them is shown separately. This option is
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
mutually exclusive with <b>--output</b>, <b>--file-offsets</b>, and
<b>--only-matching</b>.
</P>
<P>
<b>--locale</b>=<i>locale-name</i>
@ -602,6 +603,36 @@ use of JIT at run time. It is provided for testing and working round problems.
It should never be needed in normal use.
</P>
<P>
<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
When there is a match, instead of outputting the whole line that matched,
output just the given text. This option is mutually exclusive with
<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
sequences starting with a dollar character may be used to insert the contents
of the matched part of the line and/or captured substrings into the text.
<br>
<br>
$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
substring of the given decimal number; zero substitutes the whole match. If
the number is greater than the number of capturing substrings, or if the
capture is unset, the replacement is empty.
<br>
<br>
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
newline; $r by carriage return; $t by tab; $v by vertical tab.
<br>
<br>
$o&#60;digits&#62; is replaced by the character represented by the given octal
number; up to three digits are processed.
<br>
<br>
$x&#60;digits&#62; is replaced by the character represented by the given hexadecimal
number; up to two digits are processed.
<br>
<br>
Any other character is substituted by itself. In particular, $$ is replaced by
a single dollar.
</P>
<P>
<b>-o</b>, <b>--only-matching</b>
Show only the part of the line that matched a pattern instead of the whole
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
@ -611,7 +642,7 @@ combined with <b>-v</b> (invert the sense of the match to find non-matching
lines), no output is generated, but the return code is set appropriately. If
the matched portion of the line is empty, nothing is output unless the file
name or line number are being printed, in which case they are shown on an
otherwise empty line. This option is mutually exclusive with
otherwise empty line. This option is mutually exclusive with <b>--output</b>,
<b>--file-offsets</b>, and <b>--line-offsets</b>.
</P>
<P>
@ -621,7 +652,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
equivalent to <b>-o</b> without a number. Because these options can be given
without an argument (see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2. The comments given
for the non-argument case above also apply to this case. If the specified
for the non-argument case above also apply to this option. If the specified
capturing parentheses do not exist in the pattern, or were not set in the
match, nothing is output unless the file name or line number are being output.
<br>
@ -735,9 +766,9 @@ as in the GNU <b>grep</b> program. Any long option of the form
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
<b>--newline</b>, <b>--om-separator</b>, <b>-u</b>, and <b>--utf-8</b> options are
specific to <b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option
with a capturing parentheses number.
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
<b>--only-matching</b> option with a capturing parentheses number.
</P>
<P>
Although most of the common options work the same way, a few are different in
@ -778,23 +809,30 @@ The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
options does have data, it must be given in the first form, using an equals
character. Otherwise <b>pcre2grep</b> will assume that it has no data.
</P>
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
<br><a name="SEC10" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
<P>
<b>pcre2grep</b> has, by default, support for calling external programs or
scripts during matching by making use of PCRE2's callout facility. However,
this support can be disabled when <b>pcre2grep</b> is built. You can find out
whether your binary has support for callouts by running it with the <b>--help</b>
option. If the support is not enabled, all callouts in patterns are ignored by
<b>pcre2grep</b>.
scripts or echoing specific strings during matching by making use of PCRE2's
callout facility. However, this support can be disabled when <b>pcre2grep</b> is
built. You can find out whether your binary has support for callouts by running
it with the <b>--help</b> option. If the support is not enabled, all callouts in
patterns are ignored by <b>pcre2grep</b>.
</P>
<P>
A callout in a PCRE2 pattern is of the form (?C&#60;arg&#62;) where the argument is
either a number or a quoted string (see the
<a href="pcre2callout.html"><b>pcre2callout</b></a>
documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>.
String arguments are parsed as a list of substrings separated by pipe (vertical
bar) characters. The first substring must be an executable name, with the
following substrings specifying arguments:
documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
only callouts with string arguments are useful.
</P>
<br><b>
Calling external programs or scripts
</b><br>
<P>
If the callout string does not start with a pipe (vertical bar) character, it
is parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying
arguments:
<pre>
executable_name|arg1|arg2|...
</pre>
@ -828,6 +866,19 @@ callout to be ignored. If running the program fails for any reason (including
the non-existence of the executable), a local matching failure occurs and the
matcher backtracks in the normal way.
</P>
<br><b>
Echoing a specific string
</b><br>
<P>
If the callout string starts with a pipe (vertical bar) character, the rest of
the string is written to the output, having been passed through the same escape
processing as text from the --output option. This provides a simple echoing
facility that avoids calling an external program or script. No terminator is
added to the string, so if you want a newline, you must include it explicitly.
Matching continues normally after the string is output. If you want to see only
the callout output but not any output from an actual match, you should end the
relevant pattern with (*FAIL).
</P>
<br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br>
<P>
It is possible to supply a regular expression that takes a very long time to
@ -867,7 +918,7 @@ Cambridge, England.
</P>
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
<P>
Last updated: 31 March 2017
Last updated: 06 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -568,6 +568,7 @@ for a description of their effects.
dollar_endonly set PCRE2_DOLLAR_ENDONLY
/s dotall set PCRE2_DOTALL
dupnames set PCRE2_DUPNAMES
endanchored set PCRE2_ENDANCHORED
/x extended set PCRE2_EXTENDED
firstline set PCRE2_FIRSTLINE
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
@ -1039,6 +1040,7 @@ The following modifiers set options for <b>pcre2_match()</b> or
for a description of their effects.
<pre>
anchored set PCRE2_ANCHORED
endanchored set PCRE2_ENDANCHORED
dfa_restart set PCRE2_DFA_RESTART
dfa_shortest set PCRE2_DFA_SHORTEST
no_jit set PCRE2_NO_JIT
@ -1798,7 +1800,7 @@ Cambridge, England.
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
Last updated: 25 March 2017
Last updated: 04 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "31 March 2017" "PCRE2 10.30"
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
.SH NAME
pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
@ -330,8 +330,8 @@ Instead of showing lines or parts of lines that match, show each match as an
offset from the start of the file and a length, separated by a comma. In this
mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
options are ignored. If there is more than one match in a line, each of them is
shown separately. This option is mutually exclusive with \fB--line-offsets\fP
and \fB--only-matching\fP.
shown separately. This option is mutually exclusive with \fB--output\fP,
\fB--line-offsets\fP, and \fB--only-matching\fP.
.TP
\fB-H\fP, \fB--with-filename\fP
Force the inclusion of the file name at the start of output lines when
@ -424,7 +424,8 @@ number is terminated by a colon (as usual; see the \fB-n\fP option), and the
offset and length are separated by a comma. In this mode, no context is shown.
That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
more than one match in a line, each of them is shown separately. This option is
mutually exclusive with \fB--file-offsets\fP and \fB--only-matching\fP.
mutually exclusive with \fB--output\fP, \fB--file-offsets\fP, and
\fB--only-matching\fP.
.TP
\fB--locale\fP=\fIlocale-name\fP
This option specifies a locale to be used for pattern matching. It overrides
@ -521,6 +522,30 @@ was explicitly disabled at build time. This option can be used to disable the
use of JIT at run time. It is provided for testing and working round problems.
It should never be needed in normal use.
.TP
\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
When there is a match, instead of outputting the whole line that matched,
output just the given text. This option is mutually exclusive with
\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
sequences starting with a dollar character may be used to insert the contents
of the matched part of the line and/or captured substrings into the text.
.sp
$<digits> or ${<digits>} is replaced by the captured
substring of the given decimal number; zero substitutes the whole match. If
the number is greater than the number of capturing substrings, or if the
capture is unset, the replacement is empty.
.sp
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
newline; $r by carriage return; $t by tab; $v by vertical tab.
.sp
$o<digits> is replaced by the character represented by the given octal
number; up to three digits are processed.
.sp
$x<digits> is replaced by the character represented by the given hexadecimal
number; up to two digits are processed.
.sp
Any other character is substituted by itself. In particular, $$ is replaced by
a single dollar.
.TP
\fB-o\fP, \fB--only-matching\fP
Show only the part of the line that matched a pattern instead of the whole
line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
@ -530,7 +555,7 @@ combined with \fB-v\fP (invert the sense of the match to find non-matching
lines), no output is generated, but the return code is set appropriately. If
the matched portion of the line is empty, nothing is output unless the file
name or line number are being printed, in which case they are shown on an
otherwise empty line. This option is mutually exclusive with
otherwise empty line. This option is mutually exclusive with \fB--output\fP,
\fB--file-offsets\fP, and \fB--line-offsets\fP.
.TP
\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
@ -539,7 +564,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
equivalent to \fB-o\fP without a number. Because these options can be given
without an argument (see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2. The comments given
for the non-argument case above also apply to this case. If the specified
for the non-argument case above also apply to this option. If the specified
capturing parentheses do not exist in the pattern, or were not set in the
match, nothing is output unless the file name or line number are being output.
.sp
@ -647,9 +672,9 @@ as in the GNU \fBgrep\fP program. Any long option of the form
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
\fB--newline\fP, \fB--om-separator\fP, \fB-u\fP, and \fB--utf-8\fP options are
specific to \fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option
with a capturing parentheses number.
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
\fB--only-matching\fP option with a capturing parentheses number.
.P
Although most of the common options work the same way, a few are different in
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
@ -690,25 +715,32 @@ options does have data, it must be given in the first form, using an equals
character. Otherwise \fBpcre2grep\fP will assume that it has no data.
.
.
.SH "CALLING EXTERNAL SCRIPTS"
.SH "USING PCRE2'S CALLOUT FACILITY"
.rs
.sp
\fBpcre2grep\fP has, by default, support for calling external programs or
scripts during matching by making use of PCRE2's callout facility. However,
this support can be disabled when \fBpcre2grep\fP is built. You can find out
whether your binary has support for callouts by running it with the \fB--help\fP
option. If the support is not enabled, all callouts in patterns are ignored by
\fBpcre2grep\fP.
scripts or echoing specific strings during matching by making use of PCRE2's
callout facility. However, this support can be disabled when \fBpcre2grep\fP is
built. You can find out whether your binary has support for callouts by running
it with the \fB--help\fP option. If the support is not enabled, all callouts in
patterns are ignored by \fBpcre2grep\fP.
.P
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
either a number or a quoted string (see the
.\" HREF
\fBpcre2callout\fP
.\"
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP.
String arguments are parsed as a list of substrings separated by pipe (vertical
bar) characters. The first substring must be an executable name, with the
following substrings specifying arguments:
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
only callouts with string arguments are useful.
.
.
.SS "Calling external programs or scripts"
.rs
.sp
If the callout string does not start with a pipe (vertical bar) character, it
is parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying
arguments:
.sp
executable_name|arg1|arg2|...
.sp
@ -742,6 +774,19 @@ the non-existence of the executable), a local matching failure occurs and the
matcher backtracks in the normal way.
.
.
.SS "Echoing a specific string"
.rs
.sp
If the callout string starts with a pipe (vertical bar) character, the rest of
the string is written to the output, having been passed through the same escape
processing as text from the --output option. This provides a simple echoing
facility that avoids calling an external program or script. No terminator is
added to the string, so if you want a newline, you must include it explicitly.
Matching continues normally after the string is output. If you want to see only
the callout output but not any output from an actual match, you should end the
relevant pattern with (*FAIL).
.
.
.SH "MATCHING ERRORS"
.rs
.sp
@ -789,6 +834,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 31 March 2017
Last updated: 06 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -363,8 +363,8 @@ OPTIONS
length, separated by a comma. In this mode, no context is
shown. That is, the -A, -B, and -C options are ignored. If
there is more than one match in a line, each of them is shown
separately. This option is mutually exclusive with --line-
offsets and --only-matching.
separately. This option is mutually exclusive with --output,
--line-offsets, and --only-matching.
-H, --with-filename
Force the inclusion of the file name at the start of output
@ -469,8 +469,8 @@ OPTIONS
separated by a comma. In this mode, no context is shown.
That is, the -A, -B, and -C options are ignored. If there is
more than one match in a line, each of them is shown sepa-
rately. This option is mutually exclusive with --file-offsets
and --only-matching.
rately. This option is mutually exclusive with --output,
--file-offsets, and --only-matching.
--locale=locale-name
This option specifies a locale to be used for pattern match-
@ -585,6 +585,33 @@ OPTIONS
run time. It is provided for testing and working round prob-
lems. It should never be needed in normal use.
-O text, --output=text
When there is a match, instead of outputting the whole line
that matched, output just the given text. This option is
mutually exclusive with --only-matching, --file-offsets, and
--line-offsets. Escape sequences starting with a dollar char-
acter may be used to insert the contents of the matched part
of the line and/or captured substrings into the text.
$<digits> or ${<digits>} is replaced by the captured sub-
string of the given decimal number; zero substitutes the
whole match. If the number is greater than the number of cap-
turing substrings, or if the capture is unset, the replace-
ment is empty.
$a is replaced by bell; $b by backspace; $e by escape; $f by
form feed; $n by newline; $r by carriage return; $t by tab;
$v by vertical tab.
$o<digits> is replaced by the character represented by the
given octal number; up to three digits are processed.
$x<digits> is replaced by the character represented by the
given hexadecimal number; up to two digits are processed.
Any other character is substituted by itself. In particular,
$$ is replaced by a single dollar.
-o, --only-matching
Show only the part of the line that matched a pattern instead
of the whole line. In this mode, no context is shown. That
@ -596,8 +623,8 @@ OPTIONS
ately. If the matched portion of the line is empty, nothing
is output unless the file name or line number are being
printed, in which case they are shown on an otherwise empty
line. This option is mutually exclusive with --file-offsets
and --line-offsets.
line. This option is mutually exclusive with --output,
--file-offsets, and --line-offsets.
-onumber, --only-matching=number
Show only the part of the line that matched the capturing
@ -607,7 +634,7 @@ OPTIONS
(see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2.
The comments given for the non-argument case above also apply
to this case. If the specified capturing parentheses do not
to this option. If the specified capturing parentheses do not
exist in the pattern, or were not set in the match, nothing
is output unless the file name or line number are being out-
put.
@ -723,7 +750,7 @@ OPTIONS COMPATIBILITY
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
line, --om-separator, -u, and --utf-8 options are specific to
line, --om-separator, --output, -u, and --utf-8 options are specific to
pcre2grep, as is the use of the --only-matching option with a capturing
parentheses number.
@ -766,33 +793,38 @@ OPTIONS WITH DATA
equals character. Otherwise pcre2grep will assume that it has no data.
CALLING EXTERNAL SCRIPTS
USING PCRE2'S CALLOUT FACILITY
pcre2grep has, by default, support for calling external programs or
scripts during matching by making use of PCRE2's callout facility. How-
ever, this support can be disabled when pcre2grep is built. You can
find out whether your binary has support for callouts by running it
with the --help option. If the support is not enabled, all callouts in
patterns are ignored by pcre2grep.
scripts or echoing specific strings during matching by making use of
PCRE2's callout facility. However, this support can be disabled when
pcre2grep is built. You can find out whether your binary has support
for callouts by running it with the --help option. If the support is
not enabled, all callouts in patterns are ignored by pcre2grep.
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
ment is either a number or a quoted string (see the pcre2callout docu-
mentation for details). Numbered callouts are ignored by pcre2grep.
String arguments are parsed as a list of substrings separated by pipe
(vertical bar) characters. The first substring must be an executable
name, with the following substrings specifying arguments:
mentation for details). Numbered callouts are ignored by pcre2grep;
only callouts with string arguments are useful.
Calling external programs or scripts
If the callout string does not start with a pipe (vertical bar) charac-
ter, it is parsed into a list of substrings separated by pipe charac-
ters. The first substring must be an executable name, with the follow-
ing substrings specifying arguments:
executable_name|arg1|arg2|...
Any substring (including the executable name) may contain escape
sequences started by a dollar character: $<digits> or ${<digits>} is
replaced by the captured substring of the given decimal number, which
must be greater than zero. If the number is greater than the number of
capturing substrings, or if the capture is unset, the replacement is
Any substring (including the executable name) may contain escape
sequences started by a dollar character: $<digits> or ${<digits>} is
replaced by the captured substring of the given decimal number, which
must be greater than zero. If the number is greater than the number of
capturing substrings, or if the capture is unset, the replacement is
empty.
Any other character is substituted by itself. In particular, $$ is
replaced by a single dollar and $| is replaced by a pipe character.
Any other character is substituted by itself. In particular, $$ is
replaced by a single dollar and $| is replaced by a pipe character.
Here is an example:
echo -e "abcde\n12345" | pcre2grep \
@ -808,37 +840,49 @@ CALLING EXTERNAL SCRIPTS
The parameters for the execv() system call that is used to run the pro-
gram or script are zero-terminated strings. This means that binary zero
characters in the callout argument will cause premature termination of
their substrings, and therefore should not be present. Any syntax
errors in the string (for example, a dollar not followed by another
character) cause the callout to be ignored. If running the program
characters in the callout argument will cause premature termination of
their substrings, and therefore should not be present. Any syntax
errors in the string (for example, a dollar not followed by another
character) cause the callout to be ignored. If running the program
fails for any reason (including the non-existence of the executable), a
local matching failure occurs and the matcher backtracks in the normal
local matching failure occurs and the matcher backtracks in the normal
way.
Echoing a specific string
If the callout string starts with a pipe (vertical bar) character, the
rest of the string is written to the output, having been passed through
the same escape processing as text from the --output option. This pro-
vides a simple echoing facility that avoids calling an external program
or script. No terminator is added to the string, so if you want a new-
line, you must include it explicitly. Matching continues normally
after the string is output. If you want to see only the callout output
but not any output from an actual match, you should end the relevant
pattern with (*FAIL).
MATCHING ERRORS
It is possible to supply a regular expression that takes a very long
time to fail to match certain lines. Such patterns normally involve
nested indefinite repeats, for example: (a+)*\d when matched against a
line of a's with no final digit. The PCRE2 matching function has a
resource limit that causes it to abort in these circumstances. If this
happens, pcre2grep outputs an error message and the line that caused
the problem to the standard error stream. If there are more than 20
It is possible to supply a regular expression that takes a very long
time to fail to match certain lines. Such patterns normally involve
nested indefinite repeats, for example: (a+)*\d when matched against a
line of a's with no final digit. The PCRE2 matching function has a
resource limit that causes it to abort in these circumstances. If this
happens, pcre2grep outputs an error message and the line that caused
the problem to the standard error stream. If there are more than 20
such errors, pcre2grep gives up.
The --match-limit option of pcre2grep can be used to set the overall
The --match-limit option of pcre2grep can be used to set the overall
resource limit; there is a second option called --depth-limit that sets
a limit on the amount of memory that is used (see the discussion of
a limit on the amount of memory that is used (see the discussion of
these options above).
DIAGNOSTICS
Exit status is 0 if any matches were found, 1 if no matches were found,
and 2 for syntax errors, overlong lines, non-existent or inaccessible
files (even if matches were found in other files) or too many matching
and 2 for syntax errors, overlong lines, non-existent or inaccessible
files (even if matches were found in other files) or too many matching
errors. Using the -s option to suppress error messages about inaccessi-
ble files does not affect the return code.
@ -857,5 +901,5 @@ AUTHOR
REVISION
Last updated: 31 March 2017
Last updated: 06 April 2017
Copyright (c) 1997-2017 University of Cambridge.

View File

@ -511,6 +511,7 @@ PATTERN MODIFIERS
dollar_endonly set PCRE2_DOLLAR_ENDONLY
/s dotall set PCRE2_DOTALL
dupnames set PCRE2_DUPNAMES
endanchored set PCRE2_ENDANCHORED
/x extended set PCRE2_EXTENDED
firstline set PCRE2_FIRSTLINE
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
@ -926,6 +927,7 @@ SUBJECT MODIFIERS
pcre2_dfa_match(). See pcreapi for a description of their effects.
anchored set PCRE2_ANCHORED
endanchored set PCRE2_ENDANCHORED
dfa_restart set PCRE2_DFA_RESTART
dfa_shortest set PCRE2_DFA_SHORTEST
no_jit set PCRE2_NO_JIT
@ -1630,5 +1632,5 @@ AUTHOR
REVISION
Last updated: 25 March 2017
Last updated: 04 April 2017
Copyright (c) 1997-2017 University of Cambridge.

View File

@ -175,8 +175,9 @@ static const char *dee_option = NULL;
static const char *DEE_option = NULL;
static const char *locale = NULL;
static const char *newline_arg = NULL;
static const char *om_separator = "";
static const char *om_separator = NULL;
static const char *stdin_name = "(standard input)";
static const char *output_text = NULL;
static char *main_buffer = NULL;
@ -196,6 +197,7 @@ static int dee_action = dee_SKIP;
#else
static int dee_action = dee_READ;
#endif
static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;
@ -233,7 +235,6 @@ static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
static BOOL resource_error = FALSE;
static BOOL quiet = FALSE;
static BOOL show_only_matching = FALSE;
static BOOL show_total_count = FALSE;
static BOOL silent = FALSE;
static BOOL utf = FALSE;
@ -247,6 +248,7 @@ typedef struct omstr {
static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;
static int only_matching_count;
/* Structure for holding the two variables that describe a number chain. */
@ -406,6 +408,7 @@ static option_item optionlist[] = {
#else
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
#endif
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
@ -793,7 +796,7 @@ return isatty(fileno(f));
/************* Print optionally coloured match Unix-style and z/OS **********/
static void
print_match(const char* buf, int length)
print_match(const void *buf, int length)
{
if (length == 0) return;
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
@ -942,7 +945,7 @@ static CONSOLE_SCREEN_BUFFER_INFO csbi;
static WORD match_colour;
static void
print_match(const char* buf, int length)
print_match(const void *buf, int length)
{
if (length == 0) return;
if (do_colour)
@ -1001,7 +1004,7 @@ return FALSE;
/************* Print optionally coloured match when we can't do it **********/
static void
print_match(const char* buf, int length)
print_match(const void *buf, int length)
{
if (length == 0) return;
FWRITE(buf, 1, length, stdout);
@ -1658,6 +1661,277 @@ return FALSE; /* No match, no errors */
}
/*************************************************
*         Check output text for errors           *
*************************************************/

/* Check that every dollar escape in an output text string is well formed, so
that the string can later be expanded without further checking. The rules that
are enforced are:

  . A dollar must be followed by at least one character.
  . An opening brace after a dollar must be followed by a decimal number and
    then a closing brace.
  . A capture number starts with a digit in the range 1-9; a leading zero is
    accepted only when callout is FALSE.
  . $o must be followed by an octal digit and $x by a hexadecimal digit.

A dollar followed by any other character is accepted as it stands.

Arguments:
  string:    the zero-terminated output text
  callout:   TRUE for the builtin callout, FALSE for --output; when TRUE no
             error message is written

Returns:     TRUE if the text is valid
             FALSE otherwise; unless callout is TRUE, a message giving the
             offset of the error has been written to stderr
*/

static BOOL
syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
{
PCRE2_SPTR begin = string;
const char *error = NULL;

/* Each error is detected directly in the body of this loop (never inside a
nested loop), so a break leaves "string" pointing at the offending character
for the single reporting site below. */

for (; *string != 0; string++)
  {
  BOOL brace = FALSE;

  if (*string != '$') continue;
  string++;

  /* Syntax error: a character must be present after $. */

  if (*string == 0)
    {
    error = "no character after $";
    break;
    }

  /* After an opening brace there must be a decimal number, e.g: {5} or {38} */

  if (*string == '{')
    {
    string++;
    brace = TRUE;
    }

  if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
    {
    /* Only the extent of the number matters here, not its value. */

    do
      {
      string++;
      }
    while (*string >= '0' && *string <= '9');

    if (!brace)
      {
      /* Step back to the last digit, to negate the effect of the for. */
      string--;
      }
    else if (*string != '}')
      {
      /* Syntax error: closing brace is missing. */
      error = "missing closing brace";
      break;
      }
    }

  /* Syntax error: a decimal number is required after an opening brace. */

  else if (brace)
    {
    error = "decimal number expected";
    break;
    }

  /* Syntax error: an octal number is required after $o. */

  else if (*string == 'o')
    {
    string++;
    if (*string < '0' || *string > '7')
      {
      error = "octal number expected";
      break;
      }
    }

  /* Syntax error: a hexadecimal number is required after $x. */

  else if (*string == 'x')
    {
    string++;
    if (!isxdigit((unsigned char)*string))
      {
      error = "hexadecimal number expected";
      break;
      }
    }
  }

if (error == NULL) return TRUE;

if (!callout)
  fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
    (int)(string - begin), error);
return FALSE;
}
/*************************************************
*              Display output text               *
*************************************************/

/* Write an output text string to stdout, expanding dollar escapes on the way.
No error checking is done here: the string is assumed to have been syntax
checked already. The escapes that are recognized are:

  $<digits> or ${<digits>} inserts the captured substring with the given
    decimal number. Number zero, which stands for the whole match, is
    recognized only when callout is FALSE. Nothing is inserted if the number
    is not less than capture_top, or if the capture is unset or empty.

  $a  bell                  $n  newline
  $b  backspace             $r  carriage return
  $e  escape                $t  tab
  $f  form feed             $v  vertical tab

  $o<digits> inserts the character whose code is the given octal number; at
    most three digits are used.

  $x<digits> inserts the character whose code is the given hexadecimal number;
    at most two digits are used.

A dollar followed by any other character yields that character, e.g: $$ gives
a single dollar.

Arguments:
  string:       the output text
  callout:      TRUE for the builtin callout, FALSE for --output
  subject:      the start of the subject
  ovector:      capture offsets
  capture_top:  number of captures

Returns:        TRUE if the last item written was something other than a $n
                  newline
                FALSE if nothing was written, or a $n newline was written last
*/

static BOOL
display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
  PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
{
BOOL printed = FALSE;

for (; *string != 0; string++)
  {
  int ch = EOF;     /* Remains EOF when there is no single character to write */

  if (*string != '$')
    {
    ch = *string;
    }
  else
    {
    BOOL brace = FALSE;

    string++;
    if (*string == '{')
      {
      /* A decimal number in braces follows, e.g: {5} or {38} */
      brace = TRUE;
      string++;
      }

    /* A reference to a captured substring */

    if ((!callout && *string == '0') || (*string >= '1' && *string <= '9'))
      {
      PCRE2_SIZE group = 0;

      /* Read the number, but stop accumulating once the value is beyond the
      maximum capture id, which is 65535. */

      while (*string >= '0' && *string <= '9')
        {
        if (group <= 65535) group = group * 10 + (*string - '0');
        string++;
        }

      /* In the braced form we are now at the closing brace, which the for
      statement steps over. Otherwise, back up to the final digit. */

      if (!brace) string--;

      if (group < capture_top)
        {
        PCRE2_SIZE *pair = ovector + 2*group;
        PCRE2_SIZE length = pair[1] - pair[0];
        if (length > 0)
          {
          print_match(subject + pair[0], length);
          printed = TRUE;
          }
        }
      }

    /* All the other escapes */

    else switch (*string)
      {
      case 'a': ch = '\a'; break;
      case 'b': ch = '\b'; break;
      case 'f': ch = '\f'; break;
      case 'r': ch = '\r'; break;
      case 't': ch = '\t'; break;
      case 'v': ch = '\v'; break;

      case 'e':
#ifndef EBCDIC
      ch = '\033';
#else
      ch = '\047';
#endif
      break;

      /* A newline is written at once, and resets the "printed" state. */

      case 'n':
      fprintf(stdout, STDOUT_NL);
      printed = FALSE;
      break;

      /* One octal digit, optionally followed by up to two more */

      case 'o':
        {
        int extra;
        string++;
        ch = *string - '0';
        for (extra = 0; extra < 2; extra++)
          {
          if (string[1] < '0' || string[1] > '7') break;
          string++;
          ch = ch * 8 + (*string - '0');
          }
        }
      break;

      /* One hexadecimal digit, optionally followed by one more. ORing a
      letter with 0x20 forces it to lower case. */

      case 'x':
      string++;
      ch = (*string >= '0' && *string <= '9')?
        *string - '0' : (*string | 0x20) - 'a' + 10;
      if (isxdigit((unsigned char)string[1]))
        {
        string++;
        ch = ch * 16 + ((*string >= '0' && *string <= '9')?
          *string - '0' : (*string | 0x20) - 'a' + 10);
        }
      break;

      /* Any other character stands for itself. */

      default:
      ch = *string;
      break;
      }
    }

  if (ch != EOF)
    {
    fprintf(stdout, "%c", ch);
    printed = TRUE;
    }
  }

return printed;
}
#ifdef SUPPORT_PCRE2GREP_CALLOUT
/*************************************************
@ -1683,6 +1957,10 @@ follows:
Any other character is substituted by itself. E.g: $$ is replaced by a single
dollar or $| replaced by a pipe character.
Alternatively, if the string starts with a pipe character, the remainder is
taken as an output string, in the same way as for --output. In this case,
--om-separator is used to separate each callout, defaulting to newline.
Example:
echo -e "abcde\n12345" | pcre2grep \
@ -1725,6 +2003,16 @@ int result = 0;
/* Only callouts with string arguments are supported. */
if (string == NULL || length == 0) return 0;
/* If there's no command, output the remainder directly. */
if (*string == '|')
{
string++;
if (!syntax_check_output_text(string, TRUE)) return 0;
(void)display_output_text(string, TRUE, subject, ovector, capture_top);
return 0;
}
/* Check the syntax and compute the number of string fragments. Callout strings
are ignored in case of a syntax error. */
@ -2174,8 +2462,8 @@ while (ptr < endptr)
}
#endif
/* We come back here after a match when show_only_matching is set, in order
to find any further matches in the same line. This applies to
/* We come back here after a match when only_matching_count is non-zero, in
order to find any further matches in the same line. This applies to
--only-matching, --file-offsets, and --line-offsets. */
ONLY_MATCHING_RESTART:
@ -2229,13 +2517,13 @@ while (ptr < endptr)
/* The --only-matching option prints just the substring that matched,
and/or one or more captured portions of it, as long as these strings are
not empty. The --file-offsets and --line-offsets options output offsets for
the matching substring (all three set show_only_matching). None of these
mutually exclusive options prints any context. Afterwards, adjust the start
and then jump back to look for further matches in the same line. If we are
in invert mode, however, nothing is printed and we do not restart - this
could still be useful because the return code is set. */
the matching substring (all three set only_matching_count non-zero). None
of these mutually exclusive options prints any context. Afterwards, adjust
the start and then jump back to look for further matches in the same line.
If we are in invert mode, however, nothing is printed and we do not restart
- this could still be useful because the return code is set. */
else if (show_only_matching)
else if (only_matching_count != 0)
{
if (!invert)
{
@ -2257,6 +2545,16 @@ while (ptr < endptr)
(int)(filepos + matchptr + offsets[0] - ptr),
(int)(offsets[1] - offsets[0]));
/* Handle --output (which has already been syntax checked) */
else if (output_text != NULL)
{
if (display_output_text((PCRE2_SPTR)output_text, FALSE,
(PCRE2_SPTR)matchptr, offsets, mrc) || printname != NULL ||
number)
fprintf(stdout, STDOUT_NL);
}
/* Handle --only-matching, which may occur many times */
else
@ -2272,7 +2570,8 @@ while (ptr < endptr)
int plen = offsets[2*n + 1] - offsets[2*n];
if (plen > 0)
{
if (printed) fprintf(stdout, "%s", om_separator);
if (printed && om_separator != NULL)
fprintf(stdout, "%s", om_separator);
print_match(matchptr + offsets[n*2], plen);
printed = TRUE;
}
@ -2557,7 +2856,7 @@ while (ptr < endptr)
/* End of file; print final "after" lines if wanted; do_after_lines sets
hyphenpending if it prints something. */
if (!show_only_matching && !(count_only|show_total_count))
if (only_matching_count == 0 && !(count_only|show_total_count))
{
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
hyphenpending |= endhyphenpending;
@ -3518,26 +3817,31 @@ if (both_context > 0)
if (before_context == 0) before_context = both_context;
}
/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
However, all three set show_only_matching because they display, each in their
own way, only the data that has matched. */
/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
permitted. They display, each in their own way, only the data that has matched.
*/
if ((only_matching != NULL && (file_offsets || line_offsets)) ||
(file_offsets && line_offsets))
only_matching_count = (only_matching != NULL) + (output_text != NULL) +
file_offsets + line_offsets;
if (only_matching_count > 1)
{
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets "
"and/or --line-offsets\n");
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
"--file-offsets and/or --line-offsets\n");
pcre2grep_exit(usage(2));
}
/* Check the text supplied to --output for errors. */
if (output_text != NULL &&
!syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
goto EXIT2;
/* Put limits into the match data block. */
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
if (only_matching != NULL || file_offsets || line_offsets)
show_only_matching = TRUE;
/* If a locale has not been provided as an option, see if the LC_CTYPE or
LC_ALL environment variable is set, and if so, use it. */
@ -3827,6 +4131,14 @@ for (; i < argc; i++)
else if (frc == 0 && rc == 1) rc = 0;
}
#ifdef SUPPORT_PCRE2GREP_CALLOUT
/* If separating builtin echo callouts by implicit newline, add one more for
the final item. */
if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
fprintf(stdout, STDOUT_NL);
#endif
/* Show the total number of matches if requested, but not if only one file's
count was printed. */

5
testdata/grepoutput vendored
View File

@ -829,3 +829,8 @@ def
xyz
---
RC=0
---------------------------- Test 120 ------------------------------
./testdata/grepinput:the binary zero.:zerothe.
./testdata/grepinput:a binary zero:zeroa
./testdata/grepinput:the binary zero.:zerothe.
RC=0

View File

@ -6,3 +6,9 @@ Arg1: [qu] [qu]
Arg1: [ t] [ t]
The quick brown
This time it jumps and jumps and jumps.
0:T
The quick brown
0:T
This time it jumps and jumps and jumps.
T
T