Jason Hood's pcre2grep patches (modified a bit) to add --output to pcre2grep,

and also an inbuilt callout echo.
This commit is contained in:
Philip.Hazel 2017-04-06 18:02:40 +00:00
parent 2b36600b2b
commit 88abc14e42
17 changed files with 1106 additions and 589 deletions

View File

@ -113,6 +113,9 @@ a message, and abandon the run (this would have detected #13 above).
19. Implemented PCRE2_ENDANCHORED. 19. Implemented PCRE2_ENDANCHORED.
20. Applied Jason Hood's patches (slightly modified) to pcre2grep, to implement
the --output=text (-O) option and the inbuilt callout echo.
Version 10.23 14-February-2017 Version 10.23 14-February-2017
------------------------------ ------------------------------

View File

@ -598,6 +598,10 @@ printf "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep $valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
# Now compare the results. # Now compare the results.
$cf $srcdir/testdata/grepoutput testtrygrep $cf $srcdir/testdata/grepoutput testtrygrep
@ -667,6 +671,9 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scri
echo "Testing pcre2grep script callouts" echo "Testing pcre2grep script callouts"
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
# The above has no newline, which 'diff -ub' ignores, so add one.
$cf $srcdir/testdata/grepoutputC testtrygrep $cf $srcdir/testdata/grepoutputC testtrygrep
if [ $? != 0 ] ; then exit 1; fi if [ $? != 0 ] ; then exit 1; fi
else else

View File

@ -585,6 +585,10 @@ echo ---------------------------- Test 119 ----------------------------->>testtr
%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep %pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep
echo ---------------------------- Test 120 ------------------------------>>testtrygrep
(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
echo RC=^%ERRORLEVEL%>>testtrygrep
:: Now compare the results. :: Now compare the results.
%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout% %cf% %srcdir%\testdata\grepoutput testtrygrep %cfout%
@ -654,6 +658,10 @@ if %ERRORLEVEL% equ 0 (
echo Testing pcre2grep script callouts echo Testing pcre2grep script callouts
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$0:$1')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% --om-separator / "(T)(?C'|$1')" %srcdir%/testdata/grepinputv >>testtrygrep
:: The above has no newline, which 'diff -ub' ignores, so add one.
echo />>testtrygrep
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
if ERRORLEVEL 1 exit /b 1 if ERRORLEVEL 1 exit /b 1
) else ( ) else (

View File

@ -60,6 +60,7 @@ The option bits are:
PCRE2_DOLLAR_ENDONLY $ not to match newline at end PCRE2_DOLLAR_ENDONLY $ not to match newline at end
PCRE2_DOTALL . matches anything including NL PCRE2_DOTALL . matches anything including NL
PCRE2_DUPNAMES Allow duplicate names for subpatterns PCRE2_DUPNAMES Allow duplicate names for subpatterns
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_EXTENDED Ignore white space and # comments PCRE2_EXTENDED Ignore white space and # comments
PCRE2_FIRSTLINE Force matching to be before newline PCRE2_FIRSTLINE Force matching to be before newline
PCRE2_MATCH_UNSET_BACKREF Match unset back references PCRE2_MATCH_UNSET_BACKREF Match unset back references

View File

@ -50,6 +50,7 @@ up a callout function or specify the recursion depth limit. The <i>length</i>
and <i>startoffset</i> values are code units, not characters. The options are: and <i>startoffset</i> values are code units, not characters. The options are:
<pre> <pre>
PCRE2_ANCHORED Match only at the first position PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line PCRE2_NOTEOL Subject is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match PCRE2_NOTEMPTY An empty string is not a valid match

View File

@ -53,6 +53,7 @@ units, not characters. The length may be given as PCRE2_ZERO_TERMINATED for a
subject that is terminated by a binary zero code unit. The options are: subject that is terminated by a binary zero code unit. The options are:
<pre> <pre>
PCRE2_ANCHORED Match only at the first position PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject string is not the beginning of a line PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line PCRE2_NOTEOL Subject string is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match PCRE2_NOTEMPTY An empty string is not a valid match

View File

@ -64,6 +64,7 @@ The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
zero-terminated strings. The options are: zero-terminated strings. The options are:
<pre> <pre>
PCRE2_ANCHORED Match only at the first position PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line PCRE2_NOTEOL Subject is not the end of a line
PCRE2_NOTEMPTY An empty string is not a valid match PCRE2_NOTEMPTY An empty string is not a valid match

View File

@ -1123,8 +1123,8 @@ documentation).
<P> <P>
For those options that can be different in different parts of the pattern, the For those options that can be different in different parts of the pattern, the
contents of the <i>options</i> argument specifies their settings at the start of contents of the <i>options</i> argument specifies their settings at the start of
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK
the time of matching as well as at compile time. options can be set at the time of matching as well as at compile time.
</P> </P>
<P> <P>
Other, less frequently required compile-time parameters (for example, the Other, less frequently required compile-time parameters (for example, the
@ -1279,6 +1279,13 @@ only one instance of the named subpattern can ever be matched. There are more
details of named subpatterns below; see also the details of named subpatterns below; see also the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a> <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation. documentation.
<pre>
PCRE2_ENDANCHORED
</pre>
If this bit is set, the end of any pattern match must be right at the end of
the string being searched (the "subject string"). This effect can also be
achieved by appropriate constructs in the pattern itself, which is the only way
to do it in Perl.
<pre> <pre>
PCRE2_EXTENDED PCRE2_EXTENDED
</pre> </pre>
@ -2141,16 +2148,16 @@ Option bits for <b>pcre2_match()</b>
</b><br> </b><br>
<P> <P>
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL, zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
described below. Their action is described below.
</P> </P>
<P> <P>
Setting PCRE2_ANCHORED at match time is not supported by the just-in-time (JIT) Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
compiler. If it is set, JIT matching is disabled and the interpretive code in the just-in-time (JIT) compiler. If either is set, JIT matching is disabled and the
<b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT (obviously), the remaining interpretive code in <b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT
options are supported for JIT matching. (obviously), the remaining options are supported for JIT matching.
<pre> <pre>
PCRE2_ANCHORED PCRE2_ANCHORED
</pre> </pre>
@ -2159,6 +2166,12 @@ matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at to be anchored by virtue of its contents, it cannot be made unanchored at
matching time. Note that setting the option at match time disables JIT matching time. Note that setting the option at match time disables JIT
matching. matching.
<pre>
PCRE2_ENDANCHORED
</pre>
If the PCRE2_ENDANCHORED option is set, any string that <b>pcre2_match()</b>
matches must be right at the end of the subject string. Note that setting the
option at match time disables JIT matching.
<pre> <pre>
PCRE2_NOTBOL PCRE2_NOTBOL
</pre> </pre>
@ -3100,11 +3113,11 @@ Option bits for <b>pcre2_dfa_match()</b>
</b><br> </b><br>
<P> <P>
The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL, be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST,
PCRE2_DFA_RESTART. All but the last four of these are exactly the same as for and PCRE2_DFA_RESTART. All but the last four of these are exactly the same as
<b>pcre2_match()</b>, so their description is not repeated here. for <b>pcre2_match()</b>, so their description is not repeated here.
<pre> <pre>
PCRE2_PARTIAL_HARD PCRE2_PARTIAL_HARD
PCRE2_PARTIAL_SOFT PCRE2_PARTIAL_SOFT
@ -3258,7 +3271,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br> <br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 01 April 2017 Last updated: 04 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -22,7 +22,7 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC7" href="#SEC7">NEWLINES</a> <li><a name="TOC7" href="#SEC7">NEWLINES</a>
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a> <li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a> <li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
<li><a name="TOC10" href="#SEC10">CALLING EXTERNAL SCRIPTS</a> <li><a name="TOC10" href="#SEC10">USING PCRE2'S CALLOUT FACILITY</a>
<li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a> <li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a>
<li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a> <li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a>
<li><a name="TOC13" href="#SEC13">SEE ALSO</a> <li><a name="TOC13" href="#SEC13">SEE ALSO</a>
@ -384,8 +384,8 @@ Instead of showing lines or parts of lines that match, show each match as an
offset from the start of the file and a length, separated by a comma. In this offset from the start of the file and a length, separated by a comma. In this
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
options are ignored. If there is more than one match in a line, each of them is options are ignored. If there is more than one match in a line, each of them is
shown separately. This option is mutually exclusive with <b>--line-offsets</b> shown separately. This option is mutually exclusive with <b>--output</b>,
and <b>--only-matching</b>. <b>--line-offsets</b>, and <b>--only-matching</b>.
</P> </P>
<P> <P>
<b>-H</b>, <b>--with-filename</b> <b>-H</b>, <b>--with-filename</b>
@ -491,7 +491,8 @@ number is terminated by a colon (as usual; see the <b>-n</b> option), and the
offset and length are separated by a comma. In this mode, no context is shown. offset and length are separated by a comma. In this mode, no context is shown.
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
more than one match in a line, each of them is shown separately. This option is more than one match in a line, each of them is shown separately. This option is
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>. mutually exclusive with <b>--output</b>, <b>--file-offsets</b>, and
<b>--only-matching</b>.
</P> </P>
<P> <P>
<b>--locale</b>=<i>locale-name</i> <b>--locale</b>=<i>locale-name</i>
@ -602,6 +603,36 @@ use of JIT at run time. It is provided for testing and working round problems.
It should never be needed in normal use. It should never be needed in normal use.
</P> </P>
<P> <P>
<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
When there is a match, instead of outputting the whole line that matched,
output just the given text. This option is mutually exclusive with
<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
sequences starting with a dollar character may be used to insert the contents
of the matched part of the line and/or captured substrings into the text.
<br>
<br>
$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
substring of the given decimal number; zero substitutes the whole match. If
the number is greater than the number of capturing substrings, or if the
capture is unset, the replacement is empty.
<br>
<br>
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
newline; $r by carriage return; $t by tab; $v by vertical tab.
<br>
<br>
$o&#60;digits&#62; is replaced by the character represented by the given octal
number; up to three digits are processed.
<br>
<br>
$x&#60;digits&#62; is replaced by the character represented by the given hexadecimal
number; up to two digits are processed.
<br>
<br>
Any other character is substituted by itself. In particular, $$ is replaced by
a single dollar.
</P>
<P>
<b>-o</b>, <b>--only-matching</b> <b>-o</b>, <b>--only-matching</b>
Show only the part of the line that matched a pattern instead of the whole Show only the part of the line that matched a pattern instead of the whole
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
@ -611,7 +642,7 @@ combined with <b>-v</b> (invert the sense of the match to find non-matching
lines), no output is generated, but the return code is set appropriately. If lines), no output is generated, but the return code is set appropriately. If
the matched portion of the line is empty, nothing is output unless the file the matched portion of the line is empty, nothing is output unless the file
name or line number are being printed, in which case they are shown on an name or line number are being printed, in which case they are shown on an
otherwise empty line. This option is mutually exclusive with otherwise empty line. This option is mutually exclusive with <b>--output</b>,
<b>--file-offsets</b> and <b>--line-offsets</b>. <b>--file-offsets</b> and <b>--line-offsets</b>.
</P> </P>
<P> <P>
@ -621,7 +652,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
equivalent to <b>-o</b> without a number. Because these options can be given equivalent to <b>-o</b> without a number. Because these options can be given
without an argument (see above), if an argument is present, it must be given in without an argument (see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2. The comments given the same shell item, for example, -o3 or --only-matching=2. The comments given
for the non-argument case above also apply to this case. If the specified for the non-argument case above also apply to this option. If the specified
capturing parentheses do not exist in the pattern, or were not set in the capturing parentheses do not exist in the pattern, or were not set in the
match, nothing is output unless the file name or line number are being output. match, nothing is output unless the file name or line number are being output.
<br> <br>
@ -735,9 +766,9 @@ as in the GNU <b>grep</b> program. Any long option of the form
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>, (PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>, <b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
<b>--newline</b>, <b>--om-separator</b>, <b>-u</b>, and <b>--utf-8</b> options are <b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
specific to <b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option <b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
with a capturing parentheses number. <b>--only-matching</b> option with a capturing parentheses number.
</P> </P>
<P> <P>
Although most of the common options work the same way, a few are different in Although most of the common options work the same way, a few are different in
@ -778,23 +809,30 @@ The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
options does have data, it must be given in the first form, using an equals options does have data, it must be given in the first form, using an equals
character. Otherwise <b>pcre2grep</b> will assume that it has no data. character. Otherwise <b>pcre2grep</b> will assume that it has no data.
</P> </P>
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br> <br><a name="SEC10" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
<P> <P>
<b>pcre2grep</b> has, by default, support for calling external programs or <b>pcre2grep</b> has, by default, support for calling external programs or
scripts during matching by making use of PCRE2's callout facility. However, scripts or echoing specific strings during matching by making use of PCRE2's
this support can be disabled when <b>pcre2grep</b> is built. You can find out callout facility. However, this support can be disabled when <b>pcre2grep</b> is
whether your binary has support for callouts by running it with the <b>--help</b> built. You can find out whether your binary has support for callouts by running
option. If the support is not enabled, all callouts in patterns are ignored by it with the <b>--help</b> option. If the support is not enabled, all callouts in
<b>pcre2grep</b>. patterns are ignored by <b>pcre2grep</b>.
</P> </P>
<P> <P>
A callout in a PCRE2 pattern is of the form (?C&#60;arg&#62;) where the argument is A callout in a PCRE2 pattern is of the form (?C&#60;arg&#62;) where the argument is
either a number or a quoted string (see the either a number or a quoted string (see the
<a href="pcre2callout.html"><b>pcre2callout</b></a> <a href="pcre2callout.html"><b>pcre2callout</b></a>
documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>. documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
String arguments are parsed as a list of substrings separated by pipe (vertical only callouts with string arguments are useful.
bar) characters. The first substring must be an executable name, with the </P>
following substrings specifying arguments: <br><b>
Calling external programs or scripts
</b><br>
<P>
If the callout string does not start with a pipe (vertical bar) character, it
is parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying
arguments:
<pre> <pre>
executable_name|arg1|arg2|... executable_name|arg1|arg2|...
</pre> </pre>
@ -828,6 +866,19 @@ callout to be ignored. If running the program fails for any reason (including
the non-existence of the executable), a local matching failure occurs and the the non-existence of the executable), a local matching failure occurs and the
matcher backtracks in the normal way. matcher backtracks in the normal way.
</P> </P>
<br><b>
Echoing a specific string
</b><br>
<P>
If the callout string starts with a pipe (vertical bar) character, the rest of
the string is written to the output, having been passed through the same escape
processing as text from the --output option. This provides a simple echoing
facility that avoids calling an external program or script. No terminator is
added to the string, so if you want a newline, you must include it explicitly.
Matching continues normally after the string is output. If you want to see only
the callout output but not any output from an actual match, you should end the
relevant pattern with (*FAIL).
</P>
<br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br> <br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br>
<P> <P>
It is possible to supply a regular expression that takes a very long time to It is possible to supply a regular expression that takes a very long time to
@ -867,7 +918,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC15" href="#TOC1">REVISION</a><br> <br><a name="SEC15" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 31 March 2017 Last updated: 06 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -568,6 +568,7 @@ for a description of their effects.
dollar_endonly set PCRE2_DOLLAR_ENDONLY dollar_endonly set PCRE2_DOLLAR_ENDONLY
/s dotall set PCRE2_DOTALL /s dotall set PCRE2_DOTALL
dupnames set PCRE2_DUPNAMES dupnames set PCRE2_DUPNAMES
endanchored set PCRE2_ENDANCHORED
/x extended set PCRE2_EXTENDED /x extended set PCRE2_EXTENDED
firstline set PCRE2_FIRSTLINE firstline set PCRE2_FIRSTLINE
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
@ -1039,6 +1040,7 @@ The following modifiers set options for <b>pcre2_match()</b> or
for a description of their effects. for a description of their effects.
<pre> <pre>
anchored set PCRE2_ANCHORED anchored set PCRE2_ANCHORED
endanchored set PCRE2_ENDANCHORED
dfa_restart set PCRE2_DFA_RESTART dfa_restart set PCRE2_DFA_RESTART
dfa_shortest set PCRE2_DFA_SHORTEST dfa_shortest set PCRE2_DFA_SHORTEST
no_jit set PCRE2_NO_JIT no_jit set PCRE2_NO_JIT
@ -1798,7 +1800,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br> <br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 25 March 2017 Last updated: 04 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "31 March 2017" "PCRE2 10.30" .TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
pcre2grep - a grep with Perl-compatible regular expressions. pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -330,8 +330,8 @@ Instead of showing lines or parts of lines that match, show each match as an
offset from the start of the file and a length, separated by a comma. In this offset from the start of the file and a length, separated by a comma. In this
mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
options are ignored. If there is more than one match in a line, each of them is options are ignored. If there is more than one match in a line, each of them is
shown separately. This option is mutually exclusive with \fB--line-offsets\fP shown separately. This option is mutually exclusive with \fB--output\fP,
and \fB--only-matching\fP. \fB--line-offsets\fP, and \fB--only-matching\fP.
.TP .TP
\fB-H\fP, \fB--with-filename\fP \fB-H\fP, \fB--with-filename\fP
Force the inclusion of the file name at the start of output lines when Force the inclusion of the file name at the start of output lines when
@ -424,7 +424,8 @@ number is terminated by a colon (as usual; see the \fB-n\fP option), and the
offset and length are separated by a comma. In this mode, no context is shown. offset and length are separated by a comma. In this mode, no context is shown.
That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
more than one match in a line, each of them is shown separately. This option is more than one match in a line, each of them is shown separately. This option is
mutually exclusive with \fB--file-offsets\fP and \fB--only-matching\fP. mutually exclusive with \fB--output\fP, \fB--file-offsets\fP, and
\fB--only-matching\fP.
.TP .TP
\fB--locale\fP=\fIlocale-name\fP \fB--locale\fP=\fIlocale-name\fP
This option specifies a locale to be used for pattern matching. It overrides This option specifies a locale to be used for pattern matching. It overrides
@ -521,6 +522,30 @@ was explicitly disabled at build time. This option can be used to disable the
use of JIT at run time. It is provided for testing and working round problems. use of JIT at run time. It is provided for testing and working round problems.
It should never be needed in normal use. It should never be needed in normal use.
.TP .TP
\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
When there is a match, instead of outputting the whole line that matched,
output just the given text. This option is mutually exclusive with
\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
sequences starting with a dollar character may be used to insert the contents
of the matched part of the line and/or captured substrings into the text.
.sp
$<digits> or ${<digits>} is replaced by the captured
substring of the given decimal number; zero substitutes the whole match. If
the number is greater than the number of capturing substrings, or if the
capture is unset, the replacement is empty.
.sp
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
newline; $r by carriage return; $t by tab; $v by vertical tab.
.sp
$o<digits> is replaced by the character represented by the given octal
number; up to three digits are processed.
.sp
$x<digits> is replaced by the character represented by the given hexadecimal
number; up to two digits are processed.
.sp
Any other character is substituted by itself. In particular, $$ is replaced by
a single dollar.
.TP
\fB-o\fP, \fB--only-matching\fP \fB-o\fP, \fB--only-matching\fP
Show only the part of the line that matched a pattern instead of the whole Show only the part of the line that matched a pattern instead of the whole
line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
@ -530,7 +555,7 @@ combined with \fB-v\fP (invert the sense of the match to find non-matching
lines), no output is generated, but the return code is set appropriately. If lines), no output is generated, but the return code is set appropriately. If
the matched portion of the line is empty, nothing is output unless the file the matched portion of the line is empty, nothing is output unless the file
name or line number are being printed, in which case they are shown on an name or line number are being printed, in which case they are shown on an
otherwise empty line. This option is mutually exclusive with otherwise empty line. This option is mutually exclusive with \fB--output\fP,
\fB--file-offsets\fP and \fB--line-offsets\fP. \fB--file-offsets\fP and \fB--line-offsets\fP.
.TP .TP
\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP \fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
@ -539,7 +564,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
equivalent to \fB-o\fP without a number. Because these options can be given equivalent to \fB-o\fP without a number. Because these options can be given
without an argument (see above), if an argument is present, it must be given in without an argument (see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2. The comments given the same shell item, for example, -o3 or --only-matching=2. The comments given
for the non-argument case above also apply to this case. If the specified for the non-argument case above also apply to this option. If the specified
capturing parentheses do not exist in the pattern, or were not set in the capturing parentheses do not exist in the pattern, or were not set in the
match, nothing is output unless the file name or line number are being output. match, nothing is output unless the file name or line number are being output.
.sp .sp
@ -647,9 +672,9 @@ as in the GNU \fBgrep\fP program. Any long option of the form
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP, (PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP, \fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
\fB--newline\fP, \fB--om-separator\fP, \fB-u\fP, and \fB--utf-8\fP options are \fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
specific to \fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option \fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
with a capturing parentheses number. \fB--only-matching\fP option with a capturing parentheses number.
.P .P
Although most of the common options work the same way, a few are different in Although most of the common options work the same way, a few are different in
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob \fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
@ -690,25 +715,32 @@ options does have data, it must be given in the first form, using an equals
character. Otherwise \fBpcre2grep\fP will assume that it has no data. character. Otherwise \fBpcre2grep\fP will assume that it has no data.
. .
. .
.SH "CALLING EXTERNAL SCRIPTS" .SH "USING PCRE2'S CALLOUT FACILITY"
.rs .rs
.sp .sp
\fBpcre2grep\fP has, by default, support for calling external programs or \fBpcre2grep\fP has, by default, support for calling external programs or
scripts during matching by making use of PCRE2's callout facility. However, scripts or echoing specific strings during matching by making use of PCRE2's
this support can be disabled when \fBpcre2grep\fP is built. You can find out callout facility. However, this support can be disabled when \fBpcre2grep\fP is
whether your binary has support for callouts by running it with the \fB--help\fP built. You can find out whether your binary has support for callouts by running
option. If the support is not enabled, all callouts in patterns are ignored by it with the \fB--help\fP option. If the support is not enabled, all callouts in
\fBpcre2grep\fP. patterns are ignored by \fBpcre2grep\fP.
.P .P
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
either a number or a quoted string (see the either a number or a quoted string (see the
.\" HREF .\" HREF
\fBpcre2callout\fP \fBpcre2callout\fP
.\" .\"
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP. documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
String arguments are parsed as a list of substrings separated by pipe (vertical only callouts with string arguments are useful.
bar) characters. The first substring must be an executable name, with the .
following substrings specifying arguments: .
.SS "Calling external programs or scripts"
.rs
.sp
If the callout string does not start with a pipe (vertical bar) character, it
is parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying
arguments:
.sp .sp
executable_name|arg1|arg2|... executable_name|arg1|arg2|...
.sp .sp
@ -742,6 +774,19 @@ the non-existence of the executable), a local matching failure occurs and the
matcher backtracks in the normal way. matcher backtracks in the normal way.
. .
. .
.SS "Echoing a specific string"
.rs
.sp
If the callout string starts with a pipe (vertical bar) character, the rest of
the string is written to the output, having been passed through the same escape
processing as text from the --output option. This provides a simple echoing
facility that avoids calling an external program or script. No terminator is
added to the string, so if you want a newline, you must include it explicitly.
Matching continues normally after the string is output. If you want to see only
the callout output but not any output from an actual match, you should end the
relevant pattern with (*FAIL).
.
.
.SH "MATCHING ERRORS" .SH "MATCHING ERRORS"
.rs .rs
.sp .sp
@ -789,6 +834,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 31 March 2017 Last updated: 06 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -363,8 +363,8 @@ OPTIONS
length, separated by a comma. In this mode, no context is length, separated by a comma. In this mode, no context is
shown. That is, the -A, -B, and -C options are ignored. If shown. That is, the -A, -B, and -C options are ignored. If
there is more than one match in a line, each of them is shown there is more than one match in a line, each of them is shown
separately. This option is mutually exclusive with --line- separately. This option is mutually exclusive with --output,
offsets and --only-matching. --line-offsets, and --only-matching.
-H, --with-filename -H, --with-filename
Force the inclusion of the file name at the start of output Force the inclusion of the file name at the start of output
@ -469,8 +469,8 @@ OPTIONS
separated by a comma. In this mode, no context is shown. separated by a comma. In this mode, no context is shown.
That is, the -A, -B, and -C options are ignored. If there is That is, the -A, -B, and -C options are ignored. If there is
more than one match in a line, each of them is shown sepa- more than one match in a line, each of them is shown sepa-
rately. This option is mutually exclusive with --file-offsets rately. This option is mutually exclusive with --output,
and --only-matching. --file-offsets, and --only-matching.
--locale=locale-name --locale=locale-name
This option specifies a locale to be used for pattern match- This option specifies a locale to be used for pattern match-
@ -585,6 +585,33 @@ OPTIONS
run time. It is provided for testing and working round prob- run time. It is provided for testing and working round prob-
lems. It should never be needed in normal use. lems. It should never be needed in normal use.
-O text, --output=text
When there is a match, instead of outputting the whole line
that matched, output just the given text. This option is
mutually exclusive with --only-matching, --file-offsets, and
--line-offsets. Escape sequences starting with a dollar char-
acter may be used to insert the contents of the matched part
of the line and/or captured substrings into the text.
$<digits> or ${<digits>} is replaced by the captured sub-
string of the given decimal number; zero substitutes the
whole match. If the number is greater than the number of cap-
turing substrings, or if the capture is unset, the replace-
ment is empty.
$a is replaced by bell; $b by backspace; $e by escape; $f by
form feed; $n by newline; $r by carriage return; $t by tab;
$v by vertical tab.
$o<digits> is replaced by the character represented by the
given octal number; up to three digits are processed.
$x<digits> is replaced by the character represented by the
given hexadecimal number; up to two digits are processed.
Any other character is substituted by itself. In particular,
$$ is replaced by a single dollar.
-o, --only-matching -o, --only-matching
Show only the part of the line that matched a pattern instead Show only the part of the line that matched a pattern instead
of the whole line. In this mode, no context is shown. That of the whole line. In this mode, no context is shown. That
@ -596,8 +623,8 @@ OPTIONS
ately. If the matched portion of the line is empty, nothing ately. If the matched portion of the line is empty, nothing
is output unless the file name or line number are being is output unless the file name or line number are being
printed, in which case they are shown on an otherwise empty printed, in which case they are shown on an otherwise empty
line. This option is mutually exclusive with --file-offsets line. This option is mutually exclusive with --output,
and --line-offsets. --file-offsets and --line-offsets.
-onumber, --only-matching=number -onumber, --only-matching=number
Show only the part of the line that matched the capturing Show only the part of the line that matched the capturing
@ -607,7 +634,7 @@ OPTIONS
(see above), if an argument is present, it must be given in (see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2. the same shell item, for example, -o3 or --only-matching=2.
The comments given for the non-argument case above also apply The comments given for the non-argument case above also apply
to this case. If the specified capturing parentheses do not to this option. If the specified capturing parentheses do not
exist in the pattern, or were not set in the match, nothing exist in the pattern, or were not set in the match, nothing
is output unless the file name or line number are being out- is output unless the file name or line number are being out-
put. put.
@ -723,7 +750,7 @@ OPTIONS COMPATIBILITY
terminology) is also available as --xxx-regex (PCRE2 terminology). How- terminology) is also available as --xxx-regex (PCRE2 terminology). How-
ever, the --depth-limit, --file-list, --file-offsets, --include-dir, ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new- --line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
line, --om-separator, -u, and --utf-8 options are specific to line, --om-separator, --output, -u, and --utf-8 options are specific to
pcre2grep, as is the use of the --only-matching option with a capturing pcre2grep, as is the use of the --only-matching option with a capturing
parentheses number. parentheses number.
@ -766,33 +793,38 @@ OPTIONS WITH DATA
equals character. Otherwise pcre2grep will assume that it has no data. equals character. Otherwise pcre2grep will assume that it has no data.
CALLING EXTERNAL SCRIPTS USING PCRE2'S CALLOUT FACILITY
pcre2grep has, by default, support for calling external programs or pcre2grep has, by default, support for calling external programs or
scripts during matching by making use of PCRE2's callout facility. How- scripts or echoing specific strings during matching by making use of
ever, this support can be disabled when pcre2grep is built. You can PCRE2's callout facility. However, this support can be disabled when
find out whether your binary has support for callouts by running it pcre2grep is built. You can find out whether your binary has support
with the --help option. If the support is not enabled, all callouts in for callouts by running it with the --help option. If the support is
patterns are ignored by pcre2grep. not enabled, all callouts in patterns are ignored by pcre2grep.
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu- A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
ment is either a number or a quoted string (see the pcre2callout docu- ment is either a number or a quoted string (see the pcre2callout docu-
mentation for details). Numbered callouts are ignored by pcre2grep. mentation for details). Numbered callouts are ignored by pcre2grep;
String arguments are parsed as a list of substrings separated by pipe only callouts with string arguments are useful.
(vertical bar) characters. The first substring must be an executable
name, with the following substrings specifying arguments: Calling external programs or scripts
If the callout string does not start with a pipe (vertical bar) charac-
ter, it is parsed into a list of substrings separated by pipe charac-
ters. The first substring must be an executable name, with the follow-
ing substrings specifying arguments:
executable_name|arg1|arg2|... executable_name|arg1|arg2|...
Any substring (including the executable name) may contain escape Any substring (including the executable name) may contain escape
sequences started by a dollar character: $<digits> or ${<digits>} is sequences started by a dollar character: $<digits> or ${<digits>} is
replaced by the captured substring of the given decimal number, which replaced by the captured substring of the given decimal number, which
must be greater than zero. If the number is greater than the number of must be greater than zero. If the number is greater than the number of
capturing substrings, or if the capture is unset, the replacement is capturing substrings, or if the capture is unset, the replacement is
empty. empty.
Any other character is substituted by itself. In particular, $$ is Any other character is substituted by itself. In particular, $$ is
replaced by a single dollar and $| is replaced by a pipe character. replaced by a single dollar and $| is replaced by a pipe character.
Here is an example: Here is an example:
echo -e "abcde\n12345" | pcre2grep \ echo -e "abcde\n12345" | pcre2grep \
@ -808,37 +840,49 @@ CALLING EXTERNAL SCRIPTS
The parameters for the execv() system call that is used to run the pro- The parameters for the execv() system call that is used to run the pro-
gram or script are zero-terminated strings. This means that binary zero gram or script are zero-terminated strings. This means that binary zero
characters in the callout argument will cause premature termination of characters in the callout argument will cause premature termination of
their substrings, and therefore should not be present. Any syntax their substrings, and therefore should not be present. Any syntax
errors in the string (for example, a dollar not followed by another errors in the string (for example, a dollar not followed by another
character) cause the callout to be ignored. If running the program character) cause the callout to be ignored. If running the program
fails for any reason (including the non-existence of the executable), a fails for any reason (including the non-existence of the executable), a
local matching failure occurs and the matcher backtracks in the normal local matching failure occurs and the matcher backtracks in the normal
way. way.
Echoing a specific string
If the callout string starts with a pipe (vertical bar) character, the
rest of the string is written to the output, having been passed through
the same escape processing as text from the --output option. This pro-
vides a simple echoing facility that avoids calling an external program
or script. No terminator is added to the string, so if you want a new-
line, you must include it explicitly. Matching continues normally
after the string is output. If you want to see only the callout output
but not any output from an actual match, you should end the relevant
pattern with (*FAIL).
MATCHING ERRORS MATCHING ERRORS
It is possible to supply a regular expression that takes a very long It is possible to supply a regular expression that takes a very long
time to fail to match certain lines. Such patterns normally involve time to fail to match certain lines. Such patterns normally involve
nested indefinite repeats, for example: (a+)*\d when matched against a nested indefinite repeats, for example: (a+)*\d when matched against a
line of a's with no final digit. The PCRE2 matching function has a line of a's with no final digit. The PCRE2 matching function has a
resource limit that causes it to abort in these circumstances. If this resource limit that causes it to abort in these circumstances. If this
happens, pcre2grep outputs an error message and the line that caused happens, pcre2grep outputs an error message and the line that caused
the problem to the standard error stream. If there are more than 20 the problem to the standard error stream. If there are more than 20
such errors, pcre2grep gives up. such errors, pcre2grep gives up.
The --match-limit option of pcre2grep can be used to set the overall The --match-limit option of pcre2grep can be used to set the overall
resource limit; there is a second option called --depth-limit that sets resource limit; there is a second option called --depth-limit that sets
a limit on the amount of memory that is used (see the discussion of a limit on the amount of memory that is used (see the discussion of
these options above). these options above).
DIAGNOSTICS DIAGNOSTICS
Exit status is 0 if any matches were found, 1 if no matches were found, Exit status is 0 if any matches were found, 1 if no matches were found,
and 2 for syntax errors, overlong lines, non-existent or inaccessible and 2 for syntax errors, overlong lines, non-existent or inaccessible
files (even if matches were found in other files) or too many matching files (even if matches were found in other files) or too many matching
errors. Using the -s option to suppress error messages about inaccessi- errors. Using the -s option to suppress error messages about inaccessi-
ble files does not affect the return code. ble files does not affect the return code.
@ -857,5 +901,5 @@ AUTHOR
REVISION REVISION
Last updated: 31 March 2017 Last updated: 06 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.

View File

@ -511,6 +511,7 @@ PATTERN MODIFIERS
dollar_endonly set PCRE2_DOLLAR_ENDONLY dollar_endonly set PCRE2_DOLLAR_ENDONLY
/s dotall set PCRE2_DOTALL /s dotall set PCRE2_DOTALL
dupnames set PCRE2_DUPNAMES dupnames set PCRE2_DUPNAMES
endanchored set PCRE2_ENDANCHORED
/x extended set PCRE2_EXTENDED /x extended set PCRE2_EXTENDED
firstline set PCRE2_FIRSTLINE firstline set PCRE2_FIRSTLINE
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
@ -926,6 +927,7 @@ SUBJECT MODIFIERS
pcre2_dfa_match(). See pcreapi for a description of their effects. pcre2_dfa_match(). See pcreapi for a description of their effects.
anchored set PCRE2_ANCHORED anchored set PCRE2_ANCHORED
endanchored set PCRE2_ENDANCHORED
dfa_restart set PCRE2_DFA_RESTART dfa_restart set PCRE2_DFA_RESTART
dfa_shortest set PCRE2_DFA_SHORTEST dfa_shortest set PCRE2_DFA_SHORTEST
no_jit set PCRE2_NO_JIT no_jit set PCRE2_NO_JIT
@ -1630,5 +1632,5 @@ AUTHOR
REVISION REVISION
Last updated: 25 March 2017 Last updated: 04 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.

View File

@ -175,8 +175,9 @@ static const char *dee_option = NULL;
static const char *DEE_option = NULL; static const char *DEE_option = NULL;
static const char *locale = NULL; static const char *locale = NULL;
static const char *newline_arg = NULL; static const char *newline_arg = NULL;
static const char *om_separator = ""; static const char *om_separator = NULL;
static const char *stdin_name = "(standard input)"; static const char *stdin_name = "(standard input)";
static const char *output_text = NULL;
static char *main_buffer = NULL; static char *main_buffer = NULL;
@ -196,6 +197,7 @@ static int dee_action = dee_SKIP;
#else #else
static int dee_action = dee_READ; static int dee_action = dee_READ;
#endif #endif
static int DEE_action = DEE_READ; static int DEE_action = DEE_READ;
static int error_count = 0; static int error_count = 0;
static int filenames = FN_DEFAULT; static int filenames = FN_DEFAULT;
@ -233,7 +235,6 @@ static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE; static BOOL omit_zero_count = FALSE;
static BOOL resource_error = FALSE; static BOOL resource_error = FALSE;
static BOOL quiet = FALSE; static BOOL quiet = FALSE;
static BOOL show_only_matching = FALSE;
static BOOL show_total_count = FALSE; static BOOL show_total_count = FALSE;
static BOOL silent = FALSE; static BOOL silent = FALSE;
static BOOL utf = FALSE; static BOOL utf = FALSE;
@ -247,6 +248,7 @@ typedef struct omstr {
static omstr *only_matching = NULL; static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL; static omstr *only_matching_last = NULL;
static int only_matching_count;
/* Structure for holding the two variables that describe a number chain. */ /* Structure for holding the two variables that describe a number chain. */
@ -406,6 +408,7 @@ static option_item optionlist[] = {
#else #else
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" }, { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
#endif #endif
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
@ -793,7 +796,7 @@ return isatty(fileno(f));
/************* Print optionally coloured match Unix-style and z/OS **********/ /************* Print optionally coloured match Unix-style and z/OS **********/
static void static void
print_match(const char* buf, int length) print_match(const void *buf, int length)
{ {
if (length == 0) return; if (length == 0) return;
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
@ -942,7 +945,7 @@ static CONSOLE_SCREEN_BUFFER_INFO csbi;
static WORD match_colour; static WORD match_colour;
static void static void
print_match(const char* buf, int length) print_match(const void *buf, int length)
{ {
if (length == 0) return; if (length == 0) return;
if (do_colour) if (do_colour)
@ -1001,7 +1004,7 @@ return FALSE;
/************* Print optionally coloured match when we can't do it **********/ /************* Print optionally coloured match when we can't do it **********/
static void static void
print_match(const char* buf, int length) print_match(const void *buf, int length)
{ {
if (length == 0) return; if (length == 0) return;
FWRITE(buf, 1, length, stdout); FWRITE(buf, 1, length, stdout);
@ -1658,6 +1661,277 @@ return FALSE; /* No match, no errors */
} }
/*************************************************
*          Check output text for errors          *
*************************************************/

/* Check the dollar escape sequences in an output text string before it is
displayed. The following are diagnosed as syntax errors:

  . a dollar that is the last character of the string;
  . ${ that is not followed by a decimal number;
  . ${<digits> that is not followed by a closing brace;
  . $o that is not followed by an octal digit;
  . $x that is not followed by a hexadecimal digit.

A capture number may start with any of the digits 1-9. It may start with 0
only when callout is FALSE. Any other character after a dollar is accepted.

Arguments:
  string:    the zero-terminated output text
  callout:   TRUE for the builtin callout, FALSE for --output

Returns:     TRUE if no error was found
             FALSE otherwise; when callout is FALSE a message that gives the
               offset of the error is also written to stderr
*/

static BOOL
syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
{
PCRE2_SPTR begin = string;
const char *error = NULL;

for (; *string != 0; string++)
  {
  BOOL brace = FALSE;

  if (*string != '$') continue;
  string++;

  /* Syntax error: a character must be present after $. */

  if (*string == 0)
    {
    error = "no character after $";
    break;
    }

  /* A brace must be followed by a decimal number, e.g: {5} or {38} */

  if (*string == '{')
    {
    string++;
    brace = TRUE;
    }

  if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
    {
    /* Only the extent of the number matters here; its value is not needed
    for a syntax check. */

    do string++; while (*string >= '0' && *string <= '9');

    if (!brace)
      {
      /* To negate the effect of the for. */
      string--;
      }
    else if (*string != '}')
      {
      /* Syntax error: closing brace is missing. */
      error = "missing closing brace";
      break;
      }
    }

  else if (brace)
    {
    /* Syntax error: a decimal number required. */
    error = "decimal number expected";
    break;
    }

  else if (*string == 'o')
    {
    string++;
    if (*string < '0' || *string > '7')
      {
      /* Syntax error: an octal number required. */
      error = "octal number expected";
      break;
      }
    }

  else if (*string == 'x')
    {
    string++;
    if (!isxdigit((unsigned char)*string))
      {
      /* Syntax error: a hexadecimal number required. */
      error = "hexadecimal number expected";
      break;
      }
    }
  }

if (error == NULL) return TRUE;

/* Errors in callout strings are not reported; the caller just gets FALSE. At
this point string addresses the character at which the error was detected. */

if (!callout)
  fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
    (int)(string - begin), error);

return FALSE;
}
/*************************************************
*          Display output text                   *
*************************************************/

/* Return the value (0-15) of a character that is known to be a hexadecimal
digit. Explicit ranges are used instead of a bit operation for folding the
case, because the bit that distinguishes upper and lower case letters is not
the same in ASCII and in EBCDIC. */

static int
output_text_hex_value(int c)
{
if (c >= '0' && c <= '9') return c - '0';
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
return c - 'A' + 10;
}


/* Display the output text, which is assumed to have already been syntax
checked. Output may contain escape sequences started by the dollar sign. The
escape sequences are substituted as follows:

  $<digits> or ${<digits>} is replaced by the captured substring of the given
  decimal number; zero will substitute the whole match (a leading zero is
  recognized only when callout is FALSE). If the number is greater than the
  number of capturing substrings, or if the capture is unset, the replacement
  is empty. The replacement is also empty if the start offset of the capture
  is greater than its end offset.

  $a is replaced by bell.
  $b is replaced by backspace.
  $e is replaced by escape.
  $f is replaced by form feed.
  $n is replaced by newline.
  $r is replaced by carriage return.
  $t is replaced by tab.
  $v is replaced by vertical tab.

  $o<digits> is replaced by the character represented by the given octal
  number; up to three digits are processed.

  $x<digits> is replaced by the character represented by the given hexadecimal
  number; up to two digits are processed.

  Any other character is substituted by itself. E.g: $$ is replaced by a single
  dollar.

Arguments:
  string:       the output text
  callout:      TRUE for the builtin callout, FALSE for --output
  subject       the start of the subject
  ovector:      capture offsets
  capture_top:  number of captures

Returns:        TRUE if something was output, other than newline
                FALSE if nothing was output, or newline was last output
*/

static BOOL
display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
  PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
{
BOOL printed = FALSE;

for (; *string != 0; string++)
  {
  int ch = EOF;   /* EOF means that there is no single character to write */

  if (*string == '$')
    {
    PCRE2_SIZE capture_id = 0;
    BOOL brace = FALSE;

    string++;

    if (*string == '{')
      {
      /* Must be a decimal number in braces, e.g: {5} or {38} */
      string++;
      brace = TRUE;
      }

    if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
      {
      do
        {
        /* Maximum capture id is 65535. Once the value is larger than that,
        further digits are skipped without being accumulated. */
        if (capture_id <= 65535)
          capture_id = capture_id * 10 + (*string - '0');
        string++;
        }
      while (*string >= '0' && *string <= '9');

      /* Without braces, step back to negate the effect of the for. With
      braces, string is left at the closing brace, which the for skips. */

      if (!brace) string--;

      if (capture_id < capture_top)
        {
        PCRE2_SIZE start = ovector[2*capture_id];
        PCRE2_SIZE end = ovector[2*capture_id + 1];

        /* Compare the offsets instead of testing their difference. The
        offsets are unsigned, so a start that is greater than the end would
        otherwise wrap round and give an enormous length. */

        if (end > start)
          {
          print_match(subject + start, (int)(end - start));
          printed = TRUE;
          }
        }
      }

    else if (*string == 'a') ch = '\a';
    else if (*string == 'b') ch = '\b';
#ifndef EBCDIC
    else if (*string == 'e') ch = '\033';
#else
    else if (*string == 'e') ch = '\047';
#endif
    else if (*string == 'f') ch = '\f';
    else if (*string == 'r') ch = '\r';
    else if (*string == 't') ch = '\t';
    else if (*string == 'v') ch = '\v';

    /* A newline is written as STDOUT_NL, and it resets the record of whether
    anything has been written since the last newline. */

    else if (*string == 'n')
      {
      fprintf(stdout, STDOUT_NL);
      printed = FALSE;
      }

    /* The syntax check guarantees one octal digit; up to two more are used
    if they are present. */

    else if (*string == 'o')
      {
      string++;
      ch = *string - '0';
      if (string[1] >= '0' && string[1] <= '7')
        {
        string++;
        ch = ch * 8 + (*string - '0');
        }
      if (string[1] >= '0' && string[1] <= '7')
        {
        string++;
        ch = ch * 8 + (*string - '0');
        }
      }

    /* The syntax check guarantees one hexadecimal digit; one more is used if
    it is present. */

    else if (*string == 'x')
      {
      string++;
      ch = output_text_hex_value(*string);
      if (isxdigit((unsigned char)string[1]))
        {
        string++;
        ch = ch * 16 + output_text_hex_value(*string);
        }
      }

    /* Any other character after a dollar stands for itself. */

    else
      {
      ch = *string;
      }
    }

  else
    {
    ch = *string;
    }

  if (ch != EOF)
    {
    fprintf(stdout, "%c", ch);
    printed = TRUE;
    }
  }

return printed;
}
#ifdef SUPPORT_PCRE2GREP_CALLOUT #ifdef SUPPORT_PCRE2GREP_CALLOUT
/************************************************* /*************************************************
@ -1683,6 +1957,10 @@ follows:
Any other character is substituted by itself. E.g: $$ is replaced by a single Any other character is substituted by itself. E.g: $$ is replaced by a single
dollar or $| replaced by a pipe character. dollar or $| replaced by a pipe character.
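Alternatively, if string starts with pipe, the remainder is taken as an output
string and is written in the same way as text given to --output, except that $0
is not treated as a capture reference. No separator or terminator is added, so
a newline must be included explicitly (as $n) if one is wanted.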
Example: Example:
echo -e "abcde\n12345" | pcre2grep \ echo -e "abcde\n12345" | pcre2grep \
@ -1725,6 +2003,16 @@ int result = 0;
/* Only callout with strings are supported. */ /* Only callout with strings are supported. */
if (string == NULL || length == 0) return 0; if (string == NULL || length == 0) return 0;
/* If there's no command, output the remainder directly. */
if (*string == '|')
{
string++;
if (!syntax_check_output_text(string, TRUE)) return 0;
(void)display_output_text(string, TRUE, subject, ovector, capture_top);
return 0;
}
/* Checking syntax and compute the number of string fragments. Callout strings /* Checking syntax and compute the number of string fragments. Callout strings
are ignored in case of a syntax error. */ are ignored in case of a syntax error. */
@ -2174,8 +2462,8 @@ while (ptr < endptr)
} }
#endif #endif
/* We come back here after a match when show_only_matching is set, in order /* We come back here after a match when only_matching_count is non-zero, in
to find any further matches in the same line. This applies to order to find any further matches in the same line. This applies to
--only-matching, --file-offsets, and --line-offsets. */ --only-matching, --file-offsets, and --line-offsets. */
ONLY_MATCHING_RESTART: ONLY_MATCHING_RESTART:
@ -2229,13 +2517,13 @@ while (ptr < endptr)
/* The --only-matching option prints just the substring that matched, /* The --only-matching option prints just the substring that matched,
and/or one or more captured portions of it, as long as these strings are and/or one or more captured portions of it, as long as these strings are
not empty. The --file-offsets and --line-offsets options output offsets for not empty. The --file-offsets and --line-offsets options output offsets for
the matching substring (all three set show_only_matching). None of these the matching substring (all three set only_matching_count non-zero). None
mutually exclusive options prints any context. Afterwards, adjust the start of these mutually exclusive options prints any context. Afterwards, adjust
and then jump back to look for further matches in the same line. If we are the start and then jump back to look for further matches in the same line.
in invert mode, however, nothing is printed and we do not restart - this If we are in invert mode, however, nothing is printed and we do not restart
could still be useful because the return code is set. */ - this could still be useful because the return code is set. */
else if (show_only_matching) else if (only_matching_count != 0)
{ {
if (!invert) if (!invert)
{ {
@ -2257,6 +2545,16 @@ while (ptr < endptr)
(int)(filepos + matchptr + offsets[0] - ptr), (int)(filepos + matchptr + offsets[0] - ptr),
(int)(offsets[1] - offsets[0])); (int)(offsets[1] - offsets[0]));
/* Handle --output (which has already been syntax checked) */
else if (output_text != NULL)
{
if (display_output_text((PCRE2_SPTR)output_text, FALSE,
(PCRE2_SPTR)matchptr, offsets, mrc) || printname != NULL ||
number)
fprintf(stdout, STDOUT_NL);
}
/* Handle --only-matching, which may occur many times */ /* Handle --only-matching, which may occur many times */
else else
@ -2272,7 +2570,8 @@ while (ptr < endptr)
int plen = offsets[2*n + 1] - offsets[2*n]; int plen = offsets[2*n + 1] - offsets[2*n];
if (plen > 0) if (plen > 0)
{ {
if (printed) fprintf(stdout, "%s", om_separator); if (printed && om_separator != NULL)
fprintf(stdout, "%s", om_separator);
print_match(matchptr + offsets[n*2], plen); print_match(matchptr + offsets[n*2], plen);
printed = TRUE; printed = TRUE;
} }
@ -2557,7 +2856,7 @@ while (ptr < endptr)
/* End of file; print final "after" lines if wanted; do_after_lines sets /* End of file; print final "after" lines if wanted; do_after_lines sets
hyphenpending if it prints something. */ hyphenpending if it prints something. */
if (!show_only_matching && !(count_only|show_total_count)) if (only_matching_count == 0 && !(count_only|show_total_count))
{ {
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
hyphenpending |= endhyphenpending; hyphenpending |= endhyphenpending;
@ -3518,26 +3817,31 @@ if (both_context > 0)
if (before_context == 0) before_context = both_context; if (before_context == 0) before_context = both_context;
} }
/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
However, all three set show_only_matching because they display, each in their permitted. They display, each in their own way, only the data that has matched.
own way, only the data that has matched. */ */
if ((only_matching != NULL && (file_offsets || line_offsets)) || only_matching_count = (only_matching != NULL) + (output_text != NULL) +
(file_offsets && line_offsets)) file_offsets + line_offsets;
if (only_matching_count > 1)
{ {
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets " fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
"and/or --line-offsets\n"); "--file-offsets and/or --line-offsets\n");
pcre2grep_exit(usage(2)); pcre2grep_exit(usage(2));
} }
/* Check the text supplied to --output for errors. */
if (output_text != NULL &&
!syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
goto EXIT2;
/* Put limits into the match data block. */ /* Put limits into the match data block. */
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit); if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
if (only_matching != NULL || file_offsets || line_offsets)
show_only_matching = TRUE;
/* If a locale has not been provided as an option, see if the LC_CTYPE or /* If a locale has not been provided as an option, see if the LC_CTYPE or
LC_ALL environment variable is set, and if so, use it. */ LC_ALL environment variable is set, and if so, use it. */
@ -3827,6 +4131,14 @@ for (; i < argc; i++)
else if (frc == 0 && rc == 1) rc = 0; else if (frc == 0 && rc == 1) rc = 0;
} }
#ifdef SUPPORT_PCRE2GREP_CALLOUT
/* If separating builtin echo callouts by implicit newline, add one more for
the final item. */
if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
fprintf(stdout, STDOUT_NL);
#endif
/* Show the total number of matches if requested, but not if only one file's /* Show the total number of matches if requested, but not if only one file's
count was printed. */ count was printed. */

5
testdata/grepoutput vendored
View File

@ -829,3 +829,8 @@ def
xyz xyz
--- ---
RC=0 RC=0
---------------------------- Test 120 ------------------------------
./testdata/grepinput:the binary zero.:zerothe.
./testdata/grepinput:a binary zero:zeroa
./testdata/grepinput:the binary zero.:zerothe.
RC=0

View File

@ -6,3 +6,9 @@ Arg1: [qu] [qu]
Arg1: [ t] [ t] Arg1: [ t] [ t]
The quick brown The quick brown
This time it jumps and jumps and jumps. This time it jumps and jumps and jumps.
0:T
The quick brown
0:T
This time it jumps and jumps and jumps.
T
T