Jason Hood's pcre2grep patches (modified a bit) to add --output to pcre2grep,
and also an inbuilt callout echo.
This commit is contained in:
parent
2b36600b2b
commit
88abc14e42
|
@ -113,6 +113,9 @@ a message, and abandon the run (this would have detected #13 above).
|
||||||
|
|
||||||
19. Implemented PCRE2_ENDANCHORED.
|
19. Implemented PCRE2_ENDANCHORED.
|
||||||
|
|
||||||
|
20. Applied Jason Hood's patches (slightly modified) to pcre2grep, to implement
|
||||||
|
the --output=text (-O) option and the inbuilt callout echo.
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 14-February-2017
|
Version 10.23 14-February-2017
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
|
@ -598,6 +598,10 @@ printf "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
|
||||||
$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep
|
$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
|
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
|
||||||
|
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||||
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
# Now compare the results.
|
# Now compare the results.
|
||||||
|
|
||||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||||
|
@ -667,6 +671,9 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scri
|
||||||
echo "Testing pcre2grep script callouts"
|
echo "Testing pcre2grep script callouts"
|
||||||
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
|
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
|
||||||
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
|
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||||
|
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||||
|
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
|
||||||
|
# The $n escapes in the callout strings above already supply the trailing newline, so none needs to be added here.
|
||||||
$cf $srcdir/testdata/grepoutputC testtrygrep
|
$cf $srcdir/testdata/grepoutputC testtrygrep
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
else
|
else
|
||||||
|
|
|
@ -585,6 +585,10 @@ echo ---------------------------- Test 119 ----------------------------->>testtr
|
||||||
%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep
|
%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep
|
||||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||||
|
|
||||||
|
echo ---------------------------- Test 120 ------------------------------>>testtrygrep
|
||||||
|
(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
|
||||||
|
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||||
|
|
||||||
:: Now compare the results.
|
:: Now compare the results.
|
||||||
|
|
||||||
%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout%
|
%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout%
|
||||||
|
@ -654,6 +658,10 @@ if %ERRORLEVEL% equ 0 (
|
||||||
echo Testing pcre2grep script callouts
|
echo Testing pcre2grep script callouts
|
||||||
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
|
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
|
||||||
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
|
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||||
|
%pcre2grep% "(T)(?C'|$0:$1')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||||
|
%pcre2grep% --om-separator / "(T)(?C'|$1')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||||
|
:: The above has no newline, which 'diff -ub' ignores, so add one.
|
||||||
|
echo />>testtrygrep
|
||||||
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
|
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
|
||||||
if ERRORLEVEL 1 exit /b 1
|
if ERRORLEVEL 1 exit /b 1
|
||||||
) else (
|
) else (
|
||||||
|
|
|
@ -60,6 +60,7 @@ The option bits are:
|
||||||
PCRE2_DOLLAR_ENDONLY $ not to match newline at end
|
PCRE2_DOLLAR_ENDONLY $ not to match newline at end
|
||||||
PCRE2_DOTALL . matches anything including NL
|
PCRE2_DOTALL . matches anything including NL
|
||||||
PCRE2_DUPNAMES Allow duplicate names for subpatterns
|
PCRE2_DUPNAMES Allow duplicate names for subpatterns
|
||||||
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_EXTENDED Ignore white space and # comments
|
PCRE2_EXTENDED Ignore white space and # comments
|
||||||
PCRE2_FIRSTLINE Force matching to be before newline
|
PCRE2_FIRSTLINE Force matching to be before newline
|
||||||
PCRE2_MATCH_UNSET_BACKREF Match unset back references
|
PCRE2_MATCH_UNSET_BACKREF Match unset back references
|
||||||
|
|
|
@ -50,6 +50,7 @@ up a callout function or specify the recursion depth limit. The <i>length</i>
|
||||||
and <i>startoffset</i> values are code units, not characters. The options are:
|
and <i>startoffset</i> values are code units, not characters. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject is not the end of a line
|
PCRE2_NOTEOL Subject is not the end of a line
|
||||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||||
|
|
|
@ -53,6 +53,7 @@ units, not characters. The length may be given as PCRE2_ZERO_TERMINATED for a
|
||||||
subject that is terminated by a binary zero code unit. The options are:
|
subject that is terminated by a binary zero code unit. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject string is not the end of a line
|
PCRE2_NOTEOL Subject string is not the end of a line
|
||||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||||
|
|
|
@ -64,6 +64,7 @@ The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
|
||||||
zero-terminated strings. The options are:
|
zero-terminated strings. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject is not the end of a line
|
PCRE2_NOTEOL Subject is not the end of a line
|
||||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||||
|
|
|
@ -1123,8 +1123,8 @@ documentation).
|
||||||
<P>
|
<P>
|
||||||
For those options that can be different in different parts of the pattern, the
|
For those options that can be different in different parts of the pattern, the
|
||||||
contents of the <i>options</i> argument specifies their settings at the start of
|
contents of the <i>options</i> argument specifies their settings at the start of
|
||||||
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
|
compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK
|
||||||
the time of matching as well as at compile time.
|
options can be set at the time of matching as well as at compile time.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Other, less frequently required compile-time parameters (for example, the
|
Other, less frequently required compile-time parameters (for example, the
|
||||||
|
@ -1279,6 +1279,13 @@ only one instance of the named subpattern can ever be matched. There are more
|
||||||
details of named subpatterns below; see also the
|
details of named subpatterns below; see also the
|
||||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
|
<pre>
|
||||||
|
PCRE2_ENDANCHORED
|
||||||
|
</pre>
|
||||||
|
If this bit is set, the end of any pattern match must be right at the end of
|
||||||
|
the string being searched (the "subject string"). This effect can also be
|
||||||
|
achieved by appropriate constructs in the pattern itself, which is the only way
|
||||||
|
to do it in Perl.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_EXTENDED
|
PCRE2_EXTENDED
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2141,16 +2148,16 @@ Option bits for <b>pcre2_match()</b>
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
|
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
|
||||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
|
||||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT,
|
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is
|
PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
|
||||||
described below.
|
Their action is described below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Setting PCRE2_ANCHORED at match time is not supported by the just-in-time (JIT)
|
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
|
||||||
compiler. If it is set, JIT matching is disabled and the interpretive code in
|
the just-in-time (JIT) compiler. If either is set, JIT matching is disabled and the
|
||||||
<b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT (obviously), the remaining
|
interpretive code in <b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT
|
||||||
options are supported for JIT matching.
|
(obviously), the remaining options are supported for JIT matching.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED
|
PCRE2_ANCHORED
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2159,6 +2166,12 @@ matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
||||||
to be anchored by virtue of its contents, it cannot be made unanchored at
|
to be anchored by virtue of its contents, it cannot be made unanchored at
|
||||||
matching time. Note that setting the option at match time disables JIT
|
matching time. Note that setting the option at match time disables JIT
|
||||||
matching.
|
matching.
|
||||||
|
<pre>
|
||||||
|
PCRE2_ENDANCHORED
|
||||||
|
</pre>
|
||||||
|
If the PCRE2_ENDANCHORED option is set, any string that <b>pcre2_match()</b>
|
||||||
|
matches must be right at the end of the subject string. Note that setting the
|
||||||
|
option at match time disables JIT matching.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_NOTBOL
|
PCRE2_NOTBOL
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -3100,11 +3113,11 @@ Option bits for <b>pcre2_dfa_match()</b>
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
|
The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
|
||||||
be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
|
||||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
|
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||||
PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and
|
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST,
|
||||||
PCRE2_DFA_RESTART. All but the last four of these are exactly the same as for
|
and PCRE2_DFA_RESTART. All but the last four of these are exactly the same as
|
||||||
<b>pcre2_match()</b>, so their description is not repeated here.
|
for <b>pcre2_match()</b>, so their description is not repeated here.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_PARTIAL_HARD
|
PCRE2_PARTIAL_HARD
|
||||||
PCRE2_PARTIAL_SOFT
|
PCRE2_PARTIAL_SOFT
|
||||||
|
@ -3258,7 +3271,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 01 April 2017
|
Last updated: 04 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -22,7 +22,7 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
|
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
|
||||||
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
|
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
|
||||||
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
|
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
|
||||||
<li><a name="TOC10" href="#SEC10">CALLING EXTERNAL SCRIPTS</a>
|
<li><a name="TOC10" href="#SEC10">USING PCRE2'S CALLOUT FACILITY</a>
|
||||||
<li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a>
|
<li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a>
|
||||||
<li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a>
|
<li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a>
|
||||||
<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
|
<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
|
||||||
|
@ -384,8 +384,8 @@ Instead of showing lines or parts of lines that match, show each match as an
|
||||||
offset from the start of the file and a length, separated by a comma. In this
|
offset from the start of the file and a length, separated by a comma. In this
|
||||||
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
|
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
|
||||||
options are ignored. If there is more than one match in a line, each of them is
|
options are ignored. If there is more than one match in a line, each of them is
|
||||||
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
|
shown separately. This option is mutually exclusive with <b>--output</b>,
|
||||||
and <b>--only-matching</b>.
|
<b>--line-offsets</b>, and <b>--only-matching</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-H</b>, <b>--with-filename</b>
|
<b>-H</b>, <b>--with-filename</b>
|
||||||
|
@ -491,7 +491,8 @@ number is terminated by a colon (as usual; see the <b>-n</b> option), and the
|
||||||
offset and length are separated by a comma. In this mode, no context is shown.
|
offset and length are separated by a comma. In this mode, no context is shown.
|
||||||
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
|
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
|
||||||
more than one match in a line, each of them is shown separately. This option is
|
more than one match in a line, each of them is shown separately. This option is
|
||||||
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
|
mutually exclusive with <b>--output</b>, <b>--file-offsets</b>, and
|
||||||
|
<b>--only-matching</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--locale</b>=<i>locale-name</i>
|
<b>--locale</b>=<i>locale-name</i>
|
||||||
|
@ -602,6 +603,36 @@ use of JIT at run time. It is provided for testing and working round problems.
|
||||||
It should never be needed in normal use.
|
It should never be needed in normal use.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
|
||||||
|
When there is a match, instead of outputting the whole line that matched,
|
||||||
|
output just the given text. This option is mutually exclusive with
|
||||||
|
<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
|
||||||
|
sequences starting with a dollar character may be used to insert the contents
|
||||||
|
of the matched part of the line and/or captured substrings into the text.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
$<digits> or ${<digits>} is replaced by the captured
|
||||||
|
substring of the given decimal number; zero substitutes the whole match. If
|
||||||
|
the number is greater than the number of capturing substrings, or if the
|
||||||
|
capture is unset, the replacement is empty.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
||||||
|
newline; $r by carriage return; $t by tab; $v by vertical tab.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
$o<digits> is replaced by the character represented by the given octal
|
||||||
|
number; up to three digits are processed.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
$x<digits> is replaced by the character represented by the given hexadecimal
|
||||||
|
number; up to two digits are processed.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
Any other character is substituted by itself. In particular, $$ is replaced by
|
||||||
|
a single dollar.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-o</b>, <b>--only-matching</b>
|
<b>-o</b>, <b>--only-matching</b>
|
||||||
Show only the part of the line that matched a pattern instead of the whole
|
Show only the part of the line that matched a pattern instead of the whole
|
||||||
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
||||||
|
@ -611,7 +642,7 @@ combined with <b>-v</b> (invert the sense of the match to find non-matching
|
||||||
lines), no output is generated, but the return code is set appropriately. If
|
lines), no output is generated, but the return code is set appropriately. If
|
||||||
the matched portion of the line is empty, nothing is output unless the file
|
the matched portion of the line is empty, nothing is output unless the file
|
||||||
name or line number are being printed, in which case they are shown on an
|
name or line number are being printed, in which case they are shown on an
|
||||||
otherwise empty line. This option is mutually exclusive with
|
otherwise empty line. This option is mutually exclusive with <b>--output</b>,
|
||||||
<b>--file-offsets</b> and <b>--line-offsets</b>.
|
<b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -621,7 +652,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
|
||||||
equivalent to <b>-o</b> without a number. Because these options can be given
|
equivalent to <b>-o</b> without a number. Because these options can be given
|
||||||
without an argument (see above), if an argument is present, it must be given in
|
without an argument (see above), if an argument is present, it must be given in
|
||||||
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||||
for the non-argument case above also apply to this case. If the specified
|
for the non-argument case above also apply to this option. If the specified
|
||||||
capturing parentheses do not exist in the pattern, or were not set in the
|
capturing parentheses do not exist in the pattern, or were not set in the
|
||||||
match, nothing is output unless the file name or line number are being output.
|
match, nothing is output unless the file name or line number are being output.
|
||||||
<br>
|
<br>
|
||||||
|
@ -735,9 +766,9 @@ as in the GNU <b>grep</b> program. Any long option of the form
|
||||||
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
|
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
|
||||||
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
|
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
|
||||||
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
|
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
|
||||||
<b>--newline</b>, <b>--om-separator</b>, <b>-u</b>, and <b>--utf-8</b> options are
|
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
|
||||||
specific to <b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option
|
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
|
||||||
with a capturing parentheses number.
|
<b>--only-matching</b> option with a capturing parentheses number.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Although most of the common options work the same way, a few are different in
|
Although most of the common options work the same way, a few are different in
|
||||||
|
@ -778,23 +809,30 @@ The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
|
||||||
options does have data, it must be given in the first form, using an equals
|
options does have data, it must be given in the first form, using an equals
|
||||||
character. Otherwise <b>pcre2grep</b> will assume that it has no data.
|
character. Otherwise <b>pcre2grep</b> will assume that it has no data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
|
<br><a name="SEC10" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2grep</b> has, by default, support for calling external programs or
|
<b>pcre2grep</b> has, by default, support for calling external programs or
|
||||||
scripts during matching by making use of PCRE2's callout facility. However,
|
scripts or echoing specific strings during matching by making use of PCRE2's
|
||||||
this support can be disabled when <b>pcre2grep</b> is built. You can find out
|
callout facility. However, this support can be disabled when <b>pcre2grep</b> is
|
||||||
whether your binary has support for callouts by running it with the <b>--help</b>
|
built. You can find out whether your binary has support for callouts by running
|
||||||
option. If the support is not enabled, all callouts in patterns are ignored by
|
it with the <b>--help</b> option. If the support is not enabled, all callouts in
|
||||||
<b>pcre2grep</b>.
|
patterns are ignored by <b>pcre2grep</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
||||||
either a number or a quoted string (see the
|
either a number or a quoted string (see the
|
||||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>.
|
documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
|
||||||
String arguments are parsed as a list of substrings separated by pipe (vertical
|
only callouts with string arguments are useful.
|
||||||
bar) characters. The first substring must be an executable name, with the
|
</P>
|
||||||
following substrings specifying arguments:
|
<br><b>
|
||||||
|
Calling external programs or scripts
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If the callout string does not start with a pipe (vertical bar) character, it
|
||||||
|
is parsed into a list of substrings separated by pipe characters. The first
|
||||||
|
substring must be an executable name, with the following substrings specifying
|
||||||
|
arguments:
|
||||||
<pre>
|
<pre>
|
||||||
executable_name|arg1|arg2|...
|
executable_name|arg1|arg2|...
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -828,6 +866,19 @@ callout to be ignored. If running the program fails for any reason (including
|
||||||
the non-existence of the executable), a local matching failure occurs and the
|
the non-existence of the executable), a local matching failure occurs and the
|
||||||
matcher backtracks in the normal way.
|
matcher backtracks in the normal way.
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Echoing a specific string
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If the callout string starts with a pipe (vertical bar) character, the rest of
|
||||||
|
the string is written to the output, having been passed through the same escape
|
||||||
|
processing as text from the --output option. This provides a simple echoing
|
||||||
|
facility that avoids calling an external program or script. No terminator is
|
||||||
|
added to the string, so if you want a newline, you must include it explicitly.
|
||||||
|
Matching continues normally after the string is output. If you want to see only
|
||||||
|
the callout output but not any output from an actual match, you should end the
|
||||||
|
relevant pattern with (*FAIL).
|
||||||
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br>
|
<br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br>
|
||||||
<P>
|
<P>
|
||||||
It is possible to supply a regular expression that takes a very long time to
|
It is possible to supply a regular expression that takes a very long time to
|
||||||
|
@ -867,7 +918,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 31 March 2017
|
Last updated: 06 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -568,6 +568,7 @@ for a description of their effects.
|
||||||
dollar_endonly set PCRE2_DOLLAR_ENDONLY
|
dollar_endonly set PCRE2_DOLLAR_ENDONLY
|
||||||
/s dotall set PCRE2_DOTALL
|
/s dotall set PCRE2_DOTALL
|
||||||
dupnames set PCRE2_DUPNAMES
|
dupnames set PCRE2_DUPNAMES
|
||||||
|
endanchored set PCRE2_ENDANCHORED
|
||||||
/x extended set PCRE2_EXTENDED
|
/x extended set PCRE2_EXTENDED
|
||||||
firstline set PCRE2_FIRSTLINE
|
firstline set PCRE2_FIRSTLINE
|
||||||
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
||||||
|
@ -1039,6 +1040,7 @@ The following modifiers set options for <b>pcre2_match()</b> or
|
||||||
for a description of their effects.
|
for a description of their effects.
|
||||||
<pre>
|
<pre>
|
||||||
anchored set PCRE2_ANCHORED
|
anchored set PCRE2_ANCHORED
|
||||||
|
endanchored set PCRE2_ENDANCHORED
|
||||||
dfa_restart set PCRE2_DFA_RESTART
|
dfa_restart set PCRE2_DFA_RESTART
|
||||||
dfa_shortest set PCRE2_DFA_SHORTEST
|
dfa_shortest set PCRE2_DFA_SHORTEST
|
||||||
no_jit set PCRE2_NO_JIT
|
no_jit set PCRE2_NO_JIT
|
||||||
|
@ -1798,7 +1800,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 25 March 2017
|
Last updated: 04 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -1155,8 +1155,9 @@ COMPILING A PATTERN
|
||||||
|
|
||||||
For those options that can be different in different parts of the pat-
|
For those options that can be different in different parts of the pat-
|
||||||
tern, the contents of the options argument specifies their settings at
|
tern, the contents of the options argument specifies their settings at
|
||||||
the start of compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK
|
the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and
|
||||||
options can be set at the time of matching as well as at compile time.
|
PCRE2_NO_UTF_CHECK options can be set at the time of matching as well
|
||||||
|
as at compile time.
|
||||||
|
|
||||||
Other, less frequently required compile-time parameters (for example,
|
Other, less frequently required compile-time parameters (for example,
|
||||||
the newline setting) can be provided in a compile context (as described
|
the newline setting) can be provided in a compile context (as described
|
||||||
|
@ -1303,6 +1304,13 @@ COMPILING A PATTERN
|
||||||
matched. There are more details of named subpatterns below; see also
|
matched. There are more details of named subpatterns below; see also
|
||||||
the pcre2pattern documentation.
|
the pcre2pattern documentation.
|
||||||
|
|
||||||
|
PCRE2_ENDANCHORED
|
||||||
|
|
||||||
|
If this bit is set, the end of any pattern match must be right at the
|
||||||
|
end of the string being searched (the "subject string"). This effect
|
||||||
|
can also be achieved by appropriate constructs in the pattern itself,
|
||||||
|
which is the only way to do it in Perl.
|
||||||
|
|
||||||
PCRE2_EXTENDED
|
PCRE2_EXTENDED
|
||||||
|
|
||||||
If this bit is set, most white space characters in the pattern are
|
If this bit is set, most white space characters in the pattern are
|
||||||
|
@ -2136,15 +2144,16 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
||||||
Option bits for pcre2_match()
|
Option bits for pcre2_match()
|
||||||
|
|
||||||
The unused bits of the options argument for pcre2_match() must be zero.
|
The unused bits of the options argument for pcre2_match() must be zero.
|
||||||
The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
|
||||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT,
|
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their
|
PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PAR-
|
||||||
action is described below.
|
TIAL_SOFT. Their action is described below.
|
||||||
|
|
||||||
Setting PCRE2_ANCHORED at match time is not supported by the just-in-
|
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not sup-
|
||||||
time (JIT) compiler. If it is set, JIT matching is disabled and the
|
ported by the just-in-time (JIT) compiler. If either is set, JIT matching
|
||||||
interpretive code in pcre2_match() is run. Apart from PCRE2_NO_JIT
|
is disabled and the interpretive code in pcre2_match() is run. Apart
|
||||||
(obviously), the remaining options are supported for JIT matching.
|
from PCRE2_NO_JIT (obviously), the remaining options are supported for
|
||||||
|
JIT matching.
|
||||||
|
|
||||||
PCRE2_ANCHORED
|
PCRE2_ANCHORED
|
||||||
|
|
||||||
|
@ -2154,6 +2163,12 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
||||||
unanchored at matching time. Note that setting the option at match time
|
unanchored at matching time. Note that setting the option at match time
|
||||||
disables JIT matching.
|
disables JIT matching.
|
||||||
|
|
||||||
|
PCRE2_ENDANCHORED
|
||||||
|
|
||||||
|
If the PCRE2_ENDANCHORED option is set, any string that pcre2_match()
|
||||||
|
matches must be right at the end of the subject string. Note that set-
|
||||||
|
ting the option at match time disables JIT matching.
|
||||||
|
|
||||||
PCRE2_NOTBOL
|
PCRE2_NOTBOL
|
||||||
|
|
||||||
This option specifies that the first character of the subject string is not
|
This option specifies that the first character of the subject string is not
|
||||||
|
@ -3021,12 +3036,12 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
Option bits for pcre2_dfa_match()
|
Option bits for pcre2_dfa_match()
|
||||||
|
|
||||||
The unused bits of the options argument for pcre2_dfa_match() must be
|
The unused bits of the options argument for pcre2_dfa_match() must be
|
||||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDAN-
|
||||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
CHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
|
PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD,
|
||||||
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but
|
||||||
these are exactly the same as for pcre2_match(), so their description
|
the last four of these are exactly the same as for pcre2_match(), so
|
||||||
is not repeated here.
|
their description is not repeated here.
|
||||||
|
|
||||||
PCRE2_PARTIAL_HARD
|
PCRE2_PARTIAL_HARD
|
||||||
PCRE2_PARTIAL_SOFT
|
PCRE2_PARTIAL_SOFT
|
||||||
|
@ -3172,7 +3187,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 01 April 2017
|
Last updated: 04 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2GREP 1 "31 March 2017" "PCRE2 10.30"
|
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -330,8 +330,8 @@ Instead of showing lines or parts of lines that match, show each match as an
|
||||||
offset from the start of the file and a length, separated by a comma. In this
|
offset from the start of the file and a length, separated by a comma. In this
|
||||||
mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
|
mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
|
||||||
options are ignored. If there is more than one match in a line, each of them is
|
options are ignored. If there is more than one match in a line, each of them is
|
||||||
shown separately. This option is mutually exclusive with \fB--line-offsets\fP
|
shown separately. This option is mutually exclusive with \fB--output\fP,
|
||||||
and \fB--only-matching\fP.
|
\fB--line-offsets\fP, and \fB--only-matching\fP.
|
||||||
.TP
|
.TP
|
||||||
\fB-H\fP, \fB--with-filename\fP
|
\fB-H\fP, \fB--with-filename\fP
|
||||||
Force the inclusion of the file name at the start of output lines when
|
Force the inclusion of the file name at the start of output lines when
|
||||||
|
@ -424,7 +424,8 @@ number is terminated by a colon (as usual; see the \fB-n\fP option), and the
|
||||||
offset and length are separated by a comma. In this mode, no context is shown.
|
offset and length are separated by a comma. In this mode, no context is shown.
|
||||||
That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
|
That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
|
||||||
more than one match in a line, each of them is shown separately. This option is
|
more than one match in a line, each of them is shown separately. This option is
|
||||||
mutually exclusive with \fB--file-offsets\fP and \fB--only-matching\fP.
|
mutually exclusive with \fB--output\fP, \fB--file-offsets\fP, and
|
||||||
|
\fB--only-matching\fP.
|
||||||
.TP
|
.TP
|
||||||
\fB--locale\fP=\fIlocale-name\fP
|
\fB--locale\fP=\fIlocale-name\fP
|
||||||
This option specifies a locale to be used for pattern matching. It overrides
|
This option specifies a locale to be used for pattern matching. It overrides
|
||||||
|
@ -521,6 +522,30 @@ was explicitly disabled at build time. This option can be used to disable the
|
||||||
use of JIT at run time. It is provided for testing and working round problems.
|
use of JIT at run time. It is provided for testing and working round problems.
|
||||||
It should never be needed in normal use.
|
It should never be needed in normal use.
|
||||||
.TP
|
.TP
|
||||||
|
\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
|
||||||
|
When there is a match, instead of outputting the whole line that matched,
|
||||||
|
output just the given text. This option is mutually exclusive with
|
||||||
|
\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
|
||||||
|
sequences starting with a dollar character may be used to insert the contents
|
||||||
|
of the matched part of the line and/or captured substrings into the text.
|
||||||
|
.sp
|
||||||
|
$<digits> or ${<digits>} is replaced by the captured
|
||||||
|
substring of the given decimal number; zero substitutes the whole match. If
|
||||||
|
the number is greater than the number of capturing substrings, or if the
|
||||||
|
capture is unset, the replacement is empty.
|
||||||
|
.sp
|
||||||
|
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
||||||
|
newline; $r by carriage return; $t by tab; $v by vertical tab.
|
||||||
|
.sp
|
||||||
|
$o<digits> is replaced by the character represented by the given octal
|
||||||
|
number; up to three digits are processed.
|
||||||
|
.sp
|
||||||
|
$x<digits> is replaced by the character represented by the given hexadecimal
|
||||||
|
number; up to two digits are processed.
|
||||||
|
.sp
|
||||||
|
Any other character is substituted by itself. In particular, $$ is replaced by
|
||||||
|
a single dollar.
|
||||||
|
.TP
|
||||||
\fB-o\fP, \fB--only-matching\fP
|
\fB-o\fP, \fB--only-matching\fP
|
||||||
Show only the part of the line that matched a pattern instead of the whole
|
Show only the part of the line that matched a pattern instead of the whole
|
||||||
line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
|
line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
|
||||||
|
@ -530,7 +555,7 @@ combined with \fB-v\fP (invert the sense of the match to find non-matching
|
||||||
lines), no output is generated, but the return code is set appropriately. If
|
lines), no output is generated, but the return code is set appropriately. If
|
||||||
the matched portion of the line is empty, nothing is output unless the file
|
the matched portion of the line is empty, nothing is output unless the file
|
||||||
name or line number are being printed, in which case they are shown on an
|
name or line number are being printed, in which case they are shown on an
|
||||||
otherwise empty line. This option is mutually exclusive with
|
otherwise empty line. This option is mutually exclusive with \fB--output\fP,
|
||||||
\fB--file-offsets\fP and \fB--line-offsets\fP.
|
\fB--file-offsets\fP and \fB--line-offsets\fP.
|
||||||
.TP
|
.TP
|
||||||
\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
|
\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
|
||||||
|
@ -539,7 +564,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
|
||||||
equivalent to \fB-o\fP without a number. Because these options can be given
|
equivalent to \fB-o\fP without a number. Because these options can be given
|
||||||
without an argument (see above), if an argument is present, it must be given in
|
without an argument (see above), if an argument is present, it must be given in
|
||||||
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||||
for the non-argument case above also apply to this case. If the specified
|
for the non-argument case above also apply to this option. If the specified
|
||||||
capturing parentheses do not exist in the pattern, or were not set in the
|
capturing parentheses do not exist in the pattern, or were not set in the
|
||||||
match, nothing is output unless the file name or line number are being output.
|
match, nothing is output unless the file name or line number are being output.
|
||||||
.sp
|
.sp
|
||||||
|
@ -647,9 +672,9 @@ as in the GNU \fBgrep\fP program. Any long option of the form
|
||||||
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
|
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
|
||||||
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
|
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
|
||||||
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
|
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
|
||||||
\fB--newline\fP, \fB--om-separator\fP, \fB-u\fP, and \fB--utf-8\fP options are
|
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
|
||||||
specific to \fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option
|
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
|
||||||
with a capturing parentheses number.
|
\fB--only-matching\fP option with a capturing parentheses number.
|
||||||
.P
|
.P
|
||||||
Although most of the common options work the same way, a few are different in
|
Although most of the common options work the same way, a few are different in
|
||||||
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
|
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
|
||||||
|
@ -690,25 +715,32 @@ options does have data, it must be given in the first form, using an equals
|
||||||
character. Otherwise \fBpcre2grep\fP will assume that it has no data.
|
character. Otherwise \fBpcre2grep\fP will assume that it has no data.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "CALLING EXTERNAL SCRIPTS"
|
.SH "USING PCRE2'S CALLOUT FACILITY"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
\fBpcre2grep\fP has, by default, support for calling external programs or
|
\fBpcre2grep\fP has, by default, support for calling external programs or
|
||||||
scripts during matching by making use of PCRE2's callout facility. However,
|
scripts or echoing specific strings during matching by making use of PCRE2's
|
||||||
this support can be disabled when \fBpcre2grep\fP is built. You can find out
|
callout facility. However, this support can be disabled when \fBpcre2grep\fP is
|
||||||
whether your binary has support for callouts by running it with the \fB--help\fP
|
built. You can find out whether your binary has support for callouts by running
|
||||||
option. If the support is not enabled, all callouts in patterns are ignored by
|
it with the \fB--help\fP option. If the support is not enabled, all callouts in
|
||||||
\fBpcre2grep\fP.
|
patterns are ignored by \fBpcre2grep\fP.
|
||||||
.P
|
.P
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
||||||
either a number or a quoted string (see the
|
either a number or a quoted string (see the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2callout\fP
|
\fBpcre2callout\fP
|
||||||
.\"
|
.\"
|
||||||
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP.
|
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
|
||||||
String arguments are parsed as a list of substrings separated by pipe (vertical
|
only callouts with string arguments are useful.
|
||||||
bar) characters. The first substring must be an executable name, with the
|
.
|
||||||
following substrings specifying arguments:
|
.
|
||||||
|
.SS "Calling external programs or scripts"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
If the callout string does not start with a pipe (vertical bar) character, it
|
||||||
|
is parsed into a list of substrings separated by pipe characters. The first
|
||||||
|
substring must be an executable name, with the following substrings specifying
|
||||||
|
arguments:
|
||||||
.sp
|
.sp
|
||||||
executable_name|arg1|arg2|...
|
executable_name|arg1|arg2|...
|
||||||
.sp
|
.sp
|
||||||
|
@ -742,6 +774,19 @@ the non-existence of the executable), a local matching failure occurs and the
|
||||||
matcher backtracks in the normal way.
|
matcher backtracks in the normal way.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SS "Echoing a specific string"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
If the callout string starts with a pipe (vertical bar) character, the rest of
|
||||||
|
the string is written to the output, having been passed through the same escape
|
||||||
|
processing as text from the \fB--output\fP option. This provides a simple echoing
|
||||||
|
facility that avoids calling an external program or script. No terminator is
|
||||||
|
added to the string, so if you want a newline, you must include it explicitly.
|
||||||
|
Matching continues normally after the string is output. If you want to see only
|
||||||
|
the callout output but not any output from an actual match, you should end the
|
||||||
|
relevant pattern with (*FAIL).
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "MATCHING ERRORS"
|
.SH "MATCHING ERRORS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -789,6 +834,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 31 March 2017
|
Last updated: 06 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -363,8 +363,8 @@ OPTIONS
|
||||||
length, separated by a comma. In this mode, no context is
|
length, separated by a comma. In this mode, no context is
|
||||||
shown. That is, the -A, -B, and -C options are ignored. If
|
shown. That is, the -A, -B, and -C options are ignored. If
|
||||||
there is more than one match in a line, each of them is shown
|
there is more than one match in a line, each of them is shown
|
||||||
separately. This option is mutually exclusive with --line-
|
separately. This option is mutually exclusive with --output,
|
||||||
offsets and --only-matching.
|
--line-offsets, and --only-matching.
|
||||||
|
|
||||||
-H, --with-filename
|
-H, --with-filename
|
||||||
Force the inclusion of the file name at the start of output
|
Force the inclusion of the file name at the start of output
|
||||||
|
@ -469,8 +469,8 @@ OPTIONS
|
||||||
separated by a comma. In this mode, no context is shown.
|
separated by a comma. In this mode, no context is shown.
|
||||||
That is, the -A, -B, and -C options are ignored. If there is
|
That is, the -A, -B, and -C options are ignored. If there is
|
||||||
more than one match in a line, each of them is shown sepa-
|
more than one match in a line, each of them is shown sepa-
|
||||||
rately. This option is mutually exclusive with --file-offsets
|
rately. This option is mutually exclusive with --output,
|
||||||
and --only-matching.
|
--file-offsets, and --only-matching.
|
||||||
|
|
||||||
--locale=locale-name
|
--locale=locale-name
|
||||||
This option specifies a locale to be used for pattern match-
|
This option specifies a locale to be used for pattern match-
|
||||||
|
@ -585,6 +585,33 @@ OPTIONS
|
||||||
run time. It is provided for testing and working round prob-
|
run time. It is provided for testing and working round prob-
|
||||||
lems. It should never be needed in normal use.
|
lems. It should never be needed in normal use.
|
||||||
|
|
||||||
|
-O text, --output=text
|
||||||
|
When there is a match, instead of outputting the whole line
|
||||||
|
that matched, output just the given text. This option is
|
||||||
|
mutually exclusive with --only-matching, --file-offsets, and
|
||||||
|
--line-offsets. Escape sequences starting with a dollar char-
|
||||||
|
acter may be used to insert the contents of the matched part
|
||||||
|
of the line and/or captured substrings into the text.
|
||||||
|
|
||||||
|
$<digits> or ${<digits>} is replaced by the captured sub-
|
||||||
|
string of the given decimal number; zero substitutes the
|
||||||
|
whole match. If the number is greater than the number of cap-
|
||||||
|
turing substrings, or if the capture is unset, the replace-
|
||||||
|
ment is empty.
|
||||||
|
|
||||||
|
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
||||||
|
form feed; $n by newline; $r by carriage return; $t by tab;
|
||||||
|
$v by vertical tab.
|
||||||
|
|
||||||
|
$o<digits> is replaced by the character represented by the
|
||||||
|
given octal number; up to three digits are processed.
|
||||||
|
|
||||||
|
$x<digits> is replaced by the character represented by the
|
||||||
|
given hexadecimal number; up to two digits are processed.
|
||||||
|
|
||||||
|
Any other character is substituted by itself. In particular,
|
||||||
|
$$ is replaced by a single dollar.
|
||||||
|
|
||||||
-o, --only-matching
|
-o, --only-matching
|
||||||
Show only the part of the line that matched a pattern instead
|
Show only the part of the line that matched a pattern instead
|
||||||
of the whole line. In this mode, no context is shown. That
|
of the whole line. In this mode, no context is shown. That
|
||||||
|
@ -596,8 +623,8 @@ OPTIONS
|
||||||
ately. If the matched portion of the line is empty, nothing
|
ately. If the matched portion of the line is empty, nothing
|
||||||
is output unless the file name or line number are being
|
is output unless the file name or line number are being
|
||||||
printed, in which case they are shown on an otherwise empty
|
printed, in which case they are shown on an otherwise empty
|
||||||
line. This option is mutually exclusive with --file-offsets
|
line. This option is mutually exclusive with --output,
|
||||||
and --line-offsets.
|
--file-offsets and --line-offsets.
|
||||||
|
|
||||||
-onumber, --only-matching=number
|
-onumber, --only-matching=number
|
||||||
Show only the part of the line that matched the capturing
|
Show only the part of the line that matched the capturing
|
||||||
|
@ -607,7 +634,7 @@ OPTIONS
|
||||||
(see above), if an argument is present, it must be given in
|
(see above), if an argument is present, it must be given in
|
||||||
the same shell item, for example, -o3 or --only-matching=2.
|
the same shell item, for example, -o3 or --only-matching=2.
|
||||||
The comments given for the non-argument case above also apply
|
The comments given for the non-argument case above also apply
|
||||||
to this case. If the specified capturing parentheses do not
|
to this option. If the specified capturing parentheses do not
|
||||||
exist in the pattern, or were not set in the match, nothing
|
exist in the pattern, or were not set in the match, nothing
|
||||||
is output unless the file name or line number are being out-
|
is output unless the file name or line number are being out-
|
||||||
put.
|
put.
|
||||||
|
@ -723,7 +750,7 @@ OPTIONS COMPATIBILITY
|
||||||
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
|
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
|
||||||
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
|
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
|
||||||
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
|
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
|
||||||
line, --om-separator, -u, and --utf-8 options are specific to
|
line, --om-separator, --output, -u, and --utf-8 options are specific to
|
||||||
pcre2grep, as is the use of the --only-matching option with a capturing
|
pcre2grep, as is the use of the --only-matching option with a capturing
|
||||||
parentheses number.
|
parentheses number.
|
||||||
|
|
||||||
|
@ -766,21 +793,26 @@ OPTIONS WITH DATA
|
||||||
equals character. Otherwise pcre2grep will assume that it has no data.
|
equals character. Otherwise pcre2grep will assume that it has no data.
|
||||||
|
|
||||||
|
|
||||||
CALLING EXTERNAL SCRIPTS
|
USING PCRE2'S CALLOUT FACILITY
|
||||||
|
|
||||||
pcre2grep has, by default, support for calling external programs or
|
pcre2grep has, by default, support for calling external programs or
|
||||||
scripts during matching by making use of PCRE2's callout facility. How-
|
scripts or echoing specific strings during matching by making use of
|
||||||
ever, this support can be disabled when pcre2grep is built. You can
|
PCRE2's callout facility. However, this support can be disabled when
|
||||||
find out whether your binary has support for callouts by running it
|
pcre2grep is built. You can find out whether your binary has support
|
||||||
with the --help option. If the support is not enabled, all callouts in
|
for callouts by running it with the --help option. If the support is
|
||||||
patterns are ignored by pcre2grep.
|
not enabled, all callouts in patterns are ignored by pcre2grep.
|
||||||
|
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||||
mentation for details). Numbered callouts are ignored by pcre2grep.
|
mentation for details). Numbered callouts are ignored by pcre2grep;
|
||||||
String arguments are parsed as a list of substrings separated by pipe
|
only callouts with string arguments are useful.
|
||||||
(vertical bar) characters. The first substring must be an executable
|
|
||||||
name, with the following substrings specifying arguments:
|
Calling external programs or scripts
|
||||||
|
|
||||||
|
If the callout string does not start with a pipe (vertical bar) charac-
|
||||||
|
ter, it is parsed into a list of substrings separated by pipe charac-
|
||||||
|
ters. The first substring must be an executable name, with the follow-
|
||||||
|
ing substrings specifying arguments:
|
||||||
|
|
||||||
executable_name|arg1|arg2|...
|
executable_name|arg1|arg2|...
|
||||||
|
|
||||||
|
@ -816,6 +848,18 @@ CALLING EXTERNAL SCRIPTS
|
||||||
local matching failure occurs and the matcher backtracks in the normal
|
local matching failure occurs and the matcher backtracks in the normal
|
||||||
way.
|
way.
|
||||||
|
|
||||||
|
Echoing a specific string
|
||||||
|
|
||||||
|
If the callout string starts with a pipe (vertical bar) character, the
|
||||||
|
rest of the string is written to the output, having been passed through
|
||||||
|
the same escape processing as text from the --output option. This pro-
|
||||||
|
vides a simple echoing facility that avoids calling an external program
|
||||||
|
or script. No terminator is added to the string, so if you want a new-
|
||||||
|
line, you must include it explicitly. Matching continues normally
|
||||||
|
after the string is output. If you want to see only the callout output
|
||||||
|
but not any output from an actual match, you should end the relevant
|
||||||
|
pattern with (*FAIL).
|
||||||
|
|
||||||
|
|
||||||
MATCHING ERRORS
|
MATCHING ERRORS
|
||||||
|
|
||||||
|
@ -857,5 +901,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 31 March 2017
|
Last updated: 06 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
|
|
|
@ -511,6 +511,7 @@ PATTERN MODIFIERS
|
||||||
dollar_endonly set PCRE2_DOLLAR_ENDONLY
|
dollar_endonly set PCRE2_DOLLAR_ENDONLY
|
||||||
/s dotall set PCRE2_DOTALL
|
/s dotall set PCRE2_DOTALL
|
||||||
dupnames set PCRE2_DUPNAMES
|
dupnames set PCRE2_DUPNAMES
|
||||||
|
endanchored set PCRE2_ENDANCHORED
|
||||||
/x extended set PCRE2_EXTENDED
|
/x extended set PCRE2_EXTENDED
|
||||||
firstline set PCRE2_FIRSTLINE
|
firstline set PCRE2_FIRSTLINE
|
||||||
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
||||||
|
@ -926,6 +927,7 @@ SUBJECT MODIFIERS
|
||||||
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
||||||
|
|
||||||
anchored set PCRE2_ANCHORED
|
anchored set PCRE2_ANCHORED
|
||||||
|
endanchored set PCRE2_ENDANCHORED
|
||||||
dfa_restart set PCRE2_DFA_RESTART
|
dfa_restart set PCRE2_DFA_RESTART
|
||||||
dfa_shortest set PCRE2_DFA_SHORTEST
|
dfa_shortest set PCRE2_DFA_SHORTEST
|
||||||
no_jit set PCRE2_NO_JIT
|
no_jit set PCRE2_NO_JIT
|
||||||
|
@ -1630,5 +1632,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 25 March 2017
|
Last updated: 04 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
|
|
362
src/pcre2grep.c
362
src/pcre2grep.c
|
@ -175,8 +175,9 @@ static const char *dee_option = NULL;
|
||||||
static const char *DEE_option = NULL;
|
static const char *DEE_option = NULL;
|
||||||
static const char *locale = NULL;
|
static const char *locale = NULL;
|
||||||
static const char *newline_arg = NULL;
|
static const char *newline_arg = NULL;
|
||||||
static const char *om_separator = "";
|
static const char *om_separator = NULL;
|
||||||
static const char *stdin_name = "(standard input)";
|
static const char *stdin_name = "(standard input)";
|
||||||
|
static const char *output_text = NULL;
|
||||||
|
|
||||||
static char *main_buffer = NULL;
|
static char *main_buffer = NULL;
|
||||||
|
|
||||||
|
@ -196,6 +197,7 @@ static int dee_action = dee_SKIP;
|
||||||
#else
|
#else
|
||||||
static int dee_action = dee_READ;
|
static int dee_action = dee_READ;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int DEE_action = DEE_READ;
|
static int DEE_action = DEE_READ;
|
||||||
static int error_count = 0;
|
static int error_count = 0;
|
||||||
static int filenames = FN_DEFAULT;
|
static int filenames = FN_DEFAULT;
|
||||||
|
@ -233,7 +235,6 @@ static BOOL number = FALSE;
|
||||||
static BOOL omit_zero_count = FALSE;
|
static BOOL omit_zero_count = FALSE;
|
||||||
static BOOL resource_error = FALSE;
|
static BOOL resource_error = FALSE;
|
||||||
static BOOL quiet = FALSE;
|
static BOOL quiet = FALSE;
|
||||||
static BOOL show_only_matching = FALSE;
|
|
||||||
static BOOL show_total_count = FALSE;
|
static BOOL show_total_count = FALSE;
|
||||||
static BOOL silent = FALSE;
|
static BOOL silent = FALSE;
|
||||||
static BOOL utf = FALSE;
|
static BOOL utf = FALSE;
|
||||||
|
@ -247,6 +248,7 @@ typedef struct omstr {
|
||||||
|
|
||||||
static omstr *only_matching = NULL;
|
static omstr *only_matching = NULL;
|
||||||
static omstr *only_matching_last = NULL;
|
static omstr *only_matching_last = NULL;
|
||||||
|
static int only_matching_count;
|
||||||
|
|
||||||
/* Structure for holding the two variables that describe a number chain. */
|
/* Structure for holding the two variables that describe a number chain. */
|
||||||
|
|
||||||
|
@ -406,6 +408,7 @@ static option_item optionlist[] = {
|
||||||
#else
|
#else
|
||||||
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
|
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
|
||||||
#endif
|
#endif
|
||||||
|
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
|
||||||
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
|
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
|
||||||
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
|
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
|
||||||
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
||||||
|
@ -793,7 +796,7 @@ return isatty(fileno(f));
|
||||||
/************* Print optionally coloured match Unix-style and z/OS **********/
|
/************* Print optionally coloured match Unix-style and z/OS **********/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
print_match(const char* buf, int length)
|
print_match(const void *buf, int length)
|
||||||
{
|
{
|
||||||
if (length == 0) return;
|
if (length == 0) return;
|
||||||
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
|
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
|
||||||
|
@ -942,7 +945,7 @@ static CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||||
static WORD match_colour;
|
static WORD match_colour;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
print_match(const char* buf, int length)
|
print_match(const void *buf, int length)
|
||||||
{
|
{
|
||||||
if (length == 0) return;
|
if (length == 0) return;
|
||||||
if (do_colour)
|
if (do_colour)
|
||||||
|
@ -1001,7 +1004,7 @@ return FALSE;
|
||||||
/************* Print optionally coloured match when we can't do it **********/
|
/************* Print optionally coloured match when we can't do it **********/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
print_match(const char* buf, int length)
|
print_match(const void *buf, int length)
|
||||||
{
|
{
|
||||||
if (length == 0) return;
|
if (length == 0) return;
|
||||||
FWRITE(buf, 1, length, stdout);
|
FWRITE(buf, 1, length, stdout);
|
||||||
|
@ -1658,6 +1661,277 @@ return FALSE; /* No match, no errors */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Check output text for errors *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
|
||||||
|
{
|
||||||
|
PCRE2_SPTR begin = string;
|
||||||
|
for (; *string != 0; string++)
|
||||||
|
{
|
||||||
|
if (*string == '$')
|
||||||
|
{
|
||||||
|
PCRE2_SIZE capture_id = 0;
|
||||||
|
BOOL brace = FALSE;
|
||||||
|
|
||||||
|
string++;
|
||||||
|
|
||||||
|
/* Syntax error: a character must be present after $. */
|
||||||
|
if (*string == 0)
|
||||||
|
{
|
||||||
|
if (!callout)
|
||||||
|
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||||
|
(int)(string - begin), "no character after $");
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*string == '{')
|
||||||
|
{
|
||||||
|
/* Must be a decimal number in braces, e.g: {5} or {38} */
|
||||||
|
string++;
|
||||||
|
|
||||||
|
brace = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
/* Maximum capture id is 65535. */
|
||||||
|
if (capture_id <= 65535)
|
||||||
|
capture_id = capture_id * 10 + (*string - '0');
|
||||||
|
|
||||||
|
string++;
|
||||||
|
}
|
||||||
|
while (*string >= '0' && *string <= '9');
|
||||||
|
|
||||||
|
if (brace)
|
||||||
|
{
|
||||||
|
/* Syntax error: closing brace is missing. */
|
||||||
|
if (*string != '}')
|
||||||
|
{
|
||||||
|
if (!callout)
|
||||||
|
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||||
|
(int)(string - begin), "missing closing brace");
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* To negate the effect of the for. */
|
||||||
|
string--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (brace)
|
||||||
|
{
|
||||||
|
/* Syntax error: a decimal number required. */
|
||||||
|
if (!callout)
|
||||||
|
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||||
|
(int)(string - begin), "decimal number expected");
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
else if (*string == 'o')
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
|
||||||
|
if (*string < '0' || *string > '7')
|
||||||
|
{
|
||||||
|
/* Syntax error: an octal number required. */
|
||||||
|
if (!callout)
|
||||||
|
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||||
|
(int)(string - begin), "octal number expected");
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (*string == 'x')
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
|
||||||
|
if (!isxdigit((unsigned char)*string))
|
||||||
|
{
|
||||||
|
/* Syntax error: a hexdecimal number required. */
|
||||||
|
if (!callout)
|
||||||
|
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||||
|
(int)(string - begin), "hexadecimal number expected");
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Display output text *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Display the output text, which is assumed to have already been syntax
|
||||||
|
checked. Output may contain escape sequences started by the dollar sign. The
|
||||||
|
escape sequences are substituted as follows:
|
||||||
|
|
||||||
|
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
||||||
|
decimal number; zero will substitute the whole match. If the number is
|
||||||
|
greater than the number of capturing substrings, or if the capture is unset,
|
||||||
|
the replacement is empty.
|
||||||
|
|
||||||
|
$a is replaced by bell.
|
||||||
|
$b is replaced by backspace.
|
||||||
|
$e is replaced by escape.
|
||||||
|
$f is replaced by form feed.
|
||||||
|
$n is replaced by newline.
|
||||||
|
$r is replaced by carriage return.
|
||||||
|
$t is replaced by tab.
|
||||||
|
$v is replaced by vertical tab.
|
||||||
|
|
||||||
|
$o<digits> is replaced by the character represented by the given octal
|
||||||
|
number; up to three digits are processed.
|
||||||
|
|
||||||
|
$x<digits> is replaced by the character represented by the given hexadecimal
|
||||||
|
number; up to two digits are processed.
|
||||||
|
|
||||||
|
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
||||||
|
dollar.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
string: the output text
|
||||||
|
callout: TRUE for the builtin callout, FALSE for --output
|
||||||
|
subject the start of the subject
|
||||||
|
ovector: capture offsets
|
||||||
|
capture_top: number of captures
|
||||||
|
|
||||||
|
Returns: TRUE if something was output, other than newline
|
||||||
|
FALSE if nothing was output, or newline was last output
|
||||||
|
*/
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
|
||||||
|
PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
|
||||||
|
{
|
||||||
|
BOOL printed = FALSE;
|
||||||
|
|
||||||
|
for (; *string != 0; string++)
|
||||||
|
{
|
||||||
|
int ch = EOF;
|
||||||
|
if (*string == '$')
|
||||||
|
{
|
||||||
|
PCRE2_SIZE capture_id = 0;
|
||||||
|
BOOL brace = FALSE;
|
||||||
|
|
||||||
|
string++;
|
||||||
|
|
||||||
|
if (*string == '{')
|
||||||
|
{
|
||||||
|
/* Must be a decimal number in braces, e.g: {5} or {38} */
|
||||||
|
string++;
|
||||||
|
|
||||||
|
brace = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
/* Maximum capture id is 65535. */
|
||||||
|
if (capture_id <= 65535)
|
||||||
|
capture_id = capture_id * 10 + (*string - '0');
|
||||||
|
|
||||||
|
string++;
|
||||||
|
}
|
||||||
|
while (*string >= '0' && *string <= '9');
|
||||||
|
|
||||||
|
if (!brace)
|
||||||
|
{
|
||||||
|
/* To negate the effect of the for. */
|
||||||
|
string--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (capture_id < capture_top)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE capturesize;
|
||||||
|
capture_id *= 2;
|
||||||
|
|
||||||
|
capturesize = ovector[capture_id + 1] - ovector[capture_id];
|
||||||
|
if (capturesize > 0)
|
||||||
|
{
|
||||||
|
print_match(subject + ovector[capture_id], capturesize);
|
||||||
|
printed = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (*string == 'a') ch = '\a';
|
||||||
|
else if (*string == 'b') ch = '\b';
|
||||||
|
#ifndef EBCDIC
|
||||||
|
else if (*string == 'e') ch = '\033';
|
||||||
|
#else
|
||||||
|
else if (*string == 'e') ch = '\047';
|
||||||
|
#endif
|
||||||
|
else if (*string == 'f') ch = '\f';
|
||||||
|
else if (*string == 'r') ch = '\r';
|
||||||
|
else if (*string == 't') ch = '\t';
|
||||||
|
else if (*string == 'v') ch = '\v';
|
||||||
|
else if (*string == 'n')
|
||||||
|
{
|
||||||
|
fprintf(stdout, STDOUT_NL);
|
||||||
|
printed = FALSE;
|
||||||
|
}
|
||||||
|
else if (*string == 'o')
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
|
||||||
|
ch = *string - '0';
|
||||||
|
if (string[1] >= '0' && string[1] <= '7')
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
ch = ch * 8 + (*string - '0');
|
||||||
|
}
|
||||||
|
if (string[1] >= '0' && string[1] <= '7')
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
ch = ch * 8 + (*string - '0');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (*string == 'x')
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
|
||||||
|
if (*string >= '0' && *string <= '9')
|
||||||
|
ch = *string - '0';
|
||||||
|
else
|
||||||
|
ch = (*string | 0x20) - 'a' + 10;
|
||||||
|
if (isxdigit((unsigned char)string[1]))
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
ch *= 16;
|
||||||
|
if (*string >= '0' && *string <= '9')
|
||||||
|
ch += *string - '0';
|
||||||
|
else
|
||||||
|
ch += (*string | 0x20) - 'a' + 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ch = *string;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ch = *string;
|
||||||
|
}
|
||||||
|
if (ch != EOF)
|
||||||
|
{
|
||||||
|
fprintf(stdout, "%c", ch);
|
||||||
|
printed = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return printed;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
|
@ -1683,6 +1957,10 @@ follows:
|
||||||
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
||||||
dollar or $| replaced by a pipe character.
|
dollar or $| replaced by a pipe character.
|
||||||
|
|
||||||
|
Alternatively, if string starts with pipe, the remainder is taken as an output
|
||||||
|
string, same as --output. In this case, --om-separator is used to separate each
|
||||||
|
callout, defaulting to newline.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
echo -e "abcde\n12345" | pcre2grep \
|
echo -e "abcde\n12345" | pcre2grep \
|
||||||
|
@ -1725,6 +2003,16 @@ int result = 0;
|
||||||
/* Only callout with strings are supported. */
|
/* Only callout with strings are supported. */
|
||||||
if (string == NULL || length == 0) return 0;
|
if (string == NULL || length == 0) return 0;
|
||||||
|
|
||||||
|
/* If there's no command, output the remainder directly. */
|
||||||
|
|
||||||
|
if (*string == '|')
|
||||||
|
{
|
||||||
|
string++;
|
||||||
|
if (!syntax_check_output_text(string, TRUE)) return 0;
|
||||||
|
(void)display_output_text(string, TRUE, subject, ovector, capture_top);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Checking syntax and compute the number of string fragments. Callout strings
|
/* Checking syntax and compute the number of string fragments. Callout strings
|
||||||
are ignored in case of a syntax error. */
|
are ignored in case of a syntax error. */
|
||||||
|
|
||||||
|
@ -2174,8 +2462,8 @@ while (ptr < endptr)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* We come back here after a match when show_only_matching is set, in order
|
/* We come back here after a match when only_matching_count is non-zero, in
|
||||||
to find any further matches in the same line. This applies to
|
order to find any further matches in the same line. This applies to
|
||||||
--only-matching, --file-offsets, and --line-offsets. */
|
--only-matching, --file-offsets, and --line-offsets. */
|
||||||
|
|
||||||
ONLY_MATCHING_RESTART:
|
ONLY_MATCHING_RESTART:
|
||||||
|
@ -2229,13 +2517,13 @@ while (ptr < endptr)
|
||||||
/* The --only-matching option prints just the substring that matched,
|
/* The --only-matching option prints just the substring that matched,
|
||||||
and/or one or more captured portions of it, as long as these strings are
|
and/or one or more captured portions of it, as long as these strings are
|
||||||
not empty. The --file-offsets and --line-offsets options output offsets for
|
not empty. The --file-offsets and --line-offsets options output offsets for
|
||||||
the matching substring (all three set show_only_matching). None of these
|
the matching substring (all three set only_matching_count non-zero). None
|
||||||
mutually exclusive options prints any context. Afterwards, adjust the start
|
of these mutually exclusive options prints any context. Afterwards, adjust
|
||||||
and then jump back to look for further matches in the same line. If we are
|
the start and then jump back to look for further matches in the same line.
|
||||||
in invert mode, however, nothing is printed and we do not restart - this
|
If we are in invert mode, however, nothing is printed and we do not restart
|
||||||
could still be useful because the return code is set. */
|
- this could still be useful because the return code is set. */
|
||||||
|
|
||||||
else if (show_only_matching)
|
else if (only_matching_count != 0)
|
||||||
{
|
{
|
||||||
if (!invert)
|
if (!invert)
|
||||||
{
|
{
|
||||||
|
@ -2257,6 +2545,16 @@ while (ptr < endptr)
|
||||||
(int)(filepos + matchptr + offsets[0] - ptr),
|
(int)(filepos + matchptr + offsets[0] - ptr),
|
||||||
(int)(offsets[1] - offsets[0]));
|
(int)(offsets[1] - offsets[0]));
|
||||||
|
|
||||||
|
/* Handle --output (which has already been syntax checked) */
|
||||||
|
|
||||||
|
else if (output_text != NULL)
|
||||||
|
{
|
||||||
|
if (display_output_text((PCRE2_SPTR)output_text, FALSE,
|
||||||
|
(PCRE2_SPTR)matchptr, offsets, mrc) || printname != NULL ||
|
||||||
|
number)
|
||||||
|
fprintf(stdout, STDOUT_NL);
|
||||||
|
}
|
||||||
|
|
||||||
/* Handle --only-matching, which may occur many times */
|
/* Handle --only-matching, which may occur many times */
|
||||||
|
|
||||||
else
|
else
|
||||||
|
@ -2272,7 +2570,8 @@ while (ptr < endptr)
|
||||||
int plen = offsets[2*n + 1] - offsets[2*n];
|
int plen = offsets[2*n + 1] - offsets[2*n];
|
||||||
if (plen > 0)
|
if (plen > 0)
|
||||||
{
|
{
|
||||||
if (printed) fprintf(stdout, "%s", om_separator);
|
if (printed && om_separator != NULL)
|
||||||
|
fprintf(stdout, "%s", om_separator);
|
||||||
print_match(matchptr + offsets[n*2], plen);
|
print_match(matchptr + offsets[n*2], plen);
|
||||||
printed = TRUE;
|
printed = TRUE;
|
||||||
}
|
}
|
||||||
|
@ -2557,7 +2856,7 @@ while (ptr < endptr)
|
||||||
/* End of file; print final "after" lines if wanted; do_after_lines sets
|
/* End of file; print final "after" lines if wanted; do_after_lines sets
|
||||||
hyphenpending if it prints something. */
|
hyphenpending if it prints something. */
|
||||||
|
|
||||||
if (!show_only_matching && !(count_only|show_total_count))
|
if (only_matching_count == 0 && !(count_only|show_total_count))
|
||||||
{
|
{
|
||||||
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
||||||
hyphenpending |= endhyphenpending;
|
hyphenpending |= endhyphenpending;
|
||||||
|
@ -3518,26 +3817,31 @@ if (both_context > 0)
|
||||||
if (before_context == 0) before_context = both_context;
|
if (before_context == 0) before_context = both_context;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
|
/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
|
||||||
However, all three set show_only_matching because they display, each in their
|
permitted. They display, each in their own way, only the data that has matched.
|
||||||
own way, only the data that has matched. */
|
*/
|
||||||
|
|
||||||
if ((only_matching != NULL && (file_offsets || line_offsets)) ||
|
only_matching_count = (only_matching != NULL) + (output_text != NULL) +
|
||||||
(file_offsets && line_offsets))
|
file_offsets + line_offsets;
|
||||||
|
|
||||||
|
if (only_matching_count > 1)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets "
|
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
|
||||||
"and/or --line-offsets\n");
|
"--file-offsets and/or --line-offsets\n");
|
||||||
pcre2grep_exit(usage(2));
|
pcre2grep_exit(usage(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check the text supplied to --output for errors. */
|
||||||
|
|
||||||
|
if (output_text != NULL &&
|
||||||
|
!syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
|
||||||
|
goto EXIT2;
|
||||||
|
|
||||||
/* Put limits into the match data block. */
|
/* Put limits into the match data block. */
|
||||||
|
|
||||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||||
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
||||||
|
|
||||||
if (only_matching != NULL || file_offsets || line_offsets)
|
|
||||||
show_only_matching = TRUE;
|
|
||||||
|
|
||||||
/* If a locale has not been provided as an option, see if the LC_CTYPE or
|
/* If a locale has not been provided as an option, see if the LC_CTYPE or
|
||||||
LC_ALL environment variable is set, and if so, use it. */
|
LC_ALL environment variable is set, and if so, use it. */
|
||||||
|
|
||||||
|
@ -3827,6 +4131,14 @@ for (; i < argc; i++)
|
||||||
else if (frc == 0 && rc == 1) rc = 0;
|
else if (frc == 0 && rc == 1) rc = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||||
|
/* If separating builtin echo callouts by implicit newline, add one more for
|
||||||
|
the final item. */
|
||||||
|
|
||||||
|
if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
|
||||||
|
fprintf(stdout, STDOUT_NL);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Show the total number of matches if requested, but not if only one file's
|
/* Show the total number of matches if requested, but not if only one file's
|
||||||
count was printed. */
|
count was printed. */
|
||||||
|
|
||||||
|
|
|
@ -829,3 +829,8 @@ def
|
||||||
xyz
|
xyz
|
||||||
---
|
---
|
||||||
RC=0
|
RC=0
|
||||||
|
---------------------------- Test 120 ------------------------------
|
||||||
|
./testdata/grepinput:the binary zero.:zerothe.
|
||||||
|
./testdata/grepinput:a binary zero:zeroa
|
||||||
|
./testdata/grepinput:the binary zero.:zerothe.
|
||||||
|
RC=0
|
||||||
|
|
|
@ -6,3 +6,9 @@ Arg1: [qu] [qu]
|
||||||
Arg1: [ t] [ t]
|
Arg1: [ t] [ t]
|
||||||
The quick brown
|
The quick brown
|
||||||
This time it jumps and jumps and jumps.
|
This time it jumps and jumps and jumps.
|
||||||
|
0:T
|
||||||
|
The quick brown
|
||||||
|
0:T
|
||||||
|
This time it jumps and jumps and jumps.
|
||||||
|
T
|
||||||
|
T
|
||||||
|
|
Loading…
Reference in New Issue