Jason Hood's pcre2grep patches (modified a bit) to add --output to pcre2grep,
and also an inbuilt callout echo.
This commit is contained in:
parent
2b36600b2b
commit
88abc14e42
|
@ -113,6 +113,9 @@ a message, and abandon the run (this would have detected #13 above).
|
|||
|
||||
19. Implemented PCRE2_ENDANCHORED.
|
||||
|
||||
20. Applied Jason Hood's patches (slightly modified) to pcre2grep, to implement
|
||||
the --output=text (-O) option and the inbuilt callout echo.
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
------------------------------
|
||||
|
|
|
@ -598,6 +598,10 @@ printf "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
|
|||
$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||
|
@ -667,6 +671,9 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scri
|
|||
echo "Testing pcre2grep script callouts"
|
||||
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
|
||||
# The above has no newline, which 'diff -ub' ignores, so add one.
|
||||
$cf $srcdir/testdata/grepoutputC testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else
|
||||
|
|
|
@ -585,6 +585,10 @@ echo ---------------------------- Test 119 ----------------------------->>testtr
|
|||
%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
echo ---------------------------- Test 120 ------------------------------>>testtrygrep
|
||||
(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
|
||||
echo RC=^%ERRORLEVEL%>>testtrygrep
|
||||
|
||||
:: Now compare the results.
|
||||
|
||||
%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout%
|
||||
|
@ -654,6 +658,10 @@ if %ERRORLEVEL% equ 0 (
|
|||
echo Testing pcre2grep script callouts
|
||||
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
|
||||
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||
%pcre2grep% "(T)(?C'|$0:$1')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||
%pcre2grep% --om-separator / "(T)(?C'|$1')" %srcdir%/testdata/grepinputv >>testtrygrep
|
||||
:: The above has no newline, which 'diff -ub' ignores, so add one.
|
||||
echo />>testtrygrep
|
||||
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
) else (
|
||||
|
|
|
@ -60,6 +60,7 @@ The option bits are:
|
|||
PCRE2_DOLLAR_ENDONLY $ not to match newline at end
|
||||
PCRE2_DOTALL . matches anything including NL
|
||||
PCRE2_DUPNAMES Allow duplicate names for subpatterns
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_EXTENDED Ignore white space and # comments
|
||||
PCRE2_FIRSTLINE Force matching to be before newline
|
||||
PCRE2_MATCH_UNSET_BACKREF Match unset back references
|
||||
|
|
|
@ -50,6 +50,7 @@ up a callout function or specify the recursion depth limit. The <i>length</i>
|
|||
and <i>startoffset</i> values are code units, not characters. The options are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject is not the end of a line
|
||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||
|
|
|
@ -53,6 +53,7 @@ units, not characters. The length may be given as PCRE2_ZERO_TERMINATED for a
|
|||
subject that is terminated by a binary zero code unit. The options are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject string is not the end of a line
|
||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||
|
|
|
@ -64,6 +64,7 @@ The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
|
|||
zero-terminated strings. The options are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject is not the end of a line
|
||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||
|
|
|
@ -1123,8 +1123,8 @@ documentation).
|
|||
<P>
|
||||
For those options that can be different in different parts of the pattern, the
|
||||
contents of the <i>options</i> argument specifies their settings at the start of
|
||||
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
|
||||
the time of matching as well as at compile time.
|
||||
compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK
|
||||
options can be set at the time of matching as well as at compile time.
|
||||
</P>
|
||||
<P>
|
||||
Other, less frequently required compile-time parameters (for example, the
|
||||
|
@ -1279,6 +1279,13 @@ only one instance of the named subpattern can ever be matched. There are more
|
|||
details of named subpatterns below; see also the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation.
|
||||
<pre>
|
||||
PCRE2_ENDANCHORED
|
||||
</pre>
|
||||
If this bit is set, the end of any pattern match must be right at the end of
|
||||
the string being searched (the "subject string"). This effect can also be
|
||||
achieved by appropriate constructs in the pattern itself, which is the only way
|
||||
to do it in Perl.
|
||||
<pre>
|
||||
PCRE2_EXTENDED
|
||||
</pre>
|
||||
|
@ -2141,16 +2148,16 @@ Option bits for <b>pcre2_match()</b>
|
|||
</b><br>
|
||||
<P>
|
||||
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT,
|
||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is
|
||||
described below.
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
|
||||
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
|
||||
Their action is described below.
|
||||
</P>
|
||||
<P>
|
||||
Setting PCRE2_ANCHORED at match time is not supported by the just-in-time (JIT)
|
||||
compiler. If it is set, JIT matching is disabled and the interpretive code in
|
||||
<b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT (obviously), the remaining
|
||||
options are supported for JIT matching.
|
||||
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
|
||||
the just-in-time (JIT) compiler. If either is set, JIT matching is disabled and the
|
||||
interpretive code in <b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT
|
||||
(obviously), the remaining options are supported for JIT matching.
|
||||
<pre>
|
||||
PCRE2_ANCHORED
|
||||
</pre>
|
||||
|
@ -2159,6 +2166,12 @@ matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
|||
to be anchored by virtue of its contents, it cannot be made unanchored at
|
||||
matching time. Note that setting the option at match time disables JIT
|
||||
matching.
|
||||
<pre>
|
||||
PCRE2_ENDANCHORED
|
||||
</pre>
|
||||
If the PCRE2_ENDANCHORED option is set, any string that <b>pcre2_match()</b>
|
||||
matches must be right at the end of the subject string. Note that setting the
|
||||
option at match time disables JIT matching.
|
||||
<pre>
|
||||
PCRE2_NOTBOL
|
||||
</pre>
|
||||
|
@ -3100,11 +3113,11 @@ Option bits for <b>pcre2_dfa_match()</b>
|
|||
</b><br>
|
||||
<P>
|
||||
The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
|
||||
be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
|
||||
PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and
|
||||
PCRE2_DFA_RESTART. All but the last four of these are exactly the same as for
|
||||
<b>pcre2_match()</b>, so their description is not repeated here.
|
||||
be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
|
||||
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST,
|
||||
and PCRE2_DFA_RESTART. All but the last four of these are exactly the same as
|
||||
for <b>pcre2_match()</b>, so their description is not repeated here.
|
||||
<pre>
|
||||
PCRE2_PARTIAL_HARD
|
||||
PCRE2_PARTIAL_SOFT
|
||||
|
@ -3258,7 +3271,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 01 April 2017
|
||||
Last updated: 04 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -22,7 +22,7 @@ please consult the man page, in case the conversion went wrong.
|
|||
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
|
||||
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
|
||||
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
|
||||
<li><a name="TOC10" href="#SEC10">CALLING EXTERNAL SCRIPTS</a>
|
||||
<li><a name="TOC10" href="#SEC10">USING PCRE2'S CALLOUT FACILITY</a>
|
||||
<li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a>
|
||||
<li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a>
|
||||
<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
|
||||
|
@ -384,8 +384,8 @@ Instead of showing lines or parts of lines that match, show each match as an
|
|||
offset from the start of the file and a length, separated by a comma. In this
|
||||
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
|
||||
options are ignored. If there is more than one match in a line, each of them is
|
||||
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
|
||||
and <b>--only-matching</b>.
|
||||
shown separately. This option is mutually exclusive with <b>--output</b>,
|
||||
<b>--line-offsets</b>, and <b>--only-matching</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-H</b>, <b>--with-filename</b>
|
||||
|
@ -491,7 +491,8 @@ number is terminated by a colon (as usual; see the <b>-n</b> option), and the
|
|||
offset and length are separated by a comma. In this mode, no context is shown.
|
||||
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
|
||||
more than one match in a line, each of them is shown separately. This option is
|
||||
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
|
||||
mutually exclusive with <b>--output</b>, <b>--file-offsets</b>, and
|
||||
<b>--only-matching</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>--locale</b>=<i>locale-name</i>
|
||||
|
@ -602,6 +603,36 @@ use of JIT at run time. It is provided for testing and working round problems.
|
|||
It should never be needed in normal use.
|
||||
</P>
|
||||
<P>
|
||||
<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
|
||||
When there is a match, instead of outputting the whole line that matched,
|
||||
output just the given text. This option is mutually exclusive with
|
||||
<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
|
||||
sequences starting with a dollar character may be used to insert the contents
|
||||
of the matched part of the line and/or captured substrings into the text.
|
||||
<br>
|
||||
<br>
|
||||
$&lt;digits&gt; or ${&lt;digits&gt;} is replaced by the captured
|
||||
substring of the given decimal number; zero substitutes the whole match. If
|
||||
the number is greater than the number of capturing substrings, or if the
|
||||
capture is unset, the replacement is empty.
|
||||
<br>
|
||||
<br>
|
||||
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
||||
newline; $r by carriage return; $t by tab; $v by vertical tab.
|
||||
<br>
|
||||
<br>
|
||||
$o&lt;digits&gt; is replaced by the character represented by the given octal
|
||||
number; up to three digits are processed.
|
||||
<br>
|
||||
<br>
|
||||
$x&lt;digits&gt; is replaced by the character represented by the given hexadecimal
|
||||
number; up to two digits are processed.
|
||||
<br>
|
||||
<br>
|
||||
Any other character is substituted by itself. In particular, $$ is replaced by
|
||||
a single dollar.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b>, <b>--only-matching</b>
|
||||
Show only the part of the line that matched a pattern instead of the whole
|
||||
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
||||
|
@ -611,7 +642,7 @@ combined with <b>-v</b> (invert the sense of the match to find non-matching
|
|||
lines), no output is generated, but the return code is set appropriately. If
|
||||
the matched portion of the line is empty, nothing is output unless the file
|
||||
name or line number are being printed, in which case they are shown on an
|
||||
otherwise empty line. This option is mutually exclusive with
|
||||
otherwise empty line. This option is mutually exclusive with <b>--output</b>,
|
||||
<b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -621,7 +652,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
|
|||
equivalent to <b>-o</b> without a number. Because these options can be given
|
||||
without an argument (see above), if an argument is present, it must be given in
|
||||
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||
for the non-argument case above also apply to this case. If the specified
|
||||
for the non-argument case above also apply to this option. If the specified
|
||||
capturing parentheses do not exist in the pattern, or were not set in the
|
||||
match, nothing is output unless the file name or line number are being output.
|
||||
<br>
|
||||
|
@ -735,9 +766,9 @@ as in the GNU <b>grep</b> program. Any long option of the form
|
|||
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
|
||||
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
|
||||
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
|
||||
<b>--newline</b>, <b>--om-separator</b>, <b>-u</b>, and <b>--utf-8</b> options are
|
||||
specific to <b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option
|
||||
with a capturing parentheses number.
|
||||
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
|
||||
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
|
||||
<b>--only-matching</b> option with a capturing parentheses number.
|
||||
</P>
|
||||
<P>
|
||||
Although most of the common options work the same way, a few are different in
|
||||
|
@ -778,23 +809,30 @@ The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
|
|||
options does have data, it must be given in the first form, using an equals
|
||||
character. Otherwise <b>pcre2grep</b> will assume that it has no data.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
|
||||
<br><a name="SEC10" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
|
||||
<P>
|
||||
<b>pcre2grep</b> has, by default, support for calling external programs or
|
||||
scripts during matching by making use of PCRE2's callout facility. However,
|
||||
this support can be disabled when <b>pcre2grep</b> is built. You can find out
|
||||
whether your binary has support for callouts by running it with the <b>--help</b>
|
||||
option. If the support is not enabled, all callouts in patterns are ignored by
|
||||
<b>pcre2grep</b>.
|
||||
scripts or echoing specific strings during matching by making use of PCRE2's
|
||||
callout facility. However, this support can be disabled when <b>pcre2grep</b> is
|
||||
built. You can find out whether your binary has support for callouts by running
|
||||
it with the <b>--help</b> option. If the support is not enabled, all callouts in
|
||||
patterns are ignored by <b>pcre2grep</b>.
|
||||
</P>
|
||||
<P>
|
||||
A callout in a PCRE2 pattern is of the form (?C&lt;arg&gt;) where the argument is
|
||||
either a number or a quoted string (see the
|
||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||
documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>.
|
||||
String arguments are parsed as a list of substrings separated by pipe (vertical
|
||||
bar) characters. The first substring must be an executable name, with the
|
||||
following substrings specifying arguments:
|
||||
documentation for details). Numbered callouts are ignored by <b>pcre2grep</b>;
|
||||
only callouts with string arguments are useful.
|
||||
</P>
|
||||
<br><b>
|
||||
Calling external programs or scripts
|
||||
</b><br>
|
||||
<P>
|
||||
If the callout string does not start with a pipe (vertical bar) character, it
|
||||
is parsed into a list of substrings separated by pipe characters. The first
|
||||
substring must be an executable name, with the following substrings specifying
|
||||
arguments:
|
||||
<pre>
|
||||
executable_name|arg1|arg2|...
|
||||
</pre>
|
||||
|
@ -828,6 +866,19 @@ callout to be ignored. If running the program fails for any reason (including
|
|||
the non-existence of the executable), a local matching failure occurs and the
|
||||
matcher backtracks in the normal way.
|
||||
</P>
|
||||
<br><b>
|
||||
Echoing a specific string
|
||||
</b><br>
|
||||
<P>
|
||||
If the callout string starts with a pipe (vertical bar) character, the rest of
|
||||
the string is written to the output, having been passed through the same escape
|
||||
processing as text from the --output option. This provides a simple echoing
|
||||
facility that avoids calling an external program or script. No terminator is
|
||||
added to the string, so if you want a newline, you must include it explicitly.
|
||||
Matching continues normally after the string is output. If you want to see only
|
||||
the callout output but not any output from an actual match, you should end the
|
||||
relevant pattern with (*FAIL).
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br>
|
||||
<P>
|
||||
It is possible to supply a regular expression that takes a very long time to
|
||||
|
@ -867,7 +918,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 06 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -568,6 +568,7 @@ for a description of their effects.
|
|||
dollar_endonly set PCRE2_DOLLAR_ENDONLY
|
||||
/s dotall set PCRE2_DOTALL
|
||||
dupnames set PCRE2_DUPNAMES
|
||||
endanchored set PCRE2_ENDANCHORED
|
||||
/x extended set PCRE2_EXTENDED
|
||||
firstline set PCRE2_FIRSTLINE
|
||||
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
||||
|
@ -1039,6 +1040,7 @@ The following modifiers set options for <b>pcre2_match()</b> or
|
|||
for a description of their effects.
|
||||
<pre>
|
||||
anchored set PCRE2_ANCHORED
|
||||
endanchored set PCRE2_ENDANCHORED
|
||||
dfa_restart set PCRE2_DFA_RESTART
|
||||
dfa_shortest set PCRE2_DFA_SHORTEST
|
||||
no_jit set PCRE2_NO_JIT
|
||||
|
@ -1798,7 +1800,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 25 March 2017
|
||||
Last updated: 04 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -1155,8 +1155,9 @@ COMPILING A PATTERN
|
|||
|
||||
For those options that can be different in different parts of the pat-
|
||||
tern, the contents of the options argument specifies their settings at
|
||||
the start of compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK
|
||||
options can be set at the time of matching as well as at compile time.
|
||||
the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and
|
||||
PCRE2_NO_UTF_CHECK options can be set at the time of matching as well
|
||||
as at compile time.
|
||||
|
||||
Other, less frequently required compile-time parameters (for example,
|
||||
the newline setting) can be provided in a compile context (as described
|
||||
|
@ -1303,6 +1304,13 @@ COMPILING A PATTERN
|
|||
matched. There are more details of named subpatterns below; see also
|
||||
the pcre2pattern documentation.
|
||||
|
||||
PCRE2_ENDANCHORED
|
||||
|
||||
If this bit is set, the end of any pattern match must be right at the
|
||||
end of the string being searched (the "subject string"). This effect
|
||||
can also be achieved by appropriate constructs in the pattern itself,
|
||||
which is the only way to do it in Perl.
|
||||
|
||||
PCRE2_EXTENDED
|
||||
|
||||
If this bit is set, most white space characters in the pattern are
|
||||
|
@ -2136,15 +2144,16 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
|||
Option bits for pcre2_match()
|
||||
|
||||
The unused bits of the options argument for pcre2_match() must be zero.
|
||||
The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT,
|
||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their
|
||||
action is described below.
|
||||
The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED,
|
||||
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PAR-
|
||||
TIAL_SOFT. Their action is described below.
|
||||
|
||||
Setting PCRE2_ANCHORED at match time is not supported by the just-in-
|
||||
time (JIT) compiler. If it is set, JIT matching is disabled and the
|
||||
interpretive code in pcre2_match() is run. Apart from PCRE2_NO_JIT
|
||||
(obviously), the remaining options are supported for JIT matching.
|
||||
Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not sup-
|
||||
ported by the just-in-time (JIT) compiler. If either is set, JIT matching
|
||||
is disabled and the interpretive code in pcre2_match() is run. Apart
|
||||
from PCRE2_NO_JIT (obviously), the remaining options are supported for
|
||||
JIT matching.
|
||||
|
||||
PCRE2_ANCHORED
|
||||
|
||||
|
@ -2154,6 +2163,12 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
|||
unanchored at matching time. Note that setting the option at match time
|
||||
disables JIT matching.
|
||||
|
||||
PCRE2_ENDANCHORED
|
||||
|
||||
If the PCRE2_ENDANCHORED option is set, any string that pcre2_match()
|
||||
matches must be right at the end of the subject string. Note that set-
|
||||
ting the option at match time disables JIT matching.
|
||||
|
||||
PCRE2_NOTBOL
|
||||
|
||||
This option specifies that the first character of the subject string is not
|
||||
|
@ -3021,12 +3036,12 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
|||
Option bits for pcre2_dfa_match()
|
||||
|
||||
The unused bits of the options argument for pcre2_dfa_match() must be
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
|
||||
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
||||
these are exactly the same as for pcre2_match(), so their description
|
||||
is not repeated here.
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDAN-
|
||||
CHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||
PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD,
|
||||
PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but
|
||||
the last four of these are exactly the same as for pcre2_match(), so
|
||||
their description is not repeated here.
|
||||
|
||||
PCRE2_PARTIAL_HARD
|
||||
PCRE2_PARTIAL_SOFT
|
||||
|
@ -3172,7 +3187,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 01 April 2017
|
||||
Last updated: 04 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2GREP 1 "31 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -330,8 +330,8 @@ Instead of showing lines or parts of lines that match, show each match as an
|
|||
offset from the start of the file and a length, separated by a comma. In this
|
||||
mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
|
||||
options are ignored. If there is more than one match in a line, each of them is
|
||||
shown separately. This option is mutually exclusive with \fB--line-offsets\fP
|
||||
and \fB--only-matching\fP.
|
||||
shown separately. This option is mutually exclusive with \fB--output\fP,
|
||||
\fB--line-offsets\fP, and \fB--only-matching\fP.
|
||||
.TP
|
||||
\fB-H\fP, \fB--with-filename\fP
|
||||
Force the inclusion of the file name at the start of output lines when
|
||||
|
@ -424,7 +424,8 @@ number is terminated by a colon (as usual; see the \fB-n\fP option), and the
|
|||
offset and length are separated by a comma. In this mode, no context is shown.
|
||||
That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
|
||||
more than one match in a line, each of them is shown separately. This option is
|
||||
mutually exclusive with \fB--file-offsets\fP and \fB--only-matching\fP.
|
||||
mutually exclusive with \fB--output\fP, \fB--file-offsets\fP, and
|
||||
\fB--only-matching\fP.
|
||||
.TP
|
||||
\fB--locale\fP=\fIlocale-name\fP
|
||||
This option specifies a locale to be used for pattern matching. It overrides
|
||||
|
@ -521,6 +522,30 @@ was explicitly disabled at build time. This option can be used to disable the
|
|||
use of JIT at run time. It is provided for testing and working round problems.
|
||||
It should never be needed in normal use.
|
||||
.TP
|
||||
\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
|
||||
When there is a match, instead of outputting the whole line that matched,
|
||||
output just the given text. This option is mutually exclusive with
|
||||
\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
|
||||
sequences starting with a dollar character may be used to insert the contents
|
||||
of the matched part of the line and/or captured substrings into the text.
|
||||
.sp
|
||||
$<digits> or ${<digits>} is replaced by the captured
|
||||
substring of the given decimal number; zero substitutes the whole match. If
|
||||
the number is greater than the number of capturing substrings, or if the
|
||||
capture is unset, the replacement is empty.
|
||||
.sp
|
||||
$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
|
||||
newline; $r by carriage return; $t by tab; $v by vertical tab.
|
||||
.sp
|
||||
$o<digits> is replaced by the character represented by the given octal
|
||||
number; up to three digits are processed.
|
||||
.sp
|
||||
$x<digits> is replaced by the character represented by the given hexadecimal
|
||||
number; up to two digits are processed.
|
||||
.sp
|
||||
Any other character is substituted by itself. In particular, $$ is replaced by
|
||||
a single dollar.
|
||||
.TP
|
||||
\fB-o\fP, \fB--only-matching\fP
|
||||
Show only the part of the line that matched a pattern instead of the whole
|
||||
line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
|
||||
|
@ -530,7 +555,7 @@ combined with \fB-v\fP (invert the sense of the match to find non-matching
|
|||
lines), no output is generated, but the return code is set appropriately. If
|
||||
the matched portion of the line is empty, nothing is output unless the file
|
||||
name or line number are being printed, in which case they are shown on an
|
||||
otherwise empty line. This option is mutually exclusive with
|
||||
otherwise empty line. This option is mutually exclusive with \fB--output\fP,
|
||||
\fB--file-offsets\fP and \fB--line-offsets\fP.
|
||||
.TP
|
||||
\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
|
||||
|
@ -539,7 +564,7 @@ given number. Up to 32 capturing parentheses are supported, and -o0 is
|
|||
equivalent to \fB-o\fP without a number. Because these options can be given
|
||||
without an argument (see above), if an argument is present, it must be given in
|
||||
the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||
for the non-argument case above also apply to this case. If the specified
|
||||
for the non-argument case above also apply to this option. If the specified
|
||||
capturing parentheses do not exist in the pattern, or were not set in the
|
||||
match, nothing is output unless the file name or line number are being output.
|
||||
.sp
|
||||
|
@ -647,9 +672,9 @@ as in the GNU \fBgrep\fP program. Any long option of the form
|
|||
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
|
||||
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
|
||||
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
|
||||
\fB--newline\fP, \fB--om-separator\fP, \fB-u\fP, and \fB--utf-8\fP options are
|
||||
specific to \fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option
|
||||
with a capturing parentheses number.
|
||||
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
|
||||
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
|
||||
\fB--only-matching\fP option with a capturing parentheses number.
|
||||
.P
|
||||
Although most of the common options work the same way, a few are different in
|
||||
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
|
||||
|
@ -690,25 +715,32 @@ options does have data, it must be given in the first form, using an equals
|
|||
character. Otherwise \fBpcre2grep\fP will assume that it has no data.
|
||||
.
|
||||
.
|
||||
.SH "CALLING EXTERNAL SCRIPTS"
|
||||
.SH "USING PCRE2'S CALLOUT FACILITY"
|
||||
.rs
|
||||
.sp
|
||||
\fBpcre2grep\fP has, by default, support for calling external programs or
|
||||
scripts during matching by making use of PCRE2's callout facility. However,
|
||||
this support can be disabled when \fBpcre2grep\fP is built. You can find out
|
||||
whether your binary has support for callouts by running it with the \fB--help\fP
|
||||
option. If the support is not enabled, all callouts in patterns are ignored by
|
||||
\fBpcre2grep\fP.
|
||||
scripts or echoing specific strings during matching by making use of PCRE2's
|
||||
callout facility. However, this support can be disabled when \fBpcre2grep\fP is
|
||||
built. You can find out whether your binary has support for callouts by running
|
||||
it with the \fB--help\fP option. If the support is not enabled, all callouts in
|
||||
patterns are ignored by \fBpcre2grep\fP.
|
||||
.P
|
||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
||||
either a number or a quoted string (see the
|
||||
.\" HREF
|
||||
\fBpcre2callout\fP
|
||||
.\"
|
||||
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP.
|
||||
String arguments are parsed as a list of substrings separated by pipe (vertical
|
||||
bar) characters. The first substring must be an executable name, with the
|
||||
following substrings specifying arguments:
|
||||
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
|
||||
only callouts with string arguments are useful.
|
||||
.
|
||||
.
|
||||
.SS "Calling external programs or scripts"
|
||||
.rs
|
||||
.sp
|
||||
If the callout string does not start with a pipe (vertical bar) character, it
|
||||
is parsed into a list of substrings separated by pipe characters. The first
|
||||
substring must be an executable name, with the following substrings specifying
|
||||
arguments:
|
||||
.sp
|
||||
executable_name|arg1|arg2|...
|
||||
.sp
|
||||
|
@ -742,6 +774,19 @@ the non-existence of the executable), a local matching failure occurs and the
|
|||
matcher backtracks in the normal way.
|
||||
.
|
||||
.
|
||||
.SS "Echoing a specific string"
|
||||
.rs
|
||||
.sp
|
||||
If the callout string starts with a pipe (vertical bar) character, the rest of
|
||||
the string is written to the output, having been passed through the same escape
|
||||
processing as text from the --output option. This provides a simple echoing
|
||||
facility that avoids calling an external program or script. No terminator is
|
||||
added to the string, so if you want a newline, you must include it explicitly.
|
||||
Matching continues normally after the string is output. If you want to see only
|
||||
the callout output but not any output from an actual match, you should end the
|
||||
relevant pattern with (*FAIL).
|
||||
.
|
||||
.
|
||||
.SH "MATCHING ERRORS"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -789,6 +834,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 06 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -363,8 +363,8 @@ OPTIONS
|
|||
length, separated by a comma. In this mode, no context is
|
||||
shown. That is, the -A, -B, and -C options are ignored. If
|
||||
there is more than one match in a line, each of them is shown
|
||||
separately. This option is mutually exclusive with --line-
|
||||
offsets and --only-matching.
|
||||
separately. This option is mutually exclusive with --output,
|
||||
--line-offsets, and --only-matching.
|
||||
|
||||
-H, --with-filename
|
||||
Force the inclusion of the file name at the start of output
|
||||
|
@ -469,8 +469,8 @@ OPTIONS
|
|||
separated by a comma. In this mode, no context is shown.
|
||||
That is, the -A, -B, and -C options are ignored. If there is
|
||||
more than one match in a line, each of them is shown sepa-
|
||||
rately. This option is mutually exclusive with --file-offsets
|
||||
and --only-matching.
|
||||
rately. This option is mutually exclusive with --output,
|
||||
--file-offsets, and --only-matching.
|
||||
|
||||
--locale=locale-name
|
||||
This option specifies a locale to be used for pattern match-
|
||||
|
@ -585,6 +585,33 @@ OPTIONS
|
|||
run time. It is provided for testing and working round prob-
|
||||
lems. It should never be needed in normal use.
|
||||
|
||||
-O text, --output=text
|
||||
When there is a match, instead of outputting the whole line
|
||||
that matched, output just the given text. This option is
|
||||
mutually exclusive with --only-matching, --file-offsets, and
|
||||
--line-offsets. Escape sequences starting with a dollar char-
|
||||
acter may be used to insert the contents of the matched part
|
||||
of the line and/or captured substrings into the text.
|
||||
|
||||
$<digits> or ${<digits>} is replaced by the captured sub-
|
||||
string of the given decimal number; zero substitutes the
|
||||
whole match. If the number is greater than the number of cap-
|
||||
turing substrings, or if the capture is unset, the replace-
|
||||
ment is empty.
|
||||
|
||||
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
||||
form feed; $n by newline; $r by carriage return; $t by tab;
|
||||
$v by vertical tab.
|
||||
|
||||
$o<digits> is replaced by the character represented by the
|
||||
given octal number; up to three digits are processed.
|
||||
|
||||
$x<digits> is replaced by the character represented by the
|
||||
given hexadecimal number; up to two digits are processed.
|
||||
|
||||
Any other character is substituted by itself. In particular,
|
||||
$$ is replaced by a single dollar.
|
||||
|
||||
-o, --only-matching
|
||||
Show only the part of the line that matched a pattern instead
|
||||
of the whole line. In this mode, no context is shown. That
|
||||
|
@ -596,8 +623,8 @@ OPTIONS
|
|||
ately. If the matched portion of the line is empty, nothing
|
||||
is output unless the file name or line number are being
|
||||
printed, in which case they are shown on an otherwise empty
|
||||
line. This option is mutually exclusive with --file-offsets
|
||||
and --line-offsets.
|
||||
line. This option is mutually exclusive with --output,
|
||||
--file-offsets and --line-offsets.
|
||||
|
||||
-onumber, --only-matching=number
|
||||
Show only the part of the line that matched the capturing
|
||||
|
@ -607,7 +634,7 @@ OPTIONS
|
|||
(see above), if an argument is present, it must be given in
|
||||
the same shell item, for example, -o3 or --only-matching=2.
|
||||
The comments given for the non-argument case above also apply
|
||||
to this case. If the specified capturing parentheses do not
|
||||
to this option. If the specified capturing parentheses do not
|
||||
exist in the pattern, or were not set in the match, nothing
|
||||
is output unless the file name or line number are being out-
|
||||
put.
|
||||
|
@ -723,7 +750,7 @@ OPTIONS COMPATIBILITY
|
|||
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
|
||||
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
|
||||
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
|
||||
line, --om-separator, -u, and --utf-8 options are specific to
|
||||
line, --om-separator, --output, -u, and --utf-8 options are specific to
|
||||
pcre2grep, as is the use of the --only-matching option with a capturing
|
||||
parentheses number.
|
||||
|
||||
|
@ -766,21 +793,26 @@ OPTIONS WITH DATA
|
|||
equals character. Otherwise pcre2grep will assume that it has no data.
|
||||
|
||||
|
||||
CALLING EXTERNAL SCRIPTS
|
||||
USING PCRE2'S CALLOUT FACILITY
|
||||
|
||||
pcre2grep has, by default, support for calling external programs or
|
||||
scripts during matching by making use of PCRE2's callout facility. How-
|
||||
ever, this support can be disabled when pcre2grep is built. You can
|
||||
find out whether your binary has support for callouts by running it
|
||||
with the --help option. If the support is not enabled, all callouts in
|
||||
patterns are ignored by pcre2grep.
|
||||
scripts or echoing specific strings during matching by making use of
|
||||
PCRE2's callout facility. However, this support can be disabled when
|
||||
pcre2grep is built. You can find out whether your binary has support
|
||||
for callouts by running it with the --help option. If the support is
|
||||
not enabled, all callouts in patterns are ignored by pcre2grep.
|
||||
|
||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||
mentation for details). Numbered callouts are ignored by pcre2grep.
|
||||
String arguments are parsed as a list of substrings separated by pipe
|
||||
(vertical bar) characters. The first substring must be an executable
|
||||
name, with the following substrings specifying arguments:
|
||||
mentation for details). Numbered callouts are ignored by pcre2grep;
|
||||
only callouts with string arguments are useful.
|
||||
|
||||
Calling external programs or scripts
|
||||
|
||||
If the callout string does not start with a pipe (vertical bar) charac-
|
||||
ter, it is parsed into a list of substrings separated by pipe charac-
|
||||
ters. The first substring must be an executable name, with the follow-
|
||||
ing substrings specifying arguments:
|
||||
|
||||
executable_name|arg1|arg2|...
|
||||
|
||||
|
@ -816,6 +848,18 @@ CALLING EXTERNAL SCRIPTS
|
|||
local matching failure occurs and the matcher backtracks in the normal
|
||||
way.
|
||||
|
||||
Echoing a specific string
|
||||
|
||||
If the callout string starts with a pipe (vertical bar) character, the
|
||||
rest of the string is written to the output, having been passed through
|
||||
the same escape processing as text from the --output option. This pro-
|
||||
vides a simple echoing facility that avoids calling an external program
|
||||
or script. No terminator is added to the string, so if you want a new-
|
||||
line, you must include it explicitly. Matching continues normally
|
||||
after the string is output. If you want to see only the callout output
|
||||
but not any output from an actual match, you should end the relevant
|
||||
pattern with (*FAIL).
|
||||
|
||||
|
||||
MATCHING ERRORS
|
||||
|
||||
|
@ -857,5 +901,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 06 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
|
@ -511,6 +511,7 @@ PATTERN MODIFIERS
|
|||
dollar_endonly set PCRE2_DOLLAR_ENDONLY
|
||||
/s dotall set PCRE2_DOTALL
|
||||
dupnames set PCRE2_DUPNAMES
|
||||
endanchored set PCRE2_ENDANCHORED
|
||||
/x extended set PCRE2_EXTENDED
|
||||
firstline set PCRE2_FIRSTLINE
|
||||
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
||||
|
@ -926,6 +927,7 @@ SUBJECT MODIFIERS
|
|||
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
||||
|
||||
anchored set PCRE2_ANCHORED
|
||||
endanchored set PCRE2_ENDANCHORED
|
||||
dfa_restart set PCRE2_DFA_RESTART
|
||||
dfa_shortest set PCRE2_DFA_SHORTEST
|
||||
no_jit set PCRE2_NO_JIT
|
||||
|
@ -1630,5 +1632,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 25 March 2017
|
||||
Last updated: 04 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
362
src/pcre2grep.c
362
src/pcre2grep.c
|
@ -175,8 +175,9 @@ static const char *dee_option = NULL;
|
|||
static const char *DEE_option = NULL;
|
||||
static const char *locale = NULL;
|
||||
static const char *newline_arg = NULL;
|
||||
static const char *om_separator = "";
|
||||
static const char *om_separator = NULL;
|
||||
static const char *stdin_name = "(standard input)";
|
||||
static const char *output_text = NULL;
|
||||
|
||||
static char *main_buffer = NULL;
|
||||
|
||||
|
@ -196,6 +197,7 @@ static int dee_action = dee_SKIP;
|
|||
#else
|
||||
static int dee_action = dee_READ;
|
||||
#endif
|
||||
|
||||
static int DEE_action = DEE_READ;
|
||||
static int error_count = 0;
|
||||
static int filenames = FN_DEFAULT;
|
||||
|
@ -233,7 +235,6 @@ static BOOL number = FALSE;
|
|||
static BOOL omit_zero_count = FALSE;
|
||||
static BOOL resource_error = FALSE;
|
||||
static BOOL quiet = FALSE;
|
||||
static BOOL show_only_matching = FALSE;
|
||||
static BOOL show_total_count = FALSE;
|
||||
static BOOL silent = FALSE;
|
||||
static BOOL utf = FALSE;
|
||||
|
@ -247,6 +248,7 @@ typedef struct omstr {
|
|||
|
||||
static omstr *only_matching = NULL;
|
||||
static omstr *only_matching_last = NULL;
|
||||
static int only_matching_count;
|
||||
|
||||
/* Structure for holding the two variables that describe a number chain. */
|
||||
|
||||
|
@ -406,6 +408,7 @@ static option_item optionlist[] = {
|
|||
#else
|
||||
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
|
||||
#endif
|
||||
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
|
||||
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
|
||||
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
|
||||
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
||||
|
@ -793,7 +796,7 @@ return isatty(fileno(f));
|
|||
/************* Print optionally coloured match Unix-style and z/OS **********/
|
||||
|
||||
static void
|
||||
print_match(const char* buf, int length)
|
||||
print_match(const void *buf, int length)
|
||||
{
|
||||
if (length == 0) return;
|
||||
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
|
||||
|
@ -942,7 +945,7 @@ static CONSOLE_SCREEN_BUFFER_INFO csbi;
|
|||
static WORD match_colour;
|
||||
|
||||
static void
|
||||
print_match(const char* buf, int length)
|
||||
print_match(const void *buf, int length)
|
||||
{
|
||||
if (length == 0) return;
|
||||
if (do_colour)
|
||||
|
@ -1001,7 +1004,7 @@ return FALSE;
|
|||
/************* Print optionally coloured match when we can't do it **********/
|
||||
|
||||
static void
|
||||
print_match(const char* buf, int length)
|
||||
print_match(const void *buf, int length)
|
||||
{
|
||||
if (length == 0) return;
|
||||
FWRITE(buf, 1, length, stdout);
|
||||
|
@ -1658,6 +1661,277 @@ return FALSE; /* No match, no errors */
|
|||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check output text for errors *
|
||||
*************************************************/
|
||||
|
||||
static BOOL
|
||||
syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
|
||||
{
|
||||
PCRE2_SPTR begin = string;
|
||||
for (; *string != 0; string++)
|
||||
{
|
||||
if (*string == '$')
|
||||
{
|
||||
PCRE2_SIZE capture_id = 0;
|
||||
BOOL brace = FALSE;
|
||||
|
||||
string++;
|
||||
|
||||
/* Syntax error: a character must be present after $. */
|
||||
if (*string == 0)
|
||||
{
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "no character after $");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (*string == '{')
|
||||
{
|
||||
/* Must be a decimal number in braces, e.g: {5} or {38} */
|
||||
string++;
|
||||
|
||||
brace = TRUE;
|
||||
}
|
||||
|
||||
if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
|
||||
{
|
||||
do
|
||||
{
|
||||
/* Maximum capture id is 65535. */
|
||||
if (capture_id <= 65535)
|
||||
capture_id = capture_id * 10 + (*string - '0');
|
||||
|
||||
string++;
|
||||
}
|
||||
while (*string >= '0' && *string <= '9');
|
||||
|
||||
if (brace)
|
||||
{
|
||||
/* Syntax error: closing brace is missing. */
|
||||
if (*string != '}')
|
||||
{
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "missing closing brace");
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* To negate the effect of the for. */
|
||||
string--;
|
||||
}
|
||||
}
|
||||
else if (brace)
|
||||
{
|
||||
/* Syntax error: a decimal number required. */
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "decimal number expected");
|
||||
return FALSE;
|
||||
}
|
||||
else if (*string == 'o')
|
||||
{
|
||||
string++;
|
||||
|
||||
if (*string < '0' || *string > '7')
|
||||
{
|
||||
/* Syntax error: an octal number required. */
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "octal number expected");
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
else if (*string == 'x')
|
||||
{
|
||||
string++;
|
||||
|
||||
if (!isxdigit((unsigned char)*string))
|
||||
{
|
||||
/* Syntax error: a hexdecimal number required. */
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
|
||||
(int)(string - begin), "hexadecimal number expected");
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Display output text *
|
||||
*************************************************/
|
||||
|
||||
/* Display the output text, which is assumed to have already been syntax
|
||||
checked. Output may contain escape sequences started by the dollar sign. The
|
||||
escape sequences are substituted as follows:
|
||||
|
||||
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
||||
decimal number; zero will substitute the whole match. If the number is
|
||||
greater than the number of capturing substrings, or if the capture is unset,
|
||||
the replacement is empty.
|
||||
|
||||
$a is replaced by bell.
|
||||
$b is replaced by backspace.
|
||||
$e is replaced by escape.
|
||||
$f is replaced by form feed.
|
||||
$n is replaced by newline.
|
||||
$r is replaced by carriage return.
|
||||
$t is replaced by tab.
|
||||
$v is replaced by vertical tab.
|
||||
|
||||
$o<digits> is replaced by the character represented by the given octal
|
||||
number; up to three digits are processed.
|
||||
|
||||
$x<digits> is replaced by the character represented by the given hexadecimal
|
||||
number; up to two digits are processed.
|
||||
|
||||
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
||||
dollar.
|
||||
|
||||
Arguments:
|
||||
string: the output text
|
||||
callout: TRUE for the builtin callout, FALSE for --output
|
||||
subject the start of the subject
|
||||
ovector: capture offsets
|
||||
capture_top: number of captures
|
||||
|
||||
Returns: TRUE if something was output, other than newline
|
||||
FALSE if nothing was output, or newline was last output
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
|
||||
PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
|
||||
{
|
||||
BOOL printed = FALSE;
|
||||
|
||||
for (; *string != 0; string++)
|
||||
{
|
||||
int ch = EOF;
|
||||
if (*string == '$')
|
||||
{
|
||||
PCRE2_SIZE capture_id = 0;
|
||||
BOOL brace = FALSE;
|
||||
|
||||
string++;
|
||||
|
||||
if (*string == '{')
|
||||
{
|
||||
/* Must be a decimal number in braces, e.g: {5} or {38} */
|
||||
string++;
|
||||
|
||||
brace = TRUE;
|
||||
}
|
||||
|
||||
if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
|
||||
{
|
||||
do
|
||||
{
|
||||
/* Maximum capture id is 65535. */
|
||||
if (capture_id <= 65535)
|
||||
capture_id = capture_id * 10 + (*string - '0');
|
||||
|
||||
string++;
|
||||
}
|
||||
while (*string >= '0' && *string <= '9');
|
||||
|
||||
if (!brace)
|
||||
{
|
||||
/* To negate the effect of the for. */
|
||||
string--;
|
||||
}
|
||||
|
||||
if (capture_id < capture_top)
|
||||
{
|
||||
PCRE2_SIZE capturesize;
|
||||
capture_id *= 2;
|
||||
|
||||
capturesize = ovector[capture_id + 1] - ovector[capture_id];
|
||||
if (capturesize > 0)
|
||||
{
|
||||
print_match(subject + ovector[capture_id], capturesize);
|
||||
printed = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (*string == 'a') ch = '\a';
|
||||
else if (*string == 'b') ch = '\b';
|
||||
#ifndef EBCDIC
|
||||
else if (*string == 'e') ch = '\033';
|
||||
#else
|
||||
else if (*string == 'e') ch = '\047';
|
||||
#endif
|
||||
else if (*string == 'f') ch = '\f';
|
||||
else if (*string == 'r') ch = '\r';
|
||||
else if (*string == 't') ch = '\t';
|
||||
else if (*string == 'v') ch = '\v';
|
||||
else if (*string == 'n')
|
||||
{
|
||||
fprintf(stdout, STDOUT_NL);
|
||||
printed = FALSE;
|
||||
}
|
||||
else if (*string == 'o')
|
||||
{
|
||||
string++;
|
||||
|
||||
ch = *string - '0';
|
||||
if (string[1] >= '0' && string[1] <= '7')
|
||||
{
|
||||
string++;
|
||||
ch = ch * 8 + (*string - '0');
|
||||
}
|
||||
if (string[1] >= '0' && string[1] <= '7')
|
||||
{
|
||||
string++;
|
||||
ch = ch * 8 + (*string - '0');
|
||||
}
|
||||
}
|
||||
else if (*string == 'x')
|
||||
{
|
||||
string++;
|
||||
|
||||
if (*string >= '0' && *string <= '9')
|
||||
ch = *string - '0';
|
||||
else
|
||||
ch = (*string | 0x20) - 'a' + 10;
|
||||
if (isxdigit((unsigned char)string[1]))
|
||||
{
|
||||
string++;
|
||||
ch *= 16;
|
||||
if (*string >= '0' && *string <= '9')
|
||||
ch += *string - '0';
|
||||
else
|
||||
ch += (*string | 0x20) - 'a' + 10;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ch = *string;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ch = *string;
|
||||
}
|
||||
if (ch != EOF)
|
||||
{
|
||||
fprintf(stdout, "%c", ch);
|
||||
printed = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return printed;
|
||||
}
|
||||
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
|
||||
/*************************************************
|
||||
|
@ -1683,6 +1957,10 @@ follows:
|
|||
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
||||
dollar or $| replaced by a pipe character.
|
||||
|
||||
Alternatively, if string starts with pipe, the remainder is taken as an output
|
||||
string, same as --output. In this case, --om-separator is used to separate each
|
||||
callout, defaulting to newline.
|
||||
|
||||
Example:
|
||||
|
||||
echo -e "abcde\n12345" | pcre2grep \
|
||||
|
@ -1725,6 +2003,16 @@ int result = 0;
|
|||
/* Only callouts with string arguments are supported. */
|
||||
if (string == NULL || length == 0) return 0;
|
||||
|
||||
/* If there's no command, output the remainder directly. */
|
||||
|
||||
if (*string == '|')
|
||||
{
|
||||
string++;
|
||||
if (!syntax_check_output_text(string, TRUE)) return 0;
|
||||
(void)display_output_text(string, TRUE, subject, ovector, capture_top);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check the syntax and compute the number of string fragments. Callout strings
|
||||
are ignored in case of a syntax error. */
|
||||
|
||||
|
@ -2174,8 +2462,8 @@ while (ptr < endptr)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* We come back here after a match when show_only_matching is set, in order
|
||||
to find any further matches in the same line. This applies to
|
||||
/* We come back here after a match when only_matching_count is non-zero, in
|
||||
order to find any further matches in the same line. This applies to
|
||||
--only-matching, --output, --file-offsets, and --line-offsets. */
|
||||
|
||||
ONLY_MATCHING_RESTART:
|
||||
|
@ -2229,13 +2517,13 @@ while (ptr < endptr)
|
|||
/* The --only-matching option prints just the substring that matched,
|
||||
and/or one or more captured portions of it, as long as these strings are
|
||||
not empty. The --file-offsets and --line-offsets options output offsets for
|
||||
the matching substring (all three set show_only_matching). None of these
|
||||
mutually exclusive options prints any context. Afterwards, adjust the start
|
||||
and then jump back to look for further matches in the same line. If we are
|
||||
in invert mode, however, nothing is printed and we do not restart - this
|
||||
could still be useful because the return code is set. */
|
||||
the matching substring (all three set only_matching_count non-zero). None
|
||||
of these mutually exclusive options prints any context. Afterwards, adjust
|
||||
the start and then jump back to look for further matches in the same line.
|
||||
If we are in invert mode, however, nothing is printed and we do not restart
|
||||
- this could still be useful because the return code is set. */
|
||||
|
||||
else if (show_only_matching)
|
||||
else if (only_matching_count != 0)
|
||||
{
|
||||
if (!invert)
|
||||
{
|
||||
|
@ -2257,6 +2545,16 @@ while (ptr < endptr)
|
|||
(int)(filepos + matchptr + offsets[0] - ptr),
|
||||
(int)(offsets[1] - offsets[0]));
|
||||
|
||||
/* Handle --output (which has already been syntax checked) */
|
||||
|
||||
else if (output_text != NULL)
|
||||
{
|
||||
if (display_output_text((PCRE2_SPTR)output_text, FALSE,
|
||||
(PCRE2_SPTR)matchptr, offsets, mrc) || printname != NULL ||
|
||||
number)
|
||||
fprintf(stdout, STDOUT_NL);
|
||||
}
|
||||
|
||||
/* Handle --only-matching, which may occur many times */
|
||||
|
||||
else
|
||||
|
@ -2272,7 +2570,8 @@ while (ptr < endptr)
|
|||
int plen = offsets[2*n + 1] - offsets[2*n];
|
||||
if (plen > 0)
|
||||
{
|
||||
if (printed) fprintf(stdout, "%s", om_separator);
|
||||
if (printed && om_separator != NULL)
|
||||
fprintf(stdout, "%s", om_separator);
|
||||
print_match(matchptr + offsets[n*2], plen);
|
||||
printed = TRUE;
|
||||
}
|
||||
|
@ -2557,7 +2856,7 @@ while (ptr < endptr)
|
|||
/* End of file; print final "after" lines if wanted; do_after_lines sets
|
||||
hyphenpending if it prints something. */
|
||||
|
||||
if (!show_only_matching && !(count_only|show_total_count))
|
||||
if (only_matching_count == 0 && !(count_only|show_total_count))
|
||||
{
|
||||
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
||||
hyphenpending |= endhyphenpending;
|
||||
|
@ -3518,26 +3817,31 @@ if (both_context > 0)
|
|||
if (before_context == 0) before_context = both_context;
|
||||
}
|
||||
|
||||
/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
|
||||
However, all three set show_only_matching because they display, each in their
|
||||
own way, only the data that has matched. */
|
||||
/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
|
||||
permitted. They display, each in their own way, only the data that has matched.
|
||||
*/
|
||||
|
||||
if ((only_matching != NULL && (file_offsets || line_offsets)) ||
|
||||
(file_offsets && line_offsets))
|
||||
only_matching_count = (only_matching != NULL) + (output_text != NULL) +
|
||||
file_offsets + line_offsets;
|
||||
|
||||
if (only_matching_count > 1)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets "
|
||||
"and/or --line-offsets\n");
|
||||
fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
|
||||
"--file-offsets and/or --line-offsets\n");
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
|
||||
/* Check the text supplied to --output for errors. */
|
||||
|
||||
if (output_text != NULL &&
|
||||
!syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
|
||||
goto EXIT2;
|
||||
|
||||
/* Put limits into the match data block. */
|
||||
|
||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
||||
|
||||
if (only_matching != NULL || file_offsets || line_offsets)
|
||||
show_only_matching = TRUE;
|
||||
|
||||
/* If a locale has not been provided as an option, see if the LC_CTYPE or
|
||||
LC_ALL environment variable is set, and if so, use it. */
|
||||
|
||||
|
@ -3827,6 +4131,14 @@ for (; i < argc; i++)
|
|||
else if (frc == 0 && rc == 1) rc = 0;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
/* If separating builtin echo callouts by implicit newline, add one more for
|
||||
the final item. */
|
||||
|
||||
if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
|
||||
fprintf(stdout, STDOUT_NL);
|
||||
#endif
|
||||
|
||||
/* Show the total number of matches if requested, but not if only one file's
|
||||
count was printed. */
|
||||
|
||||
|
|
|
@ -829,3 +829,8 @@ def
|
|||
xyz
|
||||
---
|
||||
RC=0
|
||||
---------------------------- Test 120 ------------------------------
|
||||
./testdata/grepinput:the binary zero.:zerothe.
|
||||
./testdata/grepinput:a binary zero:zeroa
|
||||
./testdata/grepinput:the binary zero.:zerothe.
|
||||
RC=0
|
||||
|
|
|
@ -6,3 +6,9 @@ Arg1: [qu] [qu]
|
|||
Arg1: [ t] [ t]
|
||||
The quick brown
|
||||
This time it jumps and jumps and jumps.
|
||||
0:T
|
||||
The quick brown
|
||||
0:T
|
||||
This time it jumps and jumps and jumps.
|
||||
T
|
||||
T
|
||||
|
|
Loading…
Reference in New Issue