Add --disable-pcre2grep-callout-fork configuration setting.

This commit is contained in:
Philip.Hazel 2018-11-17 16:45:57 +00:00
parent 149af0e21b
commit 0ad7ff1549
13 changed files with 208 additions and 95 deletions

View File

@ -83,6 +83,7 @@
# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed) # 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed)
# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC # 2018-06-27 PH added Daniel's patch to increase the stack for MSVC
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h # 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
PROJECT(PCRE2 C) PROJECT(PCRE2 C)
@ -136,7 +137,7 @@ OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
OPTION(PCRE2_DEBUG "Include debugging code" OFF) OPTION(PCRE2_DEBUG "Include debugging code" OFF)
OPTION(DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF)
SET(PCRE2_EBCDIC OFF CACHE BOOL SET(PCRE2_EBCDIC OFF CACHE BOOL
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)") "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
@ -177,12 +178,15 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
"Enable SELinux compatible execmem allocator in JIT.") "Enable SELinux compatible execmem allocator in JIT.")
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL
"Enable use of Just-in-time compiling in pcre2grep.") "Enable use of Just-in-time compiling in pcre2grep.")
SET(PCRE2_SUPPORT_PCRE2GREP_CALLOUT ON CACHE BOOL SET(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL
"Enable callout string support in pcre2grep.") "Enable callout string support in pcre2grep.")
SET(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL
"Enable callout string fork support in pcre2grep.")
SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
@ -301,18 +305,25 @@ IF(PCRE2_SUPPORT_JIT_SEALLOC)
SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1) SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
ENDIF(PCRE2_SUPPORT_JIT_SEALLOC) ENDIF(PCRE2_SUPPORT_JIT_SEALLOC)
IF(PCRE2_SUPPORT_PCRE2GREP_JIT) IF(PCRE2GREP_SUPPORT_JIT)
SET(SUPPORT_PCRE2GREP_JIT 1) SET(SUPPORT_PCRE2GREP_JIT 1)
ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT) ENDIF(PCRE2GREP_SUPPORT_JIT)
IF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) IF(PCRE2GREP_SUPPORT_CALLOUT)
SET(SUPPORT_PCRE2GREP_CALLOUT 1) SET(SUPPORT_PCRE2GREP_CALLOUT 1)
ENDIF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) IF(PCRE2GREP_SUPPORT_CALLOUT_FORK)
SET(SUPPORT_PCRE2GREP_CALLOUT_FORK 1)
ENDIF(PCRE2GREP_SUPPORT_CALLOUT_FORK)
ENDIF(PCRE2GREP_SUPPORT_CALLOUT)
IF(PCRE2_SUPPORT_VALGRIND) IF(PCRE2_SUPPORT_VALGRIND)
SET(SUPPORT_VALGRIND 1) SET(SUPPORT_VALGRIND 1)
ENDIF(PCRE2_SUPPORT_VALGRIND) ENDIF(PCRE2_SUPPORT_VALGRIND)
IF(PCRE2_DISABLE_PERCENT_ZT)
SET(DISABLE_PERCENT_ZT 1)
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
# This next one used to reference ${READLINE_LIBRARY}) # This next one used to reference ${READLINE_LIBRARY})
# but I was advised to add the NCURSES test as well, along with # but I was advised to add the NCURSES test as well, along with
# some modifications to cmake/FindReadline.cmake which should # some modifications to cmake/FindReadline.cmake which should
@ -802,10 +813,11 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}") MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}") MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2GREP_SUPPORT_JIT}")
MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2_SUPPORT_PCRE2GREP_CALLOUT}") MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2GREP_SUPPORT_CALLOUT}")
MESSAGE(STATUS " Enable callout fork in pcre2grep. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}")
MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}") MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}")
MESSAGE(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}") MESSAGE(STATUS " Build tests (implies pcre2test .. : ${PCRE2_BUILD_TESTS}")
MESSAGE(STATUS " and pcre2grep)") MESSAGE(STATUS " and pcre2grep)")
IF(ZLIB_FOUND) IF(ZLIB_FOUND)
MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}") MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}")
@ -828,6 +840,11 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" ) MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" )
ENDIF(READLINE_FOUND) ENDIF(READLINE_FOUND)
MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}") MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
IF(PCRE2_DISABLE_PERCENT_ZT)
MESSAGE(STATUS " Use %zu and %td ..................: OFF" )
ELSE(PCRE2_DISABLE_PERCENT_ZT)
MESSAGE(STATUS " Use %zu and %td ..................: AUTO" )
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
IF(MINGW AND NOT PCRE2_STATIC) IF(MINGW AND NOT PCRE2_STATIC)
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}") MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")

View File

@ -73,6 +73,10 @@ forcibly disable the use of %zu and %td in formatting strings because there is
at least one version of VMS that claims to be C99 but does not support these at least one version of VMS that claims to be C99 but does not support these
modifiers. modifiers.
18. Added --disable-pcre2grep-callout-fork, which restricts the callout support
in pcre2grep to the inbuilt echo facility. This may be useful in environments
that do not support fork().
Version 10.32 10-September-2018 Version 10.32 10-September-2018
------------------------------- -------------------------------

View File

@ -625,6 +625,7 @@ EXTRA_DIST += \
testdata/grepoutput \ testdata/grepoutput \
testdata/grepoutput8 \ testdata/grepoutput8 \
testdata/grepoutputC \ testdata/grepoutputC \
testdata/grepoutputCN \
testdata/grepoutputN \ testdata/grepoutputN \
testdata/greppatN4 \ testdata/greppatN4 \
testdata/testinput1 \ testdata/testinput1 \

View File

@ -732,16 +732,24 @@ fi
$cf $srcdir/testdata/grepoutputN testtrygrep $cf $srcdir/testdata/grepoutputN testtrygrep
if [ $? != 0 ] ; then exit 1; fi if [ $? != 0 ] ; then exit 1; fi
# If pcre2grep supports script callouts, run some tests on them. # If pcre2grep supports script callouts, run some tests on them. It is possible
# to restrict these callouts to the non-fork case, either for security, or for
# environments that do not support fork(). This is handled by comparing to a
# different output.
if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scripts in patterns are supported'; then if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then
echo "Testing pcre2grep script callouts" echo "Testing pcre2grep script callouts"
$valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
$valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
$valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
# The above has no newline, which 'diff -ub' ignores, so add one.
$cf $srcdir/testdata/grepoutputC testtrygrep if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
$cf $srcdir/testdata/grepoutputCN testtrygrep
else
$cf $srcdir/testdata/grepoutputC testtrygrep
fi
if [ $? != 0 ] ; then exit 1; fi if [ $? != 0 ] ; then exit 1; fi
else else
echo "Script callouts are not supported" echo "Script callouts are not supported"

View File

@ -653,14 +653,19 @@ if ERRORLEVEL 1 exit /b 1
:: If pcre2grep supports script callouts, run some tests on them. :: If pcre2grep supports script callouts, run some tests on them.
%pcre2grep% --help | %pcre2grep% -q "Callout scripts in patterns are supported" %pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported"
if %ERRORLEVEL% equ 0 ( if %ERRORLEVEL% equ 0 (
echo Testing pcre2grep script callouts echo Testing pcre2grep script callouts
%pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
%pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep
%pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% %pcre2grep% --help | %pcre2grep% -q "Non-fork callout scripts in patterns are supported"
if not ERRORLEVEL 1 (
%cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout%
) else (
%cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
)
if ERRORLEVEL 1 exit /b 1 if ERRORLEVEL 1 exit /b 1
) else ( ) else (
echo Script callouts are not supported echo Script callouts are not supported

View File

@ -28,6 +28,8 @@
#cmakedefine SUPPORT_JIT 1 #cmakedefine SUPPORT_JIT 1
#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1 #cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1
#cmakedefine SUPPORT_PCRE2GREP_JIT 1 #cmakedefine SUPPORT_PCRE2GREP_JIT 1
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1
#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1
#cmakedefine SUPPORT_UNICODE 1 #cmakedefine SUPPORT_UNICODE 1
#cmakedefine SUPPORT_VALGRIND 1 #cmakedefine SUPPORT_VALGRIND 1

View File

@ -173,6 +173,12 @@ AC_ARG_ENABLE(pcre2grep-callout,
[disable callout script support in pcre2grep]), [disable callout script support in pcre2grep]),
, enable_pcre2grep_callout=yes) , enable_pcre2grep_callout=yes)
# Handle --disable-pcre2grep-callout-fork (enabled by default)
AC_ARG_ENABLE(pcre2grep-callout-fork,
AS_HELP_STRING([--disable-pcre2grep-callout-fork],
[disable callout script fork support in pcre2grep]),
, enable_pcre2grep_callout_fork=yes)
# Handle --enable-rebuild-chartables # Handle --enable-rebuild-chartables
AC_ARG_ENABLE(rebuild-chartables, AC_ARG_ENABLE(rebuild-chartables,
AS_HELP_STRING([--enable-rebuild-chartables], AS_HELP_STRING([--enable-rebuild-chartables],
@ -630,13 +636,21 @@ if test "$enable_pcre2grep_jit" = "yes"; then
fi fi
if test "$enable_pcre2grep_callout" = "yes"; then if test "$enable_pcre2grep_callout" = "yes"; then
if test "$HAVE_WINDOWS_H" != "1"; then if test "$enable_pcre2grep_callout_fork" = "yes"; then
if test "$HAVE_SYS_WAIT_H" != "1"; then if test "$HAVE_WINDOWS_H" != "1"; then
AC_MSG_ERROR([Callout script support needs sys/wait.h.]) if test "$HAVE_SYS_WAIT_H" != "1"; then
AC_MSG_ERROR([Callout script support needs sys/wait.h.])
fi
fi fi
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
Define to any value to enable fork support in pcre2grep callout scripts.
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
defined.])
fi fi
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
Define to any value to enable callout script support in pcre2grep.]) Define to any value to enable callout script support in pcre2grep.])
else
enable_pcre2grep_callout_fork="no"
fi fi
if test "$enable_unicode" = "yes"; then if test "$enable_unicode" = "yes"; then
@ -1038,6 +1052,7 @@ $PACKAGE-$VERSION configuration summary:
Build static libs .................. : ${enable_static} Build static libs .................. : ${enable_static}
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit} Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout} Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize} Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize} Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz} Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}

View File

@ -853,10 +853,12 @@ character. Otherwise <b>pcre2grep</b> will assume that it has no data.
<P> <P>
<b>pcre2grep</b> has, by default, support for calling external programs or <b>pcre2grep</b> has, by default, support for calling external programs or
scripts or echoing specific strings during matching by making use of PCRE2's scripts or echoing specific strings during matching by making use of PCRE2's
callout facility. However, this support can be disabled when <b>pcre2grep</b> is callout facility. However, this support can be completely or partially disabled
built. You can find out whether your binary has support for callouts by running when <b>pcre2grep</b> is built. You can find out whether your binary has support
it with the <b>--help</b> option. If the support is not enabled, all callouts in for callouts by running it with the <b>--help</b> option. If callout support is
patterns are ignored by <b>pcre2grep</b>. completely disabled, all callouts in patterns are ignored by <b>pcre2grep</b>.
If the facility is partially disabled, calling external programs is not
supported, and callouts that request it are ignored.
</P> </P>
<P> <P>
A callout in a PCRE2 pattern is of the form (?C&#60;arg&#62;) where the argument is A callout in a PCRE2 pattern is of the form (?C&#60;arg&#62;) where the argument is
@ -869,8 +871,9 @@ only callouts with string arguments are useful.
Calling external programs or scripts Calling external programs or scripts
</b><br> </b><br>
<P> <P>
If the callout string does not start with a pipe (vertical bar) character, it This facility can be independently disabled when <b>pcre2grep</b> is built. If
is parsed into a list of substrings separated by pipe characters. The first the callout string does not start with a pipe (vertical bar) character, it is
parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying substring must be an executable name, with the following substrings specifying
arguments: arguments:
<pre> <pre>
@ -910,14 +913,15 @@ matcher backtracks in the normal way.
Echoing a specific string Echoing a specific string
</b><br> </b><br>
<P> <P>
If the callout string starts with a pipe (vertical bar) character, the rest of This facility is always available, provided that callouts were not completely
the string is written to the output, having been passed through the same escape disabled when <b>pcre2grep</b> was built. If the callout string starts with a
processing as text from the --output option. This provides a simple echoing pipe (vertical bar) character, the rest of the string is written to the output,
facility that avoids calling an external program or script. No terminator is having been passed through the same escape processing as text from the --output
added to the string, so if you want a newline, you must include it explicitly. option. This provides a simple echoing facility that avoids calling an external
Matching continues normally after the string is output. If you want to see only program or script. No terminator is added to the string, so if you want a
the callout output but not any output from an actual match, you should end the newline, you must include it explicitly. Matching continues normally after the
relevant pattern with (*FAIL). string is output. If you want to see only the callout output but not any output
from an actual match, you should end the relevant pattern with (*FAIL).
</P> </P>
<br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br> <br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br>
<P> <P>
@ -962,7 +966,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC16" href="#TOC1">REVISION</a><br> <br><a name="SEC16" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 24 February 2018 Last updated: 17 November 2018
<br> <br>
Copyright &copy; 1997-2018 University of Cambridge. Copyright &copy; 1997-2018 University of Cambridge.
<br> <br>

View File

@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "24 February 2018" "PCRE2 10.32" .TH PCRE2GREP 1 "17 November 2018" "PCRE2 10.33"
.SH NAME .SH NAME
pcre2grep - a grep with Perl-compatible regular expressions. pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -759,10 +759,12 @@ character. Otherwise \fBpcre2grep\fP will assume that it has no data.
.sp .sp
\fBpcre2grep\fP has, by default, support for calling external programs or \fBpcre2grep\fP has, by default, support for calling external programs or
scripts or echoing specific strings during matching by making use of PCRE2's scripts or echoing specific strings during matching by making use of PCRE2's
callout facility. However, this support can be disabled when \fBpcre2grep\fP is callout facility. However, this support can be completely or partially disabled
built. You can find out whether your binary has support for callouts by running when \fBpcre2grep\fP is built. You can find out whether your binary has support
it with the \fB--help\fP option. If the support is not enabled, all callouts in for callouts by running it with the \fB--help\fP option. If callout support is
patterns are ignored by \fBpcre2grep\fP. completely disabled, all callouts in patterns are ignored by \fBpcre2grep\fP.
If the facility is partially disabled, calling external programs is not
supported, and callouts that request it are ignored.
.P .P
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
either a number or a quoted string (see the either a number or a quoted string (see the
@ -776,8 +778,9 @@ only callouts with string arguments are useful.
.SS "Calling external programs or scripts" .SS "Calling external programs or scripts"
.rs .rs
.sp .sp
If the callout string does not start with a pipe (vertical bar) character, it This facility can be independently disabled when \fBpcre2grep\fP is built. If
is parsed into a list of substrings separated by pipe characters. The first the callout string does not start with a pipe (vertical bar) character, it is
parsed into a list of substrings separated by pipe characters. The first
substring must be an executable name, with the following substrings specifying substring must be an executable name, with the following substrings specifying
arguments: arguments:
.sp .sp
@ -816,14 +819,15 @@ matcher backtracks in the normal way.
.SS "Echoing a specific string" .SS "Echoing a specific string"
.rs .rs
.sp .sp
If the callout string starts with a pipe (vertical bar) character, the rest of This facility is always available, provided that callouts were not completely
the string is written to the output, having been passed through the same escape disabled when \fBpcre2grep\fP was built. If the callout string starts with a
processing as text from the --output option. This provides a simple echoing pipe (vertical bar) character, the rest of the string is written to the output,
facility that avoids calling an external program or script. No terminator is having been passed through the same escape processing as text from the --output
added to the string, so if you want a newline, you must include it explicitly. option. This provides a simple echoing facility that avoids calling an external
Matching continues normally after the string is output. If you want to see only program or script. No terminator is added to the string, so if you want a
the callout output but not any output from an actual match, you should end the newline, you must include it explicitly. Matching continues normally after the
relevant pattern with (*FAIL). string is output. If you want to see only the callout output but not any output
from an actual match, you should end the relevant pattern with (*FAIL).
. .
. .
.SH "MATCHING ERRORS" .SH "MATCHING ERRORS"
@ -876,6 +880,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 24 February 2018 Last updated: 17 November 2018
Copyright (c) 1997-2018 University of Cambridge. Copyright (c) 1997-2018 University of Cambridge.
.fi .fi

View File

@ -832,22 +832,26 @@ USING PCRE2'S CALLOUT FACILITY
pcre2grep has, by default, support for calling external programs or pcre2grep has, by default, support for calling external programs or
scripts or echoing specific strings during matching by making use of scripts or echoing specific strings during matching by making use of
PCRE2's callout facility. However, this support can be disabled when PCRE2's callout facility. However, this support can be completely or
pcre2grep is built. You can find out whether your binary has support partially disabled when pcre2grep is built. You can find out whether
for callouts by running it with the --help option. If the support is your binary has support for callouts by running it with the --help
not enabled, all callouts in patterns are ignored by pcre2grep. option. If callout support is completely disabled, all callouts in pat-
terns are ignored by pcre2grep. If the facility is partially disabled,
calling external programs is not supported, and callouts that request
it are ignored.
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu- A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
ment is either a number or a quoted string (see the pcre2callout docu- ment is either a number or a quoted string (see the pcre2callout docu-
mentation for details). Numbered callouts are ignored by pcre2grep; mentation for details). Numbered callouts are ignored by pcre2grep;
only callouts with string arguments are useful. only callouts with string arguments are useful.
Calling external programs or scripts Calling external programs or scripts
If the callout string does not start with a pipe (vertical bar) charac- This facility can be independently disabled when pcre2grep is built. If
ter, it is parsed into a list of substrings separated by pipe charac- the callout string does not start with a pipe (vertical bar) character,
ters. The first substring must be an executable name, with the follow- it is parsed into a list of substrings separated by pipe characters.
ing substrings specifying arguments: The first substring must be an executable name, with the following sub-
strings specifying arguments:
executable_name|arg1|arg2|... executable_name|arg1|arg2|...
@ -885,44 +889,45 @@ USING PCRE2'S CALLOUT FACILITY
Echoing a specific string Echoing a specific string
If the callout string starts with a pipe (vertical bar) character, the This facility is always available, provided that callouts were not com-
rest of the string is written to the output, having been passed through pletely disabled when pcre2grep was built. If the callout string starts
the same escape processing as text from the --output option. This pro- with a pipe (vertical bar) character, the rest of the string is written
vides a simple echoing facility that avoids calling an external program to the output, having been passed through the same escape processing as
or script. No terminator is added to the string, so if you want a new- text from the --output option. This provides a simple echoing facility
line, you must include it explicitly. Matching continues normally that avoids calling an external program or script. No terminator is
after the string is output. If you want to see only the callout output added to the string, so if you want a newline, you must include it
but not any output from an actual match, you should end the relevant explicitly. Matching continues normally after the string is output. If
pattern with (*FAIL). you want to see only the callout output but not any output from an
actual match, you should end the relevant pattern with (*FAIL).
MATCHING ERRORS MATCHING ERRORS
It is possible to supply a regular expression that takes a very long It is possible to supply a regular expression that takes a very long
time to fail to match certain lines. Such patterns normally involve time to fail to match certain lines. Such patterns normally involve
nested indefinite repeats, for example: (a+)*\d when matched against a nested indefinite repeats, for example: (a+)*\d when matched against a
line of a's with no final digit. The PCRE2 matching function has a line of a's with no final digit. The PCRE2 matching function has a
resource limit that causes it to abort in these circumstances. If this resource limit that causes it to abort in these circumstances. If this
happens, pcre2grep outputs an error message and the line that caused happens, pcre2grep outputs an error message and the line that caused
the problem to the standard error stream. If there are more than 20 the problem to the standard error stream. If there are more than 20
such errors, pcre2grep gives up. such errors, pcre2grep gives up.
The --match-limit option of pcre2grep can be used to set the overall The --match-limit option of pcre2grep can be used to set the overall
resource limit. There are also other limits that affect the amount of resource limit. There are also other limits that affect the amount of
memory used during matching; see the discussion of --heap-limit and memory used during matching; see the discussion of --heap-limit and
--depth-limit above. --depth-limit above.
DIAGNOSTICS DIAGNOSTICS
Exit status is 0 if any matches were found, 1 if no matches were found, Exit status is 0 if any matches were found, 1 if no matches were found,
and 2 for syntax errors, overlong lines, non-existent or inaccessible and 2 for syntax errors, overlong lines, non-existent or inaccessible
files (even if matches were found in other files) or too many matching files (even if matches were found in other files) or too many matching
errors. Using the -s option to suppress error messages about inaccessi- errors. Using the -s option to suppress error messages about inaccessi-
ble files does not affect the return code. ble files does not affect the return code.
When run under VMS, the return code is placed in the symbol When run under VMS, the return code is placed in the symbol
PCRE2GREP_RC because VMS does not distinguish between exit(0) and PCRE2GREP_RC because VMS does not distinguish between exit(0) and
exit(1). exit(1).
@ -940,5 +945,5 @@ AUTHOR
REVISION REVISION
Last updated: 24 February 2018 Last updated: 17 November 2018
Copyright (c) 1997-2018 University of Cambridge. Copyright (c) 1997-2018 University of Cambridge.

View File

@ -290,6 +290,11 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value to enable callout script support in pcre2grep. */ /* Define to any value to enable callout script support in pcre2grep. */
#undef SUPPORT_PCRE2GREP_CALLOUT #undef SUPPORT_PCRE2GREP_CALLOUT
/* Define to any value to enable fork support in pcre2grep callout scripts.
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
*/
#undef SUPPORT_PCRE2GREP_CALLOUT_FORK
/* Define to any value to enable JIT support in pcre2grep. Note that this will /* Define to any value to enable JIT support in pcre2grep. Note that this will
have no effect unless SUPPORT_JIT is also defined. */ have no effect unless SUPPORT_JIT is also defined. */
#undef SUPPORT_PCRE2GREP_JIT #undef SUPPORT_PCRE2GREP_JIT

View File

@ -73,7 +73,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <fcntl.h> /* For _O_BINARY */ #include <fcntl.h> /* For _O_BINARY */
#endif #endif
#ifdef SUPPORT_PCRE2GREP_CALLOUT #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
#ifdef WIN32 #ifdef WIN32
#include <process.h> #include <process.h>
#else #else
@ -1133,7 +1133,11 @@ printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL); printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
#ifdef SUPPORT_PCRE2GREP_CALLOUT #ifdef SUPPORT_PCRE2GREP_CALLOUT
printf("Callout scripts in patterns are supported." STDOUT_NL); #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
printf("All callout scripts in patterns are supported." STDOUT_NL);
#else
printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
#endif
#else #else
printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL); printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
#endif #endif
@ -2017,10 +2021,10 @@ return printed;
* Parse and execute callout scripts * * Parse and execute callout scripts *
*************************************************/ *************************************************/
/* This function parses a callout string block and executes the /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
program specified by the string. The string is a list of substrings string block and executes the program specified by the string. The string is a
separated by pipe characters. The first substring represents the list of substrings separated by pipe characters. The first substring represents
executable name, and the following substrings specify the arguments: the executable name, and the following substrings specify the arguments:
program_name|param1|param2|... program_name|param1|param2|...
@ -2037,8 +2041,9 @@ follows:
dollar or $| replaced by a pipe character. dollar or $| replaced by a pipe character.
Alternatively, if string starts with pipe, the remainder is taken as an output Alternatively, if string starts with pipe, the remainder is taken as an output
string, same as --output. In this case, --om-separator is used to separate each string, same as --output. This is the only form that is supported if
callout, defaulting to newline. SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is used to
separate each callout, defaulting to newline.
Example: Example:
@ -2066,6 +2071,8 @@ PCRE2_SPTR string = calloutptr->callout_string;
PCRE2_SPTR subject = calloutptr->subject; PCRE2_SPTR subject = calloutptr->subject;
PCRE2_SIZE *ovector = calloutptr->offset_vector; PCRE2_SIZE *ovector = calloutptr->offset_vector;
PCRE2_SIZE capture_top = calloutptr->capture_top; PCRE2_SIZE capture_top = calloutptr->capture_top;
#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
PCRE2_SIZE argsvectorlen = 2; PCRE2_SIZE argsvectorlen = 2;
PCRE2_SIZE argslen = 1; PCRE2_SIZE argslen = 1;
char *args; char *args;
@ -2076,10 +2083,12 @@ char **argsvectorptr;
pid_t pid; pid_t pid;
#endif #endif
int result = 0; int result = 0;
#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
(void)unused; /* Avoid compiler warning */ (void)unused; /* Avoid compiler warning */
/* Only callouts with string arguments are supported. */ /* Only callouts with string arguments are supported. */
if (string == NULL || length == 0) return 0; if (string == NULL || length == 0) return 0;
/* If there's no command, output the remainder directly. */ /* If there's no command, output the remainder directly. */
@ -2092,6 +2101,10 @@ if (*string == '|')
return 0; return 0;
} }
#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
return 0;
#else
/* Check the syntax and compute the number of string fragments. Callout strings /* Check the syntax and compute the number of string fragments. Callout strings
are ignored in case of a syntax error. */ are ignored in case of a syntax error. */
@ -2294,9 +2307,9 @@ free(argsvector);
continues) or non-zero (match fails). */ continues) or non-zero (match fails). */
return result != 0; return result != 0;
#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
} }
#endif /* SUPPORT_PCRE2GREP_CALLOUT */
#endif

30
testdata/grepoutputCN vendored Normal file
View File

@ -0,0 +1,30 @@
The quick brown
This time it jumps and jumps and jumps.
This line contains \E and (regex) *meta* [characters].
The word is cat in this line
The caterpillar sat on the mat
The snowcat is not an animal
The quick brown
This time it jumps and jumps and jumps.
This line contains \E and (regex) *meta* [characters].
The word is cat in this line
The caterpillar sat on the mat
The snowcat is not an animal
0:T
The quick brown
0:T
This time it jumps and jumps and jumps.
0:T
This line contains \E and (regex) *meta* [characters].
0:T
The word is cat in this line
0:T
The caterpillar sat on the mat
0:T
The snowcat is not an animal
T
T
T
T
T
T