From 0ad7ff15499a0629b7252f7120bfc39afa4dbed2 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sat, 17 Nov 2018 16:45:57 +0000 Subject: [PATCH] Add --disable-pcre2grep-callout-fork configuration setting. --- CMakeLists.txt | 37 ++++++++++++++------ ChangeLog | 4 +++ Makefile.am | 1 + RunGrepTest | 16 ++++++--- RunGrepTest.bat | 9 +++-- config-cmake.h.in | 2 ++ configure.ac | 21 ++++++++++-- doc/html/pcre2grep.html | 34 ++++++++++--------- doc/pcre2grep.1 | 36 +++++++++++--------- doc/pcre2grep.txt | 75 ++++++++++++++++++++++------------------- src/config.h.in | 5 +++ src/pcre2grep.c | 33 ++++++++++++------ testdata/grepoutputCN | 30 +++++++++++++++++ 13 files changed, 208 insertions(+), 95 deletions(-) create mode 100644 testdata/grepoutputCN diff --git a/CMakeLists.txt b/CMakeLists.txt index f086483..0306bc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,6 +83,7 @@ # 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed) # 2018-06-27 PH added Daniel's patch to increase the stack for MSVC # 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h +# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied PROJECT(PCRE2 C) @@ -136,7 +137,7 @@ OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF) OPTION(PCRE2_DEBUG "Include debugging code" OFF) -OPTION(DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) +OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) SET(PCRE2_EBCDIC OFF CACHE BOOL "Use EBCDIC coding instead of ASCII. 
(This is rarely used outside of mainframe systems.)") @@ -177,12 +178,15 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT.") -SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL +SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.") -SET(PCRE2_SUPPORT_PCRE2GREP_CALLOUT ON CACHE BOOL +SET(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.") +SET(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL + "Enable callout string fork support in pcre2grep.") + SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") @@ -301,18 +305,25 @@ IF(PCRE2_SUPPORT_JIT_SEALLOC) SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1) ENDIF(PCRE2_SUPPORT_JIT_SEALLOC) -IF(PCRE2_SUPPORT_PCRE2GREP_JIT) +IF(PCRE2GREP_SUPPORT_JIT) SET(SUPPORT_PCRE2GREP_JIT 1) -ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT) +ENDIF(PCRE2GREP_SUPPORT_JIT) -IF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) +IF(PCRE2GREP_SUPPORT_CALLOUT) SET(SUPPORT_PCRE2GREP_CALLOUT 1) -ENDIF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) + IF(PCRE2GREP_SUPPORT_CALLOUT_FORK) + SET(SUPPORT_PCRE2GREP_CALLOUT_FORK 1) + ENDIF(PCRE2GREP_SUPPORT_CALLOUT_FORK) +ENDIF(PCRE2GREP_SUPPORT_CALLOUT) IF(PCRE2_SUPPORT_VALGRIND) SET(SUPPORT_VALGRIND 1) ENDIF(PCRE2_SUPPORT_VALGRIND) +IF(PCRE2_DISABLE_PERCENT_ZT) + SET(DISABLE_PERCENT_ZT 1) +ENDIF(PCRE2_DISABLE_PERCENT_ZT) + # This next one used to reference ${READLINE_LIBRARY}) # but I was advised to add the NCURSES test as well, along with # some modifications to cmake/FindReadline.cmake which should @@ -802,10 +813,11 @@ IF(PCRE2_SHOW_REPORT) MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}") - MESSAGE(STATUS " Enable JIT in pcre2grep ......... 
: ${PCRE2_SUPPORT_PCRE2GREP_JIT}") - MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2_SUPPORT_PCRE2GREP_CALLOUT}") + MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2GREP_SUPPORT_JIT}") + MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2GREP_SUPPORT_CALLOUT}") + MESSAGE(STATUS " Enable callout fork in pcre2grep. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}") MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}") - MESSAGE(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}") + MESSAGE(STATUS " Build tests (implies pcre2test .. : ${PCRE2_BUILD_TESTS}") MESSAGE(STATUS " and pcre2grep)") IF(ZLIB_FOUND) MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}") @@ -828,6 +840,11 @@ IF(PCRE2_SHOW_REPORT) MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" ) ENDIF(READLINE_FOUND) MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}") + IF(PCRE2_DISABLE_PERCENT_ZT) + MESSAGE(STATUS " Use %zu and %td ..................: OFF" ) + ELSE(PCRE2_DISABLE_PERCENT_ZT) + MESSAGE(STATUS " Use %zu and %td ..................: AUTO" ) + ENDIF(PCRE2_DISABLE_PERCENT_ZT) IF(MINGW AND NOT PCRE2_STATIC) MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}") diff --git a/ChangeLog b/ChangeLog index b5aa03b..fc9cb11 100644 --- a/ChangeLog +++ b/ChangeLog @@ -73,6 +73,10 @@ forcibly disable the use of %zu and %td in formatting strings because there is at least one version of VMS that claims to be C99 but does not support these modifiers. +18. Added --disable-pcre2grep-callout-fork, which restricts the callout support +in pcre2grep to the inbuilt echo facility. This may be useful in environments +that do not support fork(). 
+ Version 10.32 10-September-2018 ------------------------------- diff --git a/Makefile.am b/Makefile.am index 17facba..b5e3635 100644 --- a/Makefile.am +++ b/Makefile.am @@ -625,6 +625,7 @@ EXTRA_DIST += \ testdata/grepoutput \ testdata/grepoutput8 \ testdata/grepoutputC \ + testdata/grepoutputCN \ testdata/grepoutputN \ testdata/greppatN4 \ testdata/testinput1 \ diff --git a/RunGrepTest b/RunGrepTest index 74ff4c1..ddf57ac 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -732,16 +732,24 @@ fi $cf $srcdir/testdata/grepoutputN testtrygrep if [ $? != 0 ] ; then exit 1; fi -# If pcre2grep supports script callouts, run some tests on them. +# If pcre2grep supports script callouts, run some tests on them. It is possible +# to restrict these callouts to the non-fork case, either for security, or for +# environments that do not support fork(). This is handled by comparing to a +# different output. -if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Callout scripts in patterns are supported'; then +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then echo "Testing pcre2grep script callouts" $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep - # The above has no newline, which 'diff -ub' ignores, so add one. - $cf $srcdir/testdata/grepoutputC testtrygrep + + if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then + $cf $srcdir/testdata/grepoutputCN testtrygrep + else + $cf $srcdir/testdata/grepoutputC testtrygrep + fi + if [ $? 
!= 0 ] ; then exit 1; fi else echo "Script callouts are not supported" diff --git a/RunGrepTest.bat b/RunGrepTest.bat index 50a9644..4a095a3 100644 --- a/RunGrepTest.bat +++ b/RunGrepTest.bat @@ -653,14 +653,19 @@ if ERRORLEVEL 1 exit /b 1 :: If pcre2grep supports script callouts, run some tests on them. -%pcre2grep% --help | %pcre2grep% -q "Callout scripts in patterns are supported" +%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported" if %ERRORLEVEL% equ 0 ( echo Testing pcre2grep script callouts %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep - %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% + %pcre2grep% --help | %pcre2grep% -q "Non-fork callout scripts in patterns are supported" + if not ERRORLEVEL 1 ( + %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout% + ) else ( + %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% + ) if ERRORLEVEL 1 exit /b 1 ) else ( echo Script callouts are not supported diff --git a/config-cmake.h.in b/config-cmake.h.in index f72b18c..529b009 100644 --- a/config-cmake.h.in +++ b/config-cmake.h.in @@ -28,6 +28,8 @@ #cmakedefine SUPPORT_JIT 1 #cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1 #cmakedefine SUPPORT_PCRE2GREP_JIT 1 +#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1 +#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1 #cmakedefine SUPPORT_UNICODE 1 #cmakedefine SUPPORT_VALGRIND 1 diff --git a/configure.ac b/configure.ac index 054aca5..c59a439 100644 --- a/configure.ac +++ b/configure.ac @@ -173,6 +173,12 @@ AC_ARG_ENABLE(pcre2grep-callout, [disable callout script support in pcre2grep]), , enable_pcre2grep_callout=yes) +# Handle
--disable-pcre2grep-callout-fork (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout-fork, + AS_HELP_STRING([--disable-pcre2grep-callout-fork], + [disable callout script fork support in pcre2grep]), + , enable_pcre2grep_callout_fork=yes) + # Handle --enable-rebuild-chartables AC_ARG_ENABLE(rebuild-chartables, AS_HELP_STRING([--enable-rebuild-chartables], @@ -630,13 +636,21 @@ if test "$enable_pcre2grep_jit" = "yes"; then fi if test "$enable_pcre2grep_callout" = "yes"; then - if test "$HAVE_WINDOWS_H" != "1"; then - if test "$HAVE_SYS_WAIT_H" != "1"; then - AC_MSG_ERROR([Callout script support needs sys/wait.h.]) + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + AC_MSG_ERROR([Callout script support needs sys/wait.h.]) + fi fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [ + Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also + defined.]) fi AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ Define to any value to enable callout script support in pcre2grep.]) +else + enable_pcre2grep_callout_fork="no" fi if test "$enable_unicode" = "yes"; then @@ -1038,6 +1052,7 @@ $PACKAGE-$VERSION configuration summary: Build static libs .................. : ${enable_static} Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit} Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout} + Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork} Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize} Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize} Link pcre2grep with libz ........... : ${enable_pcre2grep_libz} diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html index 272b87d..30e7bd4 100644 --- a/doc/html/pcre2grep.html +++ b/doc/html/pcre2grep.html @@ -853,10 +853,12 @@ character. Otherwise pcre2grep will assume that it has no data.

pcre2grep has, by default, support for calling external programs or scripts or echoing specific strings during matching by making use of PCRE2's -callout facility. However, this support can be disabled when pcre2grep is -built. You can find out whether your binary has support for callouts by running -it with the --help option. If the support is not enabled, all callouts in -patterns are ignored by pcre2grep. +callout facility. However, this support can be completely or partially disabled +when pcre2grep is built. You can find out whether your binary has support +for callouts by running it with the --help option. If callout support is +completely disabled, all callouts in patterns are ignored by pcre2grep. +If the facility is partially disabled, calling external programs is not +supported, and callouts that request it are ignored.

A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is @@ -869,8 +871,9 @@ only callouts with string arguments are useful. Calling external programs or scripts

-If the callout string does not start with a pipe (vertical bar) character, it -is parsed into a list of substrings separated by pipe characters. The first +This facility can be independently disabled when pcre2grep is built. If +the callout string does not start with a pipe (vertical bar) character, it is +parsed into a list of substrings separated by pipe characters. The first substring must be an executable name, with the following substrings specifying arguments:

@@ -910,14 +913,15 @@ matcher backtracks in the normal way.
 Echoing a specific string
 

-If the callout string starts with a pipe (vertical bar) character, the rest of -the string is written to the output, having been passed through the same escape -processing as text from the --output option. This provides a simple echoing -facility that avoids calling an external program or script. No terminator is -added to the string, so if you want a newline, you must include it explicitly. -Matching continues normally after the string is output. If you want to see only -the callout output but not any output from an actual match, you should end the -relevant pattern with (*FAIL). +This facility is always available, provided that callouts were not completely +disabled when pcre2grep was built. If the callout string starts with a +pipe (vertical bar) character, the rest of the string is written to the output, +having been passed through the same escape processing as text from the --output +option. This provides a simple echoing facility that avoids calling an external +program or script. No terminator is added to the string, so if you want a +newline, you must include it explicitly. Matching continues normally after the +string is output. If you want to see only the callout output but not any output +from an actual match, you should end the relevant pattern with (*FAIL).


MATCHING ERRORS

@@ -962,7 +966,7 @@ Cambridge, England.


REVISION

-Last updated: 24 February 2018 +Last updated: 17 November 2018
Copyright © 1997-2018 University of Cambridge.
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 index ce112af..6f8c440 100644 --- a/doc/pcre2grep.1 +++ b/doc/pcre2grep.1 @@ -1,4 +1,4 @@ -.TH PCRE2GREP 1 "24 February 2018" "PCRE2 10.32" +.TH PCRE2GREP 1 "17 November 2018" "PCRE2 10.33" .SH NAME pcre2grep - a grep with Perl-compatible regular expressions. .SH SYNOPSIS @@ -759,10 +759,12 @@ character. Otherwise \fBpcre2grep\fP will assume that it has no data. .sp \fBpcre2grep\fP has, by default, support for calling external programs or scripts or echoing specific strings during matching by making use of PCRE2's -callout facility. However, this support can be disabled when \fBpcre2grep\fP is -built. You can find out whether your binary has support for callouts by running -it with the \fB--help\fP option. If the support is not enabled, all callouts in -patterns are ignored by \fBpcre2grep\fP. +callout facility. However, this support can be completely or partially disabled +when \fBpcre2grep\fP is built. You can find out whether your binary has support +for callouts by running it with the \fB--help\fP option. If callout support is +completely disabled, all callouts in patterns are ignored by \fBpcre2grep\fP. +If the facility is partially disabled, calling external programs is not +supported, and callouts that request it are ignored. .P A callout in a PCRE2 pattern is of the form (?C) where the argument is either a number or a quoted string (see the @@ -776,8 +778,9 @@ only callouts with string arguments are useful. .SS "Calling external programs or scripts" .rs .sp -If the callout string does not start with a pipe (vertical bar) character, it -is parsed into a list of substrings separated by pipe characters. The first +This facility can be independently disabled when \fBpcre2grep\fP is built. If +the callout string does not start with a pipe (vertical bar) character, it is +parsed into a list of substrings separated by pipe characters. 
The first substring must be an executable name, with the following substrings specifying arguments: .sp @@ -816,14 +819,15 @@ matcher backtracks in the normal way. .SS "Echoing a specific string" .rs .sp -If the callout string starts with a pipe (vertical bar) character, the rest of -the string is written to the output, having been passed through the same escape -processing as text from the --output option. This provides a simple echoing -facility that avoids calling an external program or script. No terminator is -added to the string, so if you want a newline, you must include it explicitly. -Matching continues normally after the string is output. If you want to see only -the callout output but not any output from an actual match, you should end the -relevant pattern with (*FAIL). +This facility is always available, provided that callouts were not completely +disabled when \fBpcre2grep\fP was built. If the callout string starts with a +pipe (vertical bar) character, the rest of the string is written to the output, +having been passed through the same escape processing as text from the --output +option. This provides a simple echoing facility that avoids calling an external +program or script. No terminator is added to the string, so if you want a +newline, you must include it explicitly. Matching continues normally after the +string is output. If you want to see only the callout output but not any output +from an actual match, you should end the relevant pattern with (*FAIL). . . .SH "MATCHING ERRORS" @@ -876,6 +880,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 24 February 2018 +Last updated: 17 November 2018 Copyright (c) 1997-2018 University of Cambridge. 
.fi diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt index 000239c..e9f197c 100644 --- a/doc/pcre2grep.txt +++ b/doc/pcre2grep.txt @@ -832,22 +832,26 @@ USING PCRE2'S CALLOUT FACILITY pcre2grep has, by default, support for calling external programs or scripts or echoing specific strings during matching by making use of - PCRE2's callout facility. However, this support can be disabled when - pcre2grep is built. You can find out whether your binary has support - for callouts by running it with the --help option. If the support is - not enabled, all callouts in patterns are ignored by pcre2grep. + PCRE2's callout facility. However, this support can be completely or + partially disabled when pcre2grep is built. You can find out whether + your binary has support for callouts by running it with the --help + option. If callout support is completely disabled, all callouts in pat- + terns are ignored by pcre2grep. If the facility is partially disabled, + calling external programs is not supported, and callouts that request + it are ignored. - A callout in a PCRE2 pattern is of the form (?C) where the argu- - ment is either a number or a quoted string (see the pcre2callout docu- - mentation for details). Numbered callouts are ignored by pcre2grep; + A callout in a PCRE2 pattern is of the form (?C) where the argu- + ment is either a number or a quoted string (see the pcre2callout docu- + mentation for details). Numbered callouts are ignored by pcre2grep; only callouts with string arguments are useful. Calling external programs or scripts - If the callout string does not start with a pipe (vertical bar) charac- - ter, it is parsed into a list of substrings separated by pipe charac- - ters. The first substring must be an executable name, with the follow- - ing substrings specifying arguments: + This facility can be independently disabled when pcre2grep is built. 
If + the callout string does not start with a pipe (vertical bar) character, + it is parsed into a list of substrings separated by pipe characters. + The first substring must be an executable name, with the following sub- + strings specifying arguments: executable_name|arg1|arg2|... @@ -885,44 +889,45 @@ USING PCRE2'S CALLOUT FACILITY Echoing a specific string - If the callout string starts with a pipe (vertical bar) character, the - rest of the string is written to the output, having been passed through - the same escape processing as text from the --output option. This pro- - vides a simple echoing facility that avoids calling an external program - or script. No terminator is added to the string, so if you want a new- - line, you must include it explicitly. Matching continues normally - after the string is output. If you want to see only the callout output - but not any output from an actual match, you should end the relevant - pattern with (*FAIL). + This facility is always available, provided that callouts were not com- + pletely disabled when pcre2grep was built. If the callout string starts + with a pipe (vertical bar) character, the rest of the string is written + to the output, having been passed through the same escape processing as + text from the --output option. This provides a simple echoing facility + that avoids calling an external program or script. No terminator is + added to the string, so if you want a newline, you must include it + explicitly. Matching continues normally after the string is output. If + you want to see only the callout output but not any output from an + actual match, you should end the relevant pattern with (*FAIL). MATCHING ERRORS - It is possible to supply a regular expression that takes a very long - time to fail to match certain lines. Such patterns normally involve - nested indefinite repeats, for example: (a+)*\d when matched against a - line of a's with no final digit. 
The PCRE2 matching function has a - resource limit that causes it to abort in these circumstances. If this - happens, pcre2grep outputs an error message and the line that caused - the problem to the standard error stream. If there are more than 20 + It is possible to supply a regular expression that takes a very long + time to fail to match certain lines. Such patterns normally involve + nested indefinite repeats, for example: (a+)*\d when matched against a + line of a's with no final digit. The PCRE2 matching function has a + resource limit that causes it to abort in these circumstances. If this + happens, pcre2grep outputs an error message and the line that caused + the problem to the standard error stream. If there are more than 20 such errors, pcre2grep gives up. - The --match-limit option of pcre2grep can be used to set the overall - resource limit. There are also other limits that affect the amount of - memory used during matching; see the discussion of --heap-limit and + The --match-limit option of pcre2grep can be used to set the overall + resource limit. There are also other limits that affect the amount of + memory used during matching; see the discussion of --heap-limit and --depth-limit above. DIAGNOSTICS Exit status is 0 if any matches were found, 1 if no matches were found, - and 2 for syntax errors, overlong lines, non-existent or inaccessible - files (even if matches were found in other files) or too many matching + and 2 for syntax errors, overlong lines, non-existent or inaccessible + files (even if matches were found in other files) or too many matching errors. Using the -s option to suppress error messages about inaccessi- ble files does not affect the return code. - When run under VMS, the return code is placed in the symbol - PCRE2GREP_RC because VMS does not distinguish between exit(0) and + When run under VMS, the return code is placed in the symbol + PCRE2GREP_RC because VMS does not distinguish between exit(0) and exit(1). 
@@ -940,5 +945,5 @@ AUTHOR REVISION - Last updated: 24 February 2018 + Last updated: 17 November 2018 Copyright (c) 1997-2018 University of Cambridge. diff --git a/src/config.h.in b/src/config.h.in index 9fdf581..6b8eb7e 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -290,6 +290,11 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value to enable callout script support in pcre2grep. */ #undef SUPPORT_PCRE2GREP_CALLOUT +/* Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +#undef SUPPORT_PCRE2GREP_CALLOUT_FORK + /* Define to any value to enable JIT support in pcre2grep. Note that this will have no effect unless SUPPORT_JIT is also defined. */ #undef SUPPORT_PCRE2GREP_JIT diff --git a/src/pcre2grep.c b/src/pcre2grep.c index 1027047..477e867 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -73,7 +73,7 @@ POSSIBILITY OF SUCH DAMAGE. #include /* For _O_BINARY */ #endif -#ifdef SUPPORT_PCRE2GREP_CALLOUT +#if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK) #ifdef WIN32 #include #else @@ -1133,7 +1133,11 @@ printf("Search for PATTERN in each FILE or standard input." STDOUT_NL); printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL); #ifdef SUPPORT_PCRE2GREP_CALLOUT -printf("Callout scripts in patterns are supported." STDOUT_NL); +#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK +printf("All callout scripts in patterns are supported." STDOUT_NL); +#else +printf("Non-fork callout scripts in patterns are supported." STDOUT_NL); +#endif #else printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL); #endif @@ -2017,10 +2021,10 @@ return printed; * Parse and execute callout scripts * *************************************************/ -/* This function parses a callout string block and executes the -program specified by the string. 
The string is a list of substrings -separated by pipe characters. The first substring represents the -executable name, and the following substrings specify the arguments: +/* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout +string block and executes the program specified by the string. The string is a +list of substrings separated by pipe characters. The first substring represents +the executable name, and the following substrings specify the arguments: program_name|param1|param2|... @@ -2037,8 +2041,9 @@ follows: dollar or $| replaced by a pipe character. Alternatively, if string starts with pipe, the remainder is taken as an output -string, same as --output. In this case, --om-separator is used to separate each -callout, defaulting to newline. +string, same as --output. This is the only form that is supported if +SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is +used to separate each callout, defaulting to newline. Example: @@ -2066,6 +2071,8 @@ PCRE2_SPTR string = calloutptr->callout_string; PCRE2_SPTR subject = calloutptr->subject; PCRE2_SIZE *ovector = calloutptr->offset_vector; PCRE2_SIZE capture_top = calloutptr->capture_top; + +#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK PCRE2_SIZE argsvectorlen = 2; PCRE2_SIZE argslen = 1; char *args; @@ -2076,10 +2083,12 @@ char **argsvectorptr; pid_t pid; #endif int result = 0; +#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */ (void)unused; /* Avoid compiler warning */ /* Only callout with strings are supported. */ + if (string == NULL || length == 0) return 0; /* If there's no command, output the remainder directly. */ @@ -2091,6 +2100,10 @@ if (*string == '|') (void)display_output_text(string, TRUE, subject, ovector, capture_top); return 0; } + +#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK +return 0; +#else /* Checking syntax and compute the number of string fragments. Callout strings are ignored in case of a syntax error.
*/ @@ -2294,9 +2307,9 @@ free(argsvector); continues) or non-zero (match fails). */ return result != 0; +#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */ } - -#endif +#endif /* SUPPORT_PCRE2GREP_CALLOUT */ diff --git a/testdata/grepoutputCN b/testdata/grepoutputCN new file mode 100644 index 0000000..5217b5a --- /dev/null +++ b/testdata/grepoutputCN @@ -0,0 +1,30 @@ +The quick brown +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +The quick brown +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +0:T +The quick brown +0:T +This time it jumps and jumps and jumps. +0:T +This line contains \E and (regex) *meta* [characters]. +0:T +The word is cat in this line +0:T +The caterpillar sat on the mat +0:T +The snowcat is not an animal +T +T +T +T +T +T