From c332eaf4f2c985aaca0979fd310d73545f4c2b83 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 1 Apr 2016 15:52:08 +0000 Subject: [PATCH] Add callout support to pcre2grep --- CMakeLists.txt | 8 ++ ChangeLog | 3 + Makefile.am | 1 + README | 19 ++- RunGrepTest | 11 ++ configure.ac | 28 ++++ doc/pcre2build.3 | 21 ++- doc/pcre2grep.1 | 56 +++++++- maint/ManyConfigTests | 2 +- src/config.h.in | 6 + src/pcre2grep.c | 302 ++++++++++++++++++++++++++++++++++++++++-- testdata/grepoutputC | 8 ++ 12 files changed, 441 insertions(+), 24 deletions(-) create mode 100644 testdata/grepoutputC diff --git a/CMakeLists.txt b/CMakeLists.txt index 3df4af0..6c84dad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,6 +158,9 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.") +SET(PCRE2_SUPPORT_PCRE2GREP_CALLOUT ON CACHE BOOL + "Enable callout string support in pcre2grep.") + SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") @@ -273,6 +276,10 @@ IF(PCRE2_SUPPORT_PCRE2GREP_JIT) SET(SUPPORT_PCRE2GREP_JIT 1) ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT) +IF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) + SET(SUPPORT_PCRE2GREP_CALLOUT 1) +ENDIF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT) + IF(PCRE2_SUPPORT_VALGRIND) SET(SUPPORT_VALGRIND 1) ENDIF(PCRE2_SUPPORT_VALGRIND) @@ -753,6 +760,7 @@ IF(PCRE2_SHOW_REPORT) MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}") MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}") + MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2_SUPPORT_PCRE2GREP_CALLOUT}") MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}") MESSAGE(STATUS " Build tests (implies pcre2test . 
: ${PCRE2_BUILD_TESTS}") MESSAGE(STATUS " and pcre2grep)") diff --git a/ChangeLog b/ChangeLog index 43d5308..7566f9b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -97,6 +97,9 @@ appropriate line terminator: \r\n for Windows, \n otherwise. 21. When a line is too long for pcre2grep's internal buffer, show the maximum length in the error message. +22. Added support for string callouts to pcre2grep (Zoltan's patch with PH +additions). + Version 10.21 12-January-2016 ----------------------------- diff --git a/Makefile.am b/Makefile.am index 19bfb90..38f1d41 100644 --- a/Makefile.am +++ b/Makefile.am @@ -570,6 +570,7 @@ EXTRA_DIST += \ testdata/greplist \ testdata/grepoutput \ testdata/grepoutput8 \ + testdata/grepoutputC \ testdata/grepoutputN \ testdata/greppatN4 \ testdata/testinput1 \ diff --git a/README b/README index 48d2ffd..6cb1bbb 100644 --- a/README +++ b/README @@ -168,15 +168,12 @@ library. They are also documented in the pcre2build man page. built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 to disable building the 8-bit library. -. If you want to include support for just-in-time compiling, which can give - large performance improvements on certain platforms, add --enable-jit to the - "configure" command. This support is available only for certain hardware +. If you want to include support for just-in-time (JIT) compiling, which can + give large performance improvements on certain platforms, add --enable-jit to + the "configure" command. This support is available only for certain hardware architectures. If you try to enable it on an unsupported architecture, there will be a compile time error. -. When JIT support is enabled, pcre2grep automatically makes use of it, unless - you add --disable-pcre2grep-jit to the "configure" command. - . 
If you do not want to make use of the support for UTF-8 Unicode character strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit library, or UTF-32 Unicode character strings in the 32-bit library, you can @@ -324,6 +321,14 @@ library. They are also documented in the pcre2build man page. running "make" to build PCRE2. There is more information about coverage reporting in the "pcre2build" documentation. +. When JIT support is enabled, pcre2grep automatically makes use of it, unless + you add --disable-pcre2grep-jit to the "configure" command. + +. On non-Windows systems there is support for calling external scripts during + matching in the pcre2grep command via PCRE2's callout facility with string + arguments. This support can be disabled by adding --disable-pcre2grep-callout + to the "configure" command. + . The pcre2grep program currently supports only 8-bit data files, and so requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by @@ -840,4 +845,4 @@ The distribution should contain the files listed below. Philip Hazel Email local part: ph10 Email domain: cam.ac.uk -Last updated: 16 October 2015 +Last updated: 01 April 2016 diff --git a/RunGrepTest b/RunGrepTest index 67d672b..dcd2edc 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -614,6 +614,17 @@ $valgrind $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >> $cf $srcdir/testdata/grepoutputN testtrygrep if [ $? != 0 ] ; then exit 1; fi +# If pcre2grep supports script callouts, run some tests on them.
+ +if $valgrind $pcre2grep --help | $valgrind $pcre2grep -q 'Callout scripts in patterns are supported'; then + echo "Testing pcre2grep script callouts" + $valgrind $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep + $valgrind $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep + $cf $srcdir/testdata/grepoutputC testtrygrep + if [ $? != 0 ] ; then exit 1; fi +else + echo "Script callouts are not supported" +fi # Finally, some tests to exercise code that is not tested above, just to be # sure that it runs OK. Doing this improves the coverage statistics. The output diff --git a/configure.ac b/configure.ac index 9c697f5..087cd58 100644 --- a/configure.ac +++ b/configure.ac @@ -148,6 +148,17 @@ AC_ARG_ENABLE(pcre2grep-jit, [disable JIT support in pcre2grep]), , enable_pcre2grep_jit=yes) +# Handle --disable-pcre2grep-callout (enabled by default) but not supported +# for Windows. +if test "$HAVE_WINDOWS_H" != "1"; then + AC_ARG_ENABLE(pcre2grep-callout, + AS_HELP_STRING([--disable-pcre2grep-callout], + [disable callout script support in pcre2grep]), + , enable_pcre2grep_callout=yes) +else + enable_pcre2grep_callout=no +fi + # Handle --enable-rebuild-chartables AC_ARG_ENABLE(rebuild-chartables, AS_HELP_STRING([--enable-rebuild-chartables], @@ -392,6 +403,7 @@ sure both macros are undefined; an emulation function will then be used. */]) AC_HEADER_STDC AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h) AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1]) +AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) # Conditional compilation AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes") @@ -546,6 +558,21 @@ if test "$enable_pcre2grep_jit" = "yes"; then Define to any value to enable JIT support in pcre2grep.]) fi +# Currently pcre2grep callout string is not supported under Windows. 
+ +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + AC_MSG_ERROR([Callout script support needs sys/wait.h.]) + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ + Define to any value to enable callout script support in pcre2grep.]) + else + AC_MSG_WARN([Callout script support is not available for Windows: disabled]) + enable_pcre2grep_callout=no + fi +fi + if test "$enable_unicode" = "yes"; then AC_DEFINE([SUPPORT_UNICODE], [], [ Define to any value to enable support for Unicode and UTF encoding. @@ -908,6 +935,7 @@ $PACKAGE-$VERSION configuration summary: Build shared libs ............... : ${enable_shared} Build static libs ............... : ${enable_static} Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit} + Enable callouts in pcre2grep .... : ${enable_pcre2grep_callout} Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize} Link pcre2grep with libz ........ : ${enable_pcre2grep_libz} Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2} diff --git a/doc/pcre2build.3 b/doc/pcre2build.3 index a90f1b2..588bd4b 100644 --- a/doc/pcre2build.3 +++ b/doc/pcre2build.3 @@ -1,4 +1,4 @@ -.TH PCRE2BUILD 3 "16 October 2015" "PCRE2 10.21" +.TH PCRE2BUILD 3 "01 April 2016" "PCRE2 10.22" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) . @@ -352,6 +352,19 @@ and equivalent run-time options, refer to these character values in an EBCDIC environment. . . +.SH "PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS" +.rs +.sp +By default, on non-Windows systems, \fBpcre2grep\fP supports the use of +callouts with string arguments within the patterns it is matching, in order to +run external scripts. For details, see the +.\" HREF +\fBpcre2grep\fP +.\" +documentation. This support can be disabled by adding +--disable-pcre2grep-callout to the \fBconfigure\fP command. +. +. 
.SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT" .rs .sp @@ -381,7 +394,7 @@ parameter value by adding, for example, --with-pcre2grep-bufsize=50K .sp to the \fBconfigure\fP command. The caller of \fPpcre2grep\fP can override this -value by using --buffer-size on the command line.. +value by using --buffer-size on the command line. . . .SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT" @@ -519,6 +532,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 16 October 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 01 April 2016 +Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 index 028a91e..513a15f 100644 --- a/doc/pcre2grep.1 +++ b/doc/pcre2grep.1 @@ -1,4 +1,4 @@ -.TH PCRE2GREP 1 "03 January 2015" "PCRE2 10.00" +.TH PCRE2GREP 1 "01 April 2016" "PCRE2 10.22" .SH NAME pcre2grep - a grep with Perl-compatible regular expressions. .SH SYNOPSIS @@ -653,6 +653,54 @@ options does have data, it must be given in the first form, using an equals character. Otherwise \fBpcre2grep\fP will assume that it has no data. . . +.SH "CALLING EXTERNAL SCRIPTS" +.rs +.sp +On non-Windows systems, \fBpcre2grep\fP has, by default, support for calling +external programs or scripts during matching by making use of PCRE2's callout +facility. However, this support can be disabled when \fBpcre2grep\fP is built. +You can find out whether your binary has support for callouts by running it +with the \fB--help\fP option. If the support is not enabled, all callouts in +patterns are ignored by \fBpcre2grep\fP. +.P +A callout in a PCRE2 pattern is of the form (?C) where the argument is +either a number or a quoted string (see the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP. +String arguments are parsed as a list of substrings separated by pipe (vertical +bar) characters. 
The first substring must be an executable name, with the +following substrings specifying arguments: +.sp + executable_name|arg1|arg2|... +.sp +Any substring (including the executable name) may contain escape sequences +started by a dollar character: $<digits> or ${<digits>} is replaced by the +captured substring of the given decimal number, which must be greater than +zero. If the number is greater than the number of capturing substrings, or if +the capture is unset, the replacement is empty. +.P +Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar and $| is replaced by a pipe character. Here is an example: +.sp + echo -e "abcde\en12345" | pcre2grep \e + '(?x)(.)(..(.)) + (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - + + Output: + + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 +.sp +Any syntax errors in the string (for example, a dollar not followed by another +character) cause the callout to be ignored. If running the program fails for +any reason (including the non-existence of the executable), a local matching +failure occurs and the matcher backtracks in the normal way. +. +. .SH "MATCHING ERRORS" .rs .sp @@ -683,7 +731,7 @@ affect the return code. .SH "SEE ALSO" .rs .sp -\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3). +\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3). . . .SH AUTHOR @@ -700,6 +748,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 03 January 2015 -Copyright (c) 1997-2015 University of Cambridge. +Last updated: 01 April 2016 +Copyright (c) 1997-2016 University of Cambridge.
.fi diff --git a/maint/ManyConfigTests b/maint/ManyConfigTests index 8ec940c..0156968 100755 --- a/maint/ManyConfigTests +++ b/maint/ManyConfigTests @@ -326,7 +326,7 @@ if [ $usemain -ne 0 ]; then "--disable-shared" \ "--disable-unicode --disable-stack-for-recursion --disable-shared" \ "--disable-stack-for-recursion --disable-shared --enable-never-backslash-C" \ - "--with-link-size=3 --disable-shared" \ + "--with-link-size=3 --disable-shared --disable-pcre2grep-callout" \ "--disable-unicode --enable-rebuild-chartables --disable-shared" \ "--disable-unicode --enable-newline-is-any --disable-shared" \ "--disable-unicode --enable-newline-is-cr --disable-shared" \ diff --git a/src/config.h.in b/src/config.h.in index e55d0a0..d4821af 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -111,6 +111,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H @@ -262,6 +265,9 @@ sure both macros are undefined; an emulation function will then be used. */ is able to handle .gz files. */ #undef SUPPORT_LIBZ +/* Define to any value to enable callout script support in pcre2grep. */ +#undef SUPPORT_PCRE2GREP_CALLOUT + /* Define to any value to enable JIT support in pcre2grep. */ #undef SUPPORT_PCRE2GREP_JIT diff --git a/src/pcre2grep.c b/src/pcre2grep.c index d2664f4..231d356 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -58,6 +58,10 @@ POSSIBILITY OF SUCH DAMAGE. #include <sys/types.h> #include <sys/stat.h> +#ifdef SUPPORT_PCRE2GREP_CALLOUT +#include <sys/wait.h> +#endif + #ifdef HAVE_UNISTD_H #include <unistd.h> #endif @@ -121,9 +125,9 @@ apply to fprintf(). */ #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {} -/* Under Windows, we have to set stdout to be binary, so that it does not -convert \r\n at the ends of output lines to \r\r\n.
However, that means that -any messages written to stdout must have \r\n as their line terminator. This is +/* Under Windows, we have to set stdout to be binary, so that it does not +convert \r\n at the ends of output lines to \r\r\n. However, that means that +any messages written to stdout must have \r\n as their line terminator. This is handled by using STDOUT_NL as the newline string. */ #if defined(_WIN32) || defined(WIN32) @@ -899,6 +903,13 @@ option_item *op; printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL); printf("Search for PATTERN in each FILE or standard input." STDOUT_NL); printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL); + +#ifdef SUPPORT_PCRE2GREP_CALLOUT +printf("Callout scripts in patterns are supported." STDOUT_NL); +#else +printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL); +#endif + printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL); #ifdef SUPPORT_LIBZ @@ -1484,6 +1495,274 @@ return FALSE; /* No match, no errors */ } +#ifdef SUPPORT_PCRE2GREP_CALLOUT + +/************************************************* +* Parse and execute callout scripts * +*************************************************/ + +/* This function parses a callout string block and executes the +program specified by the string. The string is a list of substrings +separated by pipe characters. The first substring represents the +executable name, and the following substrings specify the arguments: + + program_name|param1|param2|... + +Any substring (including the program name) can contain escape sequences +started by the dollar character. The escape sequences are substituted as +follows: + + $<digits> or ${<digits>} is replaced by the captured substring of the given + decimal number, which must be greater than zero. If the number is greater + than the number of capturing substrings, or if the capture is unset, the + replacement is empty. + + Any other character is substituted by itself.
E.g: $$ is replaced by a single + dollar or $| replaced by a pipe character. + +Example: + + echo -e "abcde\n12345" | pcre2grep \ + '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - + + Output: + + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 + +Arguments: + calloutptr the callout block (the second argument, unused, is ignored) + +Returns: 0 if matching should continue (also when the callout is ignored), or 1 if the program's wait status is non-zero (match fails) +*/ + +static int +pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused) +{ +PCRE2_SIZE length = calloutptr->callout_string_length; +PCRE2_SPTR string = calloutptr->callout_string; +PCRE2_SPTR subject = calloutptr->subject; +PCRE2_SIZE *ovector = calloutptr->offset_vector; +PCRE2_SIZE capture_top = calloutptr->capture_top; +PCRE2_SIZE argsvectorlen = 2; +PCRE2_SIZE argslen = 1; +char *args; +char *argsptr; +char **argsvector; +char **argsvectorptr; +pid_t pid; +int result = 0; + +(void)unused; /* Avoid compiler warning */ + +/* Only callouts with strings are supported. */ +if (string == NULL || length == 0) return 0; + +/* Checking syntax and compute the number of string fragments. Callout strings +are ignored in case of a syntax error. */ + +while (length > 0) + { + if (*string == '|') + { + argsvectorlen++; + + /* Maximum 10000 arguments allowed. */ + if (argsvectorlen > 10000) return 0; + } + else if (*string == '$') + { + PCRE2_SIZE capture_id = 0; + + string++; + length--; + + /* Syntax error: a character must be present after $. */ + if (length == 0) return 0; + + if (*string >= '1' && *string <= '9') + { + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + } + while (length > 0 && *string >= '0' && *string <= '9'); + + /* To negate the effect of string++ below. */ + string--; + length++; + } + else if (*string == '{') + { + /* Must be a decimal number in braces, e.g: {5} or {38} */ + string++; + length--; + + /* Syntax error: a decimal number required.
*/ + if (length == 0) return 0; + if (*string < '1' || *string > '9') return 0; + + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + + /* Syntax error: no more characters */ + if (length == 0) return 0; + } + while (*string >= '0' && *string <= '9'); + + /* Syntax error: closing brace is missing. */ + if (*string != '}') return 0; + } + + if (capture_id > 0) + { + if (capture_id < capture_top) + { + capture_id *= 2; + argslen += ovector[capture_id + 1] - ovector[capture_id]; + } + + /* To negate the effect of argslen++ below. */ + argslen--; + } + } + + string++; + length--; + argslen++; + } + +args = (char*)malloc(argslen); +if (args == NULL) return 0; + +argsvector = (char**)malloc(argsvectorlen * sizeof(char*)); +if (argsvector == NULL) + { + free(args); + return 0; + } + +argsptr = args; +argsvectorptr = argsvector; + +*argsvectorptr++ = argsptr; + +length = calloutptr->callout_string_length; +string = calloutptr->callout_string; + +while (length > 0) + { + if (*string == '|') + { + *argsptr++ = '\0'; + *argsvectorptr++ = argsptr; + } + else if (*string == '$') + { + string++; + length--; + + if ((*string >= '1' && *string <= '9') || *string == '{') + { + PCRE2_SIZE capture_id = 0; + + if (*string != '{') + { + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + } + while (length > 0 && *string >= '0' && *string <= '9'); + + /* To negate the effect of string++ below. */ + string--; + length++; + } + else + { + string++; + length--; + + do + { + /* Maximum capture id is 65535.
*/ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + length--; + } + while (*string != '}'); + } + + if (capture_id < capture_top) + { + PCRE2_SIZE capturesize; + capture_id *= 2; + + capturesize = ovector[capture_id + 1] - ovector[capture_id]; + memcpy(argsptr, subject + ovector[capture_id], capturesize); + argsptr += capturesize; + } + } + else + { + *argsptr++ = *string; + } + } + else + { + *argsptr++ = *string; + } + + string++; + length--; + } + +*argsptr++ = '\0'; +*argsvectorptr = NULL; + +pid = fork(); + +if (pid == 0) + { + (void)execv(argsvector[0], argsvector); + /* Control gets here if there is an error, e.g. a non-existent program; _exit() is used so that the child does not flush stdio buffers inherited from the parent (which would duplicate pending output) or run atexit handlers */ + _exit(1); + } +else if (pid > 0) + (void)waitpid(pid, &result, 0); + +free(args); +free(argsvector); + +/* Currently negative return values are not supported, only zero (match +continues) or non-zero (match fails). */ + +return result != 0; +} + +#endif + + /************************************************* * Grep an individual file * @@ -1786,7 +2065,7 @@ while (ptr < endptr) } } - if (printed || printname != NULL || number) + if (printed || printname != NULL || number) fprintf(stdout, STDOUT_NL); } @@ -2637,10 +2916,10 @@ const char *locale_from = "--locale"; pcre2_jit_stack *jit_stack = NULL; #endif -/* In Windows, stdout is set up as a text stream, which means that \n is -converted to \r\n. This causes output lines that are copied from the input to -change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure -that stdout is a binary stream. Note that this means all other output to stdout +/* In Windows, stdout is set up as a text stream, which means that \n is +converted to \r\n. This causes output lines that are copied from the input to +change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure +that stdout is a binary stream. Note that this means all other output to stdout must use STDOUT_NL to terminate lines.
*/ #if defined(_WIN32) || defined(WIN32) @@ -2654,6 +2933,13 @@ match_context = pcre2_match_context_create(NULL); match_data = pcre2_match_data_create(OFFSET_SIZE, NULL); offsets = pcre2_get_ovector_pointer(match_data); +/* If string (script) callouts are supported, set up the callout processing +function. */ + +#ifdef SUPPORT_PCRE2GREP_CALLOUT +pcre2_set_callout(match_context, pcre2grep_callout, NULL); +#endif + /* Process the options */ for (i = 1; i < argc; i++) diff --git a/testdata/grepoutputC b/testdata/grepoutputC new file mode 100644 index 0000000..0116645 --- /dev/null +++ b/testdata/grepoutputC @@ -0,0 +1,8 @@ +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +Arg1: [T] [his] [s] Arg2: |T| () () (0) +The quick brown +This time it jumps and jumps and jumps. +Arg1: [qu] [qu] +Arg1: [ t] [ t] +The quick brown +This time it jumps and jumps and jumps.