Add callout support to pcre2grep
This commit is contained in:
parent
ddcedf0338
commit
c332eaf4f2
|
@ -158,6 +158,9 @@ SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
|
|||
SET(PCRE2_SUPPORT_PCRE2GREP_JIT ON CACHE BOOL
|
||||
"Enable use of Just-in-time compiling in pcre2grep.")
|
||||
|
||||
SET(PCRE2_SUPPORT_PCRE2GREP_CALLOUT ON CACHE BOOL
|
||||
"Enable callout string support in pcre2grep.")
|
||||
|
||||
SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
|
||||
"Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
|
||||
|
||||
|
@ -273,6 +276,10 @@ IF(PCRE2_SUPPORT_PCRE2GREP_JIT)
|
|||
SET(SUPPORT_PCRE2GREP_JIT 1)
|
||||
ENDIF(PCRE2_SUPPORT_PCRE2GREP_JIT)
|
||||
|
||||
# Callout script support in pcre2grep is compiled in when
# SUPPORT_PCRE2GREP_CALLOUT is defined. The implementation includes
# <sys/wait.h> and starts the script with fork()/execv(), so it cannot be
# built for native Windows; the autoconf build disables it there as well.
# Only define the macro on other platforms, and say so when the request is
# dropped.
IF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT)
  IF(WIN32)
    MESSAGE(STATUS "Callout script support in pcre2grep is not available for Windows: disabled")
  ELSE(WIN32)
    SET(SUPPORT_PCRE2GREP_CALLOUT 1)
  ENDIF(WIN32)
ENDIF(PCRE2_SUPPORT_PCRE2GREP_CALLOUT)
|
||||
|
||||
IF(PCRE2_SUPPORT_VALGRIND)
|
||||
SET(SUPPORT_VALGRIND 1)
|
||||
ENDIF(PCRE2_SUPPORT_VALGRIND)
|
||||
|
@ -753,6 +760,7 @@ IF(PCRE2_SHOW_REPORT)
|
|||
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
|
||||
MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}")
|
||||
MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2_SUPPORT_PCRE2GREP_JIT}")
|
||||
MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2_SUPPORT_PCRE2GREP_CALLOUT}")
|
||||
MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}")
|
||||
MESSAGE(STATUS " Build tests (implies pcre2test . : ${PCRE2_BUILD_TESTS}")
|
||||
MESSAGE(STATUS " and pcre2grep)")
|
||||
|
|
|
@ -97,6 +97,9 @@ appropriate line terminator: \r\n for Windows, \n otherwise.
|
|||
21. When a line is too long for pcre2grep's internal buffer, show the maximum
|
||||
length in the error message.
|
||||
|
||||
22. Added support for string callouts to pcre2grep (Zoltan's patch with PH
|
||||
additions).
|
||||
|
||||
|
||||
Version 10.21 12-January-2016
|
||||
-----------------------------
|
||||
|
|
|
@ -570,6 +570,7 @@ EXTRA_DIST += \
|
|||
testdata/greplist \
|
||||
testdata/grepoutput \
|
||||
testdata/grepoutput8 \
|
||||
testdata/grepoutputC \
|
||||
testdata/grepoutputN \
|
||||
testdata/greppatN4 \
|
||||
testdata/testinput1 \
|
||||
|
|
19
README
19
README
|
@ -168,15 +168,12 @@ library. They are also documented in the pcre2build man page.
|
|||
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
|
||||
to disable building the 8-bit library.
|
||||
|
||||
. If you want to include support for just-in-time compiling, which can give
|
||||
large performance improvements on certain platforms, add --enable-jit to the
|
||||
"configure" command. This support is available only for certain hardware
|
||||
. If you want to include support for just-in-time (JIT) compiling, which can
|
||||
give large performance improvements on certain platforms, add --enable-jit to
|
||||
the "configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
||||
. If you do not want to make use of the support for UTF-8 Unicode character
|
||||
strings in the 8-bit library, UTF-16 Unicode character strings in the 16-bit
|
||||
library, or UTF-32 Unicode character strings in the 32-bit library, you can
|
||||
|
@ -324,6 +321,14 @@ library. They are also documented in the pcre2build man page.
|
|||
running "make" to build PCRE2. There is more information about coverage
|
||||
reporting in the "pcre2build" documentation.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
||||
. On non-Windows systems there is support for calling external scripts during
|
||||
matching in the pcre2grep command via PCRE2's callout facility with string
|
||||
arguments. This support can be disabled by adding --disable-pcre2grep-callout
|
||||
to the "configure" command.
|
||||
|
||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
||||
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||
|
@ -840,4 +845,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 01 April 2016
|
||||
|
|
11
RunGrepTest
11
RunGrepTest
|
@ -614,6 +614,17 @@ $valgrind $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>
|
|||
$cf $srcdir/testdata/grepoutputN testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
# If pcre2grep supports script callouts, run some tests on them.
|
||||
|
||||
# The --help text states whether this pcre2grep binary was built with support
# for callout scripts; the callout tests are run only when it says so.
if $valgrind $pcre2grep --help | $valgrind $pcre2grep -q 'Callout scripts in patterns are supported'; then
  echo "Testing pcre2grep script callouts"

  # Plain and braced capture references, the $| escape, an empty group ($4),
  # a group number beyond the last one ($14) and a literal $0.
  $valgrind $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep

  # A two-digit capture number, in both the plain and the braced form.
  $valgrind $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep

  # Any difference from the reference output is a test failure.
  $cf $srcdir/testdata/grepoutputC testtrygrep || exit 1
else
  echo "Script callouts are not supported"
fi
|
||||
|
||||
# Finally, some tests to exercise code that is not tested above, just to be
|
||||
# sure that it runs OK. Doing this improves the coverage statistics. The output
|
||||
|
|
28
configure.ac
28
configure.ac
|
@ -148,6 +148,17 @@ AC_ARG_ENABLE(pcre2grep-jit,
|
|||
[disable JIT support in pcre2grep]),
|
||||
, enable_pcre2grep_jit=yes)
|
||||
|
||||
# Handle --disable-pcre2grep-callout (enabled by default). The option is
# registered unconditionally: HAVE_WINDOWS_H is only set by the header checks
# further down, so it cannot usefully be tested at this point. Windows, where
# callout scripts are not supported, is dealt with after those checks, where
# the feature is switched off with a warning.
AC_ARG_ENABLE(pcre2grep-callout,
              AS_HELP_STRING([--disable-pcre2grep-callout],
                             [disable callout script support in pcre2grep]),
              , enable_pcre2grep_callout=yes)
|
||||
|
||||
# Handle --enable-rebuild-chartables
|
||||
AC_ARG_ENABLE(rebuild-chartables,
|
||||
AS_HELP_STRING([--enable-rebuild-chartables],
|
||||
|
@ -392,6 +403,7 @@ sure both macros are undefined; an emulation function will then be used. */])
|
|||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
|
||||
|
||||
# Conditional compilation
|
||||
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
|
||||
|
@ -546,6 +558,21 @@ if test "$enable_pcre2grep_jit" = "yes"; then
|
|||
Define to any value to enable JIT support in pcre2grep.])
|
||||
fi
|
||||
|
||||
# Currently pcre2grep callout string is not supported under Windows.
|
||||
|
||||
if test "$enable_pcre2grep_callout" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
if test "$HAVE_SYS_WAIT_H" != "1"; then
|
||||
AC_MSG_ERROR([Callout script support needs sys/wait.h.])
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
|
||||
Define to any value to enable callout script support in pcre2grep.])
|
||||
else
|
||||
AC_MSG_WARN([Callout script support is not available for Windows: disabled])
|
||||
enable_pcre2grep_callout=no
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$enable_unicode" = "yes"; then
|
||||
AC_DEFINE([SUPPORT_UNICODE], [], [
|
||||
Define to any value to enable support for Unicode and UTF encoding.
|
||||
|
@ -908,6 +935,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Build shared libs ............... : ${enable_shared}
|
||||
Build static libs ............... : ${enable_static}
|
||||
Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
|
||||
Enable callouts in pcre2grep .... : ${enable_pcre2grep_callout}
|
||||
Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
|
||||
Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
|
||||
Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2BUILD 3 "16 October 2015" "PCRE2 10.21"
|
||||
.TH PCRE2BUILD 3 "01 April 2016" "PCRE2 10.22"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.
|
||||
|
@ -352,6 +352,19 @@ and equivalent run-time options, refer to these character values in an EBCDIC
|
|||
environment.
|
||||
.
|
||||
.
|
||||
.SH "PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS"
|
||||
.rs
|
||||
.sp
|
||||
By default, on non-Windows systems, \fBpcre2grep\fP supports the use of
|
||||
callouts with string arguments within the patterns it is matching, in order to
|
||||
run external scripts. For details, see the
|
||||
.\" HREF
|
||||
\fBpcre2grep\fP
|
||||
.\"
|
||||
documentation. This support can be disabled by adding
|
||||
--disable-pcre2grep-callout to the \fBconfigure\fP command.
|
||||
.
|
||||
.
|
||||
.SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -381,7 +394,7 @@ parameter value by adding, for example,
|
|||
--with-pcre2grep-bufsize=50K
|
||||
.sp
|
||||
to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override this
|
||||
value by using --buffer-size on the command line..
|
||||
value by using --buffer-size on the command line.
|
||||
.
|
||||
.
|
||||
.SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT"
|
||||
|
@ -519,6 +532,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 16 October 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 01 April 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2GREP 1 "03 January 2015" "PCRE2 10.00"
|
||||
.TH PCRE2GREP 1 "01 April 2016" "PCRE2 10.22"
|
||||
.SH NAME
|
||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -653,6 +653,54 @@ options does have data, it must be given in the first form, using an equals
|
|||
character. Otherwise \fBpcre2grep\fP will assume that it has no data.
|
||||
.
|
||||
.
|
||||
.SH "CALLING EXTERNAL SCRIPTS"
|
||||
.rs
|
||||
.sp
|
||||
On non-Windows systems, \fBpcre2grep\fP has, by default, support for calling
|
||||
external programs or scripts during matching by making use of PCRE2's callout
|
||||
facility. However, this support can be disabled when \fBpcre2grep\fP is built.
|
||||
You can find out whether your binary has support for callouts by running it
|
||||
with the \fB--help\fP option. If the support is not enabled, all callouts in
|
||||
patterns are ignored by \fBpcre2grep\fP.
|
||||
.P
|
||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
||||
either a number or a quoted string (see the
|
||||
.\" HREF
|
||||
\fBpcre2callout\fP
|
||||
.\"
|
||||
documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP.
|
||||
String arguments are parsed as a list of substrings separated by pipe (vertical
|
||||
bar) characters. The first substring must be an executable name, with the
|
||||
following substrings specifying arguments:
|
||||
.sp
|
||||
executable_name|arg1|arg2|...
|
||||
.sp
|
||||
Any substring (including the executable name) may contain escape sequences
|
||||
started by a dollar character: $<digits> or ${<digits>} is replaced by the
|
||||
captured substring of the given decimal number, which must be greater than
|
||||
zero. If the number is greater than the number of capturing substrings, or if
|
||||
the capture is unset, the replacement is empty.
|
||||
.P
|
||||
Any other character is substituted by itself. In particular, $$ is replaced by
|
||||
a single dollar and $| is replaced by a pipe character. Here is an example:
|
||||
.sp
|
||||
echo -e "abcde\en12345" | pcre2grep \e
|
||||
'(?x)(.)(..(.))
|
||||
(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
|
||||
|
||||
Output:
|
||||
|
||||
Arg1: [a] [bcd] [d] Arg2: |a| ()
|
||||
abcde
|
||||
Arg1: [1] [234] [4] Arg2: |1| ()
|
||||
12345
|
||||
.sp
|
||||
Any syntax errors in the string (for example, a dollar not followed by another
|
||||
character) cause the callout to be ignored. If running the program fails for
|
||||
any reason (including the non-existence of the executable), a local matching
|
||||
failure occurs and the matcher backtracks in the normal way.
|
||||
.
|
||||
.
|
||||
.SH "MATCHING ERRORS"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -683,7 +731,7 @@ affect the return code.
|
|||
.SH "SEE ALSO"
|
||||
.rs
|
||||
.sp
|
||||
\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3).
|
||||
\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3).
|
||||
.
|
||||
.
|
||||
.SH AUTHOR
|
||||
|
@ -700,6 +748,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 January 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 01 April 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -326,7 +326,7 @@ if [ $usemain -ne 0 ]; then
|
|||
"--disable-shared" \
|
||||
"--disable-unicode --disable-stack-for-recursion --disable-shared" \
|
||||
"--disable-stack-for-recursion --disable-shared --enable-never-backslash-C" \
|
||||
"--with-link-size=3 --disable-shared" \
|
||||
"--with-link-size=3 --disable-shared --disable-pcre2grep-callout" \
|
||||
"--disable-unicode --enable-rebuild-chartables --disable-shared" \
|
||||
"--disable-unicode --enable-newline-is-any --disable-shared" \
|
||||
"--disable-unicode --enable-newline-is-cr --disable-shared" \
|
||||
|
|
|
@ -111,6 +111,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/wait.h> header file. */
|
||||
#undef HAVE_SYS_WAIT_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
|
@ -262,6 +265,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
is able to handle .gz files. */
|
||||
#undef SUPPORT_LIBZ
|
||||
|
||||
/* Define to any value to enable callout script support in pcre2grep. */
|
||||
#undef SUPPORT_PCRE2GREP_CALLOUT
|
||||
|
||||
/* Define to any value to enable JIT support in pcre2grep. */
|
||||
#undef SUPPORT_PCRE2GREP_JIT
|
||||
|
||||
|
|
302
src/pcre2grep.c
302
src/pcre2grep.c
|
@ -58,6 +58,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
#include <sys/wait.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
@ -121,9 +125,9 @@ apply to fprintf(). */
|
|||
|
||||
#define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
|
||||
|
||||
/* Under Windows, we have to set stdout to be binary, so that it does not
|
||||
convert \r\n at the ends of output lines to \r\r\n. However, that means that
|
||||
any messages written to stdout must have \r\n as their line terminator. This is
|
||||
/* Under Windows, we have to set stdout to be binary, so that it does not
|
||||
convert \r\n at the ends of output lines to \r\r\n. However, that means that
|
||||
any messages written to stdout must have \r\n as their line terminator. This is
|
||||
handled by using STDOUT_NL as the newline string. */
|
||||
|
||||
#if defined(_WIN32) || defined(WIN32)
|
||||
|
@ -899,6 +903,13 @@ option_item *op;
|
|||
printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
|
||||
printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
|
||||
printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
printf("Callout scripts in patterns are supported." STDOUT_NL);
|
||||
#else
|
||||
printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
|
||||
#endif
|
||||
|
||||
printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
|
@ -1484,6 +1495,274 @@ return FALSE; /* No match, no errors */
|
|||
}
|
||||
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
|
||||
/*************************************************
|
||||
* Parse and execute callout scripts *
|
||||
*************************************************/
|
||||
|
||||
/* This function parses a callout string block and executes the
|
||||
program specified by the string. The string is a list of substrings
|
||||
separated by pipe characters. The first substring represents the
|
||||
executable name, and the following substrings specify the arguments:
|
||||
|
||||
program_name|param1|param2|...
|
||||
|
||||
Any substirng (including the program name) can contain escape sequences
|
||||
started by the dollar character. The escape sequences are substituted as
|
||||
follows:
|
||||
|
||||
$<digits> or ${<digits>} is replaced by the captured substring of the given
|
||||
decimal number, which must be greater than zero. If the number is greater
|
||||
than the number of capturing substrings, or if the capture is unset, the
|
||||
replacement is empty.
|
||||
|
||||
Any other character is substituted by itself. E.g: $$ is replaced by a single
|
||||
dollar or $| replaced by a pipe character.
|
||||
|
||||
Example:
|
||||
|
||||
echo -e "abcde\n12345" | pcre2grep \
|
||||
'(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
|
||||
|
||||
Output:
|
||||
|
||||
Arg1: [a] [bcd] [d] Arg2: |a| ()
|
||||
abcde
|
||||
Arg1: [1] [234] [4] Arg2: |1| ()
|
||||
12345
|
||||
|
||||
Arguments:
|
||||
blockptr the callout block
|
||||
|
||||
Returns: currently it always returns with 0
|
||||
*/
|
||||
|
||||
static int
|
||||
pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
|
||||
{
|
||||
PCRE2_SIZE length = calloutptr->callout_string_length;
|
||||
PCRE2_SPTR string = calloutptr->callout_string;
|
||||
PCRE2_SPTR subject = calloutptr->subject;
|
||||
PCRE2_SIZE *ovector = calloutptr->offset_vector;
|
||||
PCRE2_SIZE capture_top = calloutptr->capture_top;
|
||||
PCRE2_SIZE argsvectorlen = 2;
|
||||
PCRE2_SIZE argslen = 1;
|
||||
char *args;
|
||||
char *argsptr;
|
||||
char **argsvector;
|
||||
char **argsvectorptr;
|
||||
pid_t pid;
|
||||
int result = 0;
|
||||
|
||||
(void)unused; /* Avoid compiler warning */
|
||||
|
||||
/* Only callout with strings are supported. */
|
||||
if (string == NULL || length == 0) return 0;
|
||||
|
||||
/* Checking syntax and compute the number of string fragments. Callout strings
|
||||
are ignored in case of a syntax error. */
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
if (*string == '|')
|
||||
{
|
||||
argsvectorlen++;
|
||||
|
||||
/* Maximum 10000 arguments allowed. */
|
||||
if (argsvectorlen > 10000) return 0;
|
||||
}
|
||||
else if (*string == '$')
|
||||
{
|
||||
PCRE2_SIZE capture_id = 0;
|
||||
|
||||
string++;
|
||||
length--;
|
||||
|
||||
/* Syntax error: a character must be present after $. */
|
||||
if (length == 0) return 0;
|
||||
|
||||
if (*string >= '1' && *string <= '9')
|
||||
{
|
||||
do
|
||||
{
|
||||
/* Maximum capture id is 65535. */
|
||||
if (capture_id <= 65535)
|
||||
capture_id = capture_id * 10 + (*string - '0');
|
||||
|
||||
string++;
|
||||
length--;
|
||||
}
|
||||
while (length > 0 && *string >= '0' && *string <= '9');
|
||||
|
||||
/* To negate the effect of string++ below. */
|
||||
string--;
|
||||
length++;
|
||||
}
|
||||
else if (*string == '{')
|
||||
{
|
||||
/* Must be a decimal number in parenthesis, e.g: (5) or (38) */
|
||||
string++;
|
||||
length--;
|
||||
|
||||
/* Syntax error: a decimal number required. */
|
||||
if (length == 0) return 0;
|
||||
if (*string < '1' || *string > '9') return 0;
|
||||
|
||||
do
|
||||
{
|
||||
/* Maximum capture id is 65535. */
|
||||
if (capture_id <= 65535)
|
||||
capture_id = capture_id * 10 + (*string - '0');
|
||||
|
||||
string++;
|
||||
length--;
|
||||
|
||||
/* Syntax error: no more characters */
|
||||
if (length == 0) return 0;
|
||||
}
|
||||
while (*string >= '0' && *string <= '9');
|
||||
|
||||
/* Syntax error: close paren is missing. */
|
||||
if (*string != '}') return 0;
|
||||
}
|
||||
|
||||
if (capture_id > 0)
|
||||
{
|
||||
if (capture_id < capture_top)
|
||||
{
|
||||
capture_id *= 2;
|
||||
argslen += ovector[capture_id + 1] - ovector[capture_id];
|
||||
}
|
||||
|
||||
/* To negate the effect of argslen++ below. */
|
||||
argslen--;
|
||||
}
|
||||
}
|
||||
|
||||
string++;
|
||||
length--;
|
||||
argslen++;
|
||||
}
|
||||
|
||||
args = (char*)malloc(argslen);
|
||||
if (args == NULL) return 0;
|
||||
|
||||
argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
|
||||
if (argsvector == NULL)
|
||||
{
|
||||
free(args);
|
||||
return 0;
|
||||
}
|
||||
|
||||
argsptr = args;
|
||||
argsvectorptr = argsvector;
|
||||
|
||||
*argsvectorptr++ = argsptr;
|
||||
|
||||
length = calloutptr->callout_string_length;
|
||||
string = calloutptr->callout_string;
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
if (*string == '|')
|
||||
{
|
||||
*argsptr++ = '\0';
|
||||
*argsvectorptr++ = argsptr;
|
||||
}
|
||||
else if (*string == '$')
|
||||
{
|
||||
string++;
|
||||
length--;
|
||||
|
||||
if ((*string >= '1' && *string <= '9') || *string == '{')
|
||||
{
|
||||
PCRE2_SIZE capture_id = 0;
|
||||
|
||||
if (*string != '{')
|
||||
{
|
||||
do
|
||||
{
|
||||
/* Maximum capture id is 65535. */
|
||||
if (capture_id <= 65535)
|
||||
capture_id = capture_id * 10 + (*string - '0');
|
||||
|
||||
string++;
|
||||
length--;
|
||||
}
|
||||
while (length > 0 && *string >= '0' && *string <= '9');
|
||||
|
||||
/* To negate the effect of string++ below. */
|
||||
string--;
|
||||
length++;
|
||||
}
|
||||
else
|
||||
{
|
||||
string++;
|
||||
length--;
|
||||
|
||||
do
|
||||
{
|
||||
/* Maximum capture id is 65535. */
|
||||
if (capture_id <= 65535)
|
||||
capture_id = capture_id * 10 + (*string - '0');
|
||||
|
||||
string++;
|
||||
length--;
|
||||
}
|
||||
while (*string != '}');
|
||||
}
|
||||
|
||||
if (capture_id < capture_top)
|
||||
{
|
||||
PCRE2_SIZE capturesize;
|
||||
capture_id *= 2;
|
||||
|
||||
capturesize = ovector[capture_id + 1] - ovector[capture_id];
|
||||
memcpy(argsptr, subject + ovector[capture_id], capturesize);
|
||||
argsptr += capturesize;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*argsptr++ = *string;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*argsptr++ = *string;
|
||||
}
|
||||
|
||||
string++;
|
||||
length--;
|
||||
}
|
||||
|
||||
*argsptr++ = '\0';
|
||||
*argsvectorptr = NULL;
|
||||
|
||||
pid = fork();
|
||||
|
||||
if (pid == 0)
|
||||
{
|
||||
(void)execv(argsvector[0], argsvector);
|
||||
/* Control gets here if there is an error, e.g. a non-existent program */
|
||||
exit(1);
|
||||
}
|
||||
else if (pid > 0)
|
||||
(void)waitpid(pid, &result, 0);
|
||||
|
||||
free(args);
|
||||
free(argsvector);
|
||||
|
||||
/* Currently negative return values are not supported, only zero (match
|
||||
continues) or non-zero (match fails). */
|
||||
|
||||
return result != 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Grep an individual file *
|
||||
|
@ -1786,7 +2065,7 @@ while (ptr < endptr)
|
|||
}
|
||||
}
|
||||
|
||||
if (printed || printname != NULL || number)
|
||||
if (printed || printname != NULL || number)
|
||||
fprintf(stdout, STDOUT_NL);
|
||||
}
|
||||
|
||||
|
@ -2637,10 +2916,10 @@ const char *locale_from = "--locale";
|
|||
pcre2_jit_stack *jit_stack = NULL;
|
||||
#endif
|
||||
|
||||
/* In Windows, stdout is set up as a text stream, which means that \n is
|
||||
converted to \r\n. This causes output lines that are copied from the input to
|
||||
change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
|
||||
that stdout is a binary stream. Note that this means all other output to stdout
|
||||
/* In Windows, stdout is set up as a text stream, which means that \n is
|
||||
converted to \r\n. This causes output lines that are copied from the input to
|
||||
change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
|
||||
that stdout is a binary stream. Note that this means all other output to stdout
|
||||
must use STDOUT_NL to terminate lines. */
|
||||
|
||||
#if defined(_WIN32) || defined(WIN32)
|
||||
|
@ -2654,6 +2933,13 @@ match_context = pcre2_match_context_create(NULL);
|
|||
match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
|
||||
offsets = pcre2_get_ovector_pointer(match_data);
|
||||
|
||||
/* If string (script) callouts are supported, set up the callout processing
|
||||
function. */
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
pcre2_set_callout(match_context, pcre2grep_callout, NULL);
|
||||
#endif
|
||||
|
||||
/* Process the options */
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
Arg1: [T] [he ] [ ] Arg2: |T| () () (0)
|
||||
Arg1: [T] [his] [s] Arg2: |T| () () (0)
|
||||
The quick brown
|
||||
This time it jumps and jumps and jumps.
|
||||
Arg1: [qu] [qu]
|
||||
Arg1: [ t] [ t]
|
||||
The quick brown
|
||||
This time it jumps and jumps and jumps.
|
Loading…
Reference in New Issue