Implement buffer expansion in pcre2grep.
This commit is contained in:
parent
b451e9f3b5
commit
bf6f0bb335
|
@ -76,6 +76,7 @@
|
||||||
# a new option instead of being unconditional.
|
# a new option instead of being unconditional.
|
||||||
# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch
|
# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch
|
||||||
# fix by David Gaussmann
|
# fix by David Gaussmann
|
||||||
|
# 2016-10-07 PH added PCRE2GREP_MAX_BUFSIZE
|
||||||
|
|
||||||
PROJECT(PCRE2 C)
|
PROJECT(PCRE2 C)
|
||||||
|
|
||||||
|
@ -148,7 +149,10 @@ SET(PCRE2_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
|
||||||
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
|
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
|
||||||
|
|
||||||
SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING
|
SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING
|
||||||
"Buffer size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
|
"Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
|
||||||
|
|
||||||
|
SET(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING
|
||||||
|
"Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.")
|
||||||
|
|
||||||
SET(PCRE2_NEWLINE "LF" CACHE STRING
|
SET(PCRE2_NEWLINE "LF" CACHE STRING
|
||||||
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
|
||||||
|
|
|
@ -61,6 +61,10 @@ escape sequence for a character whose code point was greater than \x{ff}.
|
||||||
9. Change 19 for 10.22 had a typo (PCRE_STATIC_RUNTIME should be
|
9. Change 19 for 10.22 had a typo (PCRE_STATIC_RUNTIME should be
|
||||||
PCRE2_STATIC_RUNTIME). Fix from David Gaussmann.
|
PCRE2_STATIC_RUNTIME). Fix from David Gaussmann.
|
||||||
|
|
||||||
|
10. Added --max-buffer-size to pcre2grep, to allow for automatic buffer
|
||||||
|
expansion when long lines are encountered. Original patch by Dmitry
|
||||||
|
Cherniachenko.
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
18
README
18
README
|
@ -339,12 +339,22 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
Of course, the relevant libraries must be installed on your system.
|
Of course, the relevant libraries must be installed on your system.
|
||||||
|
|
||||||
. The default size (in bytes) of the internal buffer used by pcre2grep can be
|
. The default starting size (in bytes) of the internal buffer used by pcre2grep
|
||||||
set by, for example:
|
can be set by, for example:
|
||||||
|
|
||||||
--with-pcre2grep-bufsize=51200
|
--with-pcre2grep-bufsize=51200
|
||||||
|
|
||||||
The value must be a plain integer. The default is 20480.
|
The value must be a plain integer. The default is 20480. The amount of memory
|
||||||
|
used by pcre2grep is actually three times this number, to allow for "before"
|
||||||
|
and "after" lines.
|
||||||
|
|
||||||
|
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
||||||
|
example:
|
||||||
|
|
||||||
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
|
|
||||||
|
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||||
|
whichever is the larger.
|
||||||
|
|
||||||
. It is possible to compile pcre2test so that it links with the libreadline
|
. It is possible to compile pcre2test so that it links with the libreadline
|
||||||
or libedit libraries, by specifying, respectively,
|
or libedit libraries, by specifying, respectively,
|
||||||
|
@ -845,4 +855,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 01 April 2016
|
Last updated: 07 October 2016
|
||||||
|
|
|
@ -440,7 +440,7 @@ echo "---------------------------- Test 82 -----------------------------" >>test
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
|
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
|
(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
|
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||||
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@
|
||||||
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@
|
||||||
|
#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@
|
||||||
|
|
||||||
#define MAX_NAME_SIZE 32
|
#define MAX_NAME_SIZE 32
|
||||||
#define MAX_NAME_COUNT 10000
|
#define MAX_NAME_COUNT 10000
|
||||||
|
|
108
configure.ac
108
configure.ac
|
@ -240,9 +240,15 @@ AC_ARG_ENABLE(pcre2grep-libbz2,
|
||||||
# Handle --with-pcre2grep-bufsize=N
|
# Handle --with-pcre2grep-bufsize=N
|
||||||
AC_ARG_WITH(pcre2grep-bufsize,
|
AC_ARG_WITH(pcre2grep-bufsize,
|
||||||
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
AS_HELP_STRING([--with-pcre2grep-bufsize=N],
|
||||||
[pcre2grep buffer size (default=20480, minimum=8192)]),
|
[pcre2grep initial buffer size (default=20480, minimum=8192)]),
|
||||||
, with_pcre2grep_bufsize=20480)
|
, with_pcre2grep_bufsize=20480)
|
||||||
|
|
||||||
|
# Handle --with-pcre2grep-max-bufsize=N
|
||||||
|
AC_ARG_WITH(pcre2grep-max-bufsize,
|
||||||
|
AS_HELP_STRING([--with-pcre2grep-max-bufsize=N],
|
||||||
|
[pcre2grep maximum buffer size (default=1048576, minimum=8192)]),
|
||||||
|
, with_pcre2grep_max_bufsize=1048576)
|
||||||
|
|
||||||
# Handle --enable-pcre2test-libedit
|
# Handle --enable-pcre2test-libedit
|
||||||
AC_ARG_ENABLE(pcre2test-libedit,
|
AC_ARG_ENABLE(pcre2test-libedit,
|
||||||
AS_HELP_STRING([--enable-pcre2test-libedit],
|
AS_HELP_STRING([--enable-pcre2test-libedit],
|
||||||
|
@ -608,15 +614,30 @@ if test $with_pcre2grep_bufsize -lt 8192 ; then
|
||||||
with_pcre2grep_bufsize="8192"
|
with_pcre2grep_bufsize="8192"
|
||||||
else
|
else
|
||||||
if test $? -gt 1 ; then
|
if test $? -gt 1 ; then
|
||||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then
|
||||||
|
with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize"
|
||||||
|
else
|
||||||
|
if test $? -gt 1 ; then
|
||||||
|
AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize])
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||||
The value of PCRE2GREP_BUFSIZE determines the size of buffer used by pcre2grep
|
The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||||
to hold parts of the file it is searching. This is also the minimum value.
|
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||||
The actual amount of memory used by pcre2grep is three times this number,
|
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very
|
||||||
because it allows for the buffering of "before" and "after" lines.])
|
long lines. The actual amount of memory used by pcre2grep is three times this
|
||||||
|
number, because it allows for the buffering of "before" and "after" lines.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [
|
||||||
|
The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||||
|
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||||
|
amount of memory used by pcre2grep is three times this number, because it
|
||||||
|
allows for the buffering of "before" and "after" lines.])
|
||||||
|
|
||||||
if test "$enable_pcre2test_libedit" = "yes"; then
|
if test "$enable_pcre2test_libedit" = "yes"; then
|
||||||
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
AC_DEFINE([SUPPORT_LIBEDIT], [], [
|
||||||
|
@ -906,43 +927,44 @@ cat <<EOF
|
||||||
|
|
||||||
$PACKAGE-$VERSION configuration summary:
|
$PACKAGE-$VERSION configuration summary:
|
||||||
|
|
||||||
Install prefix .................. : ${prefix}
|
Install prefix ..................... : ${prefix}
|
||||||
C preprocessor .................. : ${CPP}
|
C preprocessor ..................... : ${CPP}
|
||||||
C compiler ...................... : ${CC}
|
C compiler ......................... : ${CC}
|
||||||
Linker .......................... : ${LD}
|
Linker ............................. : ${LD}
|
||||||
C preprocessor flags ............ : ${CPPFLAGS}
|
C preprocessor flags ............... : ${CPPFLAGS}
|
||||||
C compiler flags ................ : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
C compiler flags ................... : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||||
Linker flags .................... : ${LDFLAGS}
|
Linker flags ....................... : ${LDFLAGS}
|
||||||
Extra libraries ................. : ${LIBS}
|
Extra libraries .................... : ${LIBS}
|
||||||
|
|
||||||
Build 8-bit pcre2 library ....... : ${enable_pcre2_8}
|
Build 8-bit pcre2 library .......... : ${enable_pcre2_8}
|
||||||
Build 16-bit pcre2 library ...... : ${enable_pcre2_16}
|
Build 16-bit pcre2 library ......... : ${enable_pcre2_16}
|
||||||
Build 32-bit pcre2 library ...... : ${enable_pcre2_32}
|
Build 32-bit pcre2 library ......... : ${enable_pcre2_32}
|
||||||
Include debugging code .......... : ${enable_debug}
|
Include debugging code ............. : ${enable_debug}
|
||||||
Enable JIT compiling support .... : ${enable_jit}
|
Enable JIT compiling support ....... : ${enable_jit}
|
||||||
Enable Unicode support .......... : ${enable_unicode}
|
Enable Unicode support ............. : ${enable_unicode}
|
||||||
Newline char/sequence ........... : ${enable_newline}
|
Newline char/sequence .............. : ${enable_newline}
|
||||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
\R matches only ANYCRLF ............ : ${enable_bsr_anycrlf}
|
||||||
\C is disabled .................. : ${enable_never_backslash_C}
|
\C is disabled ..................... : ${enable_never_backslash_C}
|
||||||
EBCDIC coding ................... : ${enable_ebcdic}
|
EBCDIC coding ...................... : ${enable_ebcdic}
|
||||||
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
EBCDIC code for NL ................. : ${ebcdic_nl_code}
|
||||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
||||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
Use stack recursion ................ : ${enable_stack_for_recursion}
|
||||||
Internal link size .............. : ${with_link_size}
|
Internal link size ................. : ${with_link_size}
|
||||||
Nested parentheses limit ........ : ${with_parens_nest_limit}
|
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
||||||
Match limit ..................... : ${with_match_limit}
|
Match limit ........................ : ${with_match_limit}
|
||||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
Match limit recursion .............. : ${with_match_limit_recursion}
|
||||||
Build shared libs ............... : ${enable_shared}
|
Build shared libs .................. : ${enable_shared}
|
||||||
Build static libs ............... : ${enable_static}
|
Build static libs .................. : ${enable_static}
|
||||||
Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
|
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
|
||||||
Enable callouts in pcre2grep .... : ${enable_pcre2grep_callout}
|
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
|
||||||
Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
|
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
|
||||||
Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
|
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
|
||||||
Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
|
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
|
||||||
Link pcre2test with libedit ..... : ${enable_pcre2test_libedit}
|
Link pcre2grep with libbz2 ......... : ${enable_pcre2grep_libbz2}
|
||||||
Link pcre2test with libreadline . : ${enable_pcre2test_libreadline}
|
Link pcre2test with libedit ........ : ${enable_pcre2test_libedit}
|
||||||
Valgrind support ................ : ${enable_valgrind}
|
Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline}
|
||||||
Code coverage ................... : ${enable_coverage}
|
Valgrind support ................... : ${enable_valgrind}
|
||||||
|
Code coverage ...................... : ${enable_coverage}
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2BUILD 3 "01 April 2016" "PCRE2 10.22"
|
.TH PCRE2BUILD 3 "07 October 2016" "PCRE2 10.23"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.
|
.
|
||||||
|
@ -385,16 +385,19 @@ they are not.
|
||||||
.sp
|
.sp
|
||||||
\fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is
|
\fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is
|
||||||
scanning, in order to be able to output "before" and "after" lines when it
|
scanning, in order to be able to output "before" and "after" lines when it
|
||||||
finds a match. The size of the buffer is controlled by a parameter whose
|
finds a match. The starting size of the buffer is controlled by a parameter
|
||||||
default value is 20K. The buffer itself is three times this size, but because
|
whose default value is 20K. The buffer itself is three times this size, but
|
||||||
of the way it is used for holding "before" lines, the longest line that is
|
because of the way it is used for holding "before" lines, the longest line that
|
||||||
guaranteed to be processable is the parameter size. You can change the default
|
is guaranteed to be processable is the parameter size. If a longer line is
|
||||||
parameter value by adding, for example,
|
encountered, \fBpcre2grep\fP automatically expands the buffer, up to a
|
||||||
|
specified maximum size, whose default is 1M or the starting size, whichever is
|
||||||
|
the larger. You can change the default parameter values by adding, for example,
|
||||||
.sp
|
.sp
|
||||||
--with-pcre2grep-bufsize=50K
|
--with-pcre2grep-bufsize=51200
|
||||||
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
.sp
|
.sp
|
||||||
to the \fBconfigure\fP command. The caller of \fPpcre2grep\fP can override this
|
to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override
|
||||||
value by using --buffer-size on the command line.
|
these values by using --buffer-size and --max-buffer-size on the command line.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT"
|
.SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT"
|
||||||
|
@ -532,6 +535,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 01 April 2016
|
Last updated: 07 October 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2GREP 1 "19 June 2016" "PCRE2 10.22"
|
.TH PCRE2GREP 1 "11 October 2016" "PCRE2 10.23"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -52,11 +52,18 @@ span line boundaries. What defines a line boundary is controlled by the
|
||||||
\fB-N\fP (\fB--newline\fP) option.
|
\fB-N\fP (\fB--newline\fP) option.
|
||||||
.P
|
.P
|
||||||
The amount of memory used for buffering files that are being scanned is
|
The amount of memory used for buffering files that are being scanned is
|
||||||
controlled by a parameter that can be set by the \fB--buffer-size\fP option.
|
controlled by parameters that can be set by the \fB--buffer-size\fP and
|
||||||
The default value for this parameter is specified when \fBpcre2grep\fP is
|
\fB--max-buffer-size\fP options. The first of these sets the size of buffer
|
||||||
built, with the default default being 20K. A block of memory three times this
|
that is obtained at the start of processing. If an input file contains very
|
||||||
size is used (to allow for buffering "before" and "after" lines). An error
|
long lines, a larger buffer may be needed; this is handled by automatically
|
||||||
occurs if a line overflows the buffer.
|
extending the buffer, up to the limit specified by \fB--max-buffer-size\fP. The
|
||||||
|
default values for these parameters are specified when \fBpcre2grep\fP is
|
||||||
|
built, with the default defaults being 20K and 1M respectively. An error occurs
|
||||||
|
if a line is too long and the buffer can no longer be expanded.
|
||||||
|
.P
|
||||||
|
The block of memory that is actually used is three times the "buffer size", to
|
||||||
|
allow for buffering "before" and "after" lines. If the buffer size is too
|
||||||
|
small, fewer than requested "before" and "after" lines may be output.
|
||||||
.P
|
.P
|
||||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
||||||
BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
|
BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
|
||||||
|
@ -126,24 +133,29 @@ command line starts with a hyphen but is not an option. This allows for the
|
||||||
processing of patterns and file names that start with hyphens.
|
processing of patterns and file names that start with hyphens.
|
||||||
.TP
|
.TP
|
||||||
\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP
|
\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP
|
||||||
Output \fInumber\fP lines of context after each matching line. If file names
|
Output up to \fInumber\fP lines of context after each matching line. Fewer
|
||||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
lines are output if the next match or the end of the file is reached, or if the
|
||||||
colon for the context lines. A line containing "--" is output between each
|
processing buffer size has been set too small. If file names and/or line
|
||||||
group of lines, unless they are in fact contiguous in the input file. The value
|
numbers are being output, a hyphen separator is used instead of a colon for the
|
||||||
of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
|
context lines. A line containing "--" is output between each group of lines,
|
||||||
guarantees to have up to 8K of following text available for context output.
|
unless they are in fact contiguous in the input file. The value of \fInumber\fP
|
||||||
|
is expected to be relatively small. However, \fBpcre2grep\fP guarantees to have
|
||||||
|
up to 8K of following text available for context output.
|
||||||
.TP
|
.TP
|
||||||
\fB-a\fP, \fB--text\fP
|
\fB-a\fP, \fB--text\fP
|
||||||
Treat binary files as text. This is equivalent to
|
Treat binary files as text. This is equivalent to
|
||||||
\fB--binary-files\fP=\fItext\fP.
|
\fB--binary-files\fP=\fItext\fP.
|
||||||
.TP
|
.TP
|
||||||
\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
|
\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
|
||||||
Output \fInumber\fP lines of context before each matching line. If file names
|
Output up to \fInumber\fP lines of context before each matching line. Fewer
|
||||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
lines are output if the previous match or the start of the file is within
|
||||||
colon for the context lines. A line containing "--" is output between each
|
\fInumber\fP lines, or if the processing buffer size has been set too small. If
|
||||||
group of lines, unless they are in fact contiguous in the input file. The value
|
file names and/or line numbers are being output, a hyphen separator is used
|
||||||
of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
|
instead of a colon for the context lines. A line containing "--" is output
|
||||||
guarantees to have up to 8K of preceding text available for context output.
|
between each group of lines, unless they are in fact contiguous in the input
|
||||||
|
file. The value of \fInumber\fP is expected to be relatively small. However,
|
||||||
|
\fBpcre2grep\fP guarantees to have up to 8K of preceding text available for
|
||||||
|
context output.
|
||||||
.TP
|
.TP
|
||||||
\fB--binary-files=\fP\fIword\fP
|
\fB--binary-files=\fP\fIword\fP
|
||||||
Specify how binary files are to be processed. If the word is "binary" (the
|
Specify how binary files are to be processed. If the word is "binary" (the
|
||||||
|
@ -158,8 +170,9 @@ be of interest and are skipped without causing any output or affecting the
|
||||||
return code.
|
return code.
|
||||||
.TP
|
.TP
|
||||||
\fB--buffer-size=\fP\fInumber\fP
|
\fB--buffer-size=\fP\fInumber\fP
|
||||||
Set the parameter that controls how much memory is used for buffering files
|
Set the parameter that controls how much memory is obtained at the start of
|
||||||
that are being scanned.
|
processing for buffering files that are being scanned. See also
|
||||||
|
\fB--max-buffer-size\fP below.
|
||||||
.TP
|
.TP
|
||||||
\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
|
\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
|
||||||
Output \fInumber\fP lines of context both before and after each matching line.
|
Output \fInumber\fP lines of context both before and after each matching line.
|
||||||
|
@ -432,6 +445,11 @@ of use only if it is set smaller than \fB--match-limit\fP.
|
||||||
There are no short forms for these options. The default settings are specified
|
There are no short forms for these options. The default settings are specified
|
||||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||||
.TP
|
.TP
|
||||||
|
\fB--max-buffer-size=\fP\fInumber\fP
|
||||||
|
This limits the expansion of the processing buffer, whose initial size can be
|
||||||
|
set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
|
||||||
|
smaller than the starting buffer size.
|
||||||
|
.TP
|
||||||
\fB-M\fP, \fB--multiline\fP
|
\fB-M\fP, \fB--multiline\fP
|
||||||
Allow patterns to match more than one line. When this option is given, patterns
|
Allow patterns to match more than one line. When this option is given, patterns
|
||||||
may usefully contain literal newline characters and internal occurrences of ^
|
may usefully contain literal newline characters and internal occurrences of ^
|
||||||
|
@ -757,6 +775,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 19 June 2016
|
Last updated: 11 October 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -206,7 +206,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define PACKAGE_NAME "PCRE2"
|
#define PACKAGE_NAME "PCRE2"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "PCRE2 10.22"
|
#define PACKAGE_STRING "PCRE2 10.23-RC1"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "pcre2"
|
#define PACKAGE_TARNAME "pcre2"
|
||||||
|
@ -215,7 +215,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define PACKAGE_URL ""
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "10.22"
|
#define PACKAGE_VERSION "10.23-RC1"
|
||||||
|
|
||||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
@ -224,15 +224,24 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define PARENS_NEST_LIMIT 250
|
#define PARENS_NEST_LIMIT 250
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
|
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||||
pcre2grep to hold parts of the file it is searching. This is also the
|
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||||
minimum value. The actual amount of memory used by pcre2grep is three times
|
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||||
this number, because it allows for the buffering of "before" and "after"
|
very long lines. The actual amount of memory used by pcre2grep is three
|
||||||
lines. */
|
times this number, because it allows for the buffering of "before" and
|
||||||
|
"after" lines. */
|
||||||
#ifndef PCRE2GREP_BUFSIZE
|
#ifndef PCRE2GREP_BUFSIZE
|
||||||
#define PCRE2GREP_BUFSIZE 20480
|
#define PCRE2GREP_BUFSIZE 20480
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||||
|
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||||
|
amount of memory used by pcre2grep is three times this number, because it
|
||||||
|
allows for the buffering of "before" and "after" lines. */
|
||||||
|
#ifndef PCRE2GREP_MAX_BUFSIZE
|
||||||
|
#define PCRE2GREP_MAX_BUFSIZE 1048576
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Define to any value to include debugging code. */
|
/* Define to any value to include debugging code. */
|
||||||
/* #undef PCRE2_DEBUG */
|
/* #undef PCRE2_DEBUG */
|
||||||
|
|
||||||
|
@ -299,7 +308,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* #undef SUPPORT_VALGRIND */
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#define VERSION "10.22"
|
#define VERSION "10.23-RC1"
|
||||||
|
|
||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
/* #undef const */
|
/* #undef const */
|
||||||
|
|
|
@ -207,13 +207,20 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
stack that is used while compiling a pattern. */
|
stack that is used while compiling a pattern. */
|
||||||
#undef PARENS_NEST_LIMIT
|
#undef PARENS_NEST_LIMIT
|
||||||
|
|
||||||
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
|
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||||
pcre2grep to hold parts of the file it is searching. This is also the
|
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||||
minimum value. The actual amount of memory used by pcre2grep is three times
|
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
|
||||||
this number, because it allows for the buffering of "before" and "after"
|
very long lines. The actual amount of memory used by pcre2grep is three
|
||||||
lines. */
|
times this number, because it allows for the buffering of "before" and
|
||||||
|
"after" lines. */
|
||||||
#undef PCRE2GREP_BUFSIZE
|
#undef PCRE2GREP_BUFSIZE
|
||||||
|
|
||||||
|
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
|
||||||
|
used by pcre2grep to hold parts of the file it is searching. The actual
|
||||||
|
amount of memory used by pcre2grep is three times this number, because it
|
||||||
|
allows for the buffering of "before" and "after" lines. */
|
||||||
|
#undef PCRE2GREP_MAX_BUFSIZE
|
||||||
|
|
||||||
/* to make a symbol visible */
|
/* to make a symbol visible */
|
||||||
#undef PCRE2POSIX_EXP_DECL
|
#undef PCRE2POSIX_EXP_DECL
|
||||||
|
|
||||||
|
|
187
src/pcre2grep.c
187
src/pcre2grep.c
|
@ -173,6 +173,7 @@ static int before_context = 0;
|
||||||
static int binary_files = BIN_BINARY;
|
static int binary_files = BIN_BINARY;
|
||||||
static int both_context = 0;
|
static int both_context = 0;
|
||||||
static int bufthird = PCRE2GREP_BUFSIZE;
|
static int bufthird = PCRE2GREP_BUFSIZE;
|
||||||
|
static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
|
||||||
static int bufsize = 3*PCRE2GREP_BUFSIZE;
|
static int bufsize = 3*PCRE2GREP_BUFSIZE;
|
||||||
static int endlinetype;
|
static int endlinetype;
|
||||||
|
|
||||||
|
@ -344,6 +345,7 @@ used to identify them. */
|
||||||
#define N_EXCLUDE_FROM (-19)
|
#define N_EXCLUDE_FROM (-19)
|
||||||
#define N_INCLUDE_FROM (-20)
|
#define N_INCLUDE_FROM (-20)
|
||||||
#define N_OM_SEPARATOR (-21)
|
#define N_OM_SEPARATOR (-21)
|
||||||
|
#define N_MAX_BUFSIZE (-22)
|
||||||
|
|
||||||
static option_item optionlist[] = {
|
static option_item optionlist[] = {
|
||||||
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
||||||
|
@ -352,7 +354,8 @@ static option_item optionlist[] = {
|
||||||
{ OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
|
{ OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
|
||||||
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
|
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
|
||||||
{ OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
|
{ OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
|
||||||
{ OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
|
{ OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
|
||||||
|
{ OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
|
||||||
{ OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
|
{ OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
|
||||||
{ OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
|
{ OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
|
||||||
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
|
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
|
||||||
|
@ -952,8 +955,9 @@ for (op = optionlist; op->one_char != 0; op++)
|
||||||
printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
|
printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --buffer-size=100K." STDOUT_NL);
|
printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
|
||||||
printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
|
printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
|
||||||
|
printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
|
||||||
printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
|
printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
|
||||||
printf("space is removed and blank lines are ignored." STDOUT_NL);
|
printf("space is removed and blank lines are ignored." STDOUT_NL);
|
||||||
printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
|
printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
|
||||||
|
@ -1100,12 +1104,12 @@ return om;
|
||||||
* Read one line of input *
|
* Read one line of input *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* Normally, input is read using fread() into a large buffer, so many lines may
|
/* Normally, input is read using fread() (or gzread, or BZ2_read) into a large
|
||||||
be read at once. However, doing this for tty input means that no output appears
|
buffer, so many lines may be read at once. However, doing this for tty input
|
||||||
until a lot of input has been typed. Instead, tty input is handled line by
|
means that no output appears until a lot of input has been typed. Instead, tty
|
||||||
line. We cannot use fgets() for this, because it does not stop at a binary
|
input is handled line by line. We cannot use fgets() for this, because it does
|
||||||
zero, and therefore there is no way of telling how many characters it has read,
|
not stop at a binary zero, and therefore there is no way of telling how many
|
||||||
because there may be binary zeros embedded in the data.
|
characters it has read, because there may be binary zeros embedded in the data.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
buffer the buffer to read into
|
buffer the buffer to read into
|
||||||
|
@ -1424,17 +1428,18 @@ do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
|
||||||
if (after_context > 0 && lastmatchnumber > 0)
|
if (after_context > 0 && lastmatchnumber > 0)
|
||||||
{
|
{
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while (lastmatchrestart < endptr && count++ < after_context)
|
while (lastmatchrestart < endptr && count < after_context)
|
||||||
{
|
{
|
||||||
int ellength;
|
int ellength;
|
||||||
char *pp = lastmatchrestart;
|
char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
|
||||||
|
if (ellength == 0 && pp == main_buffer + bufsize) break;
|
||||||
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
||||||
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
|
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
|
||||||
pp = end_of_line(pp, endptr, &ellength);
|
|
||||||
FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
|
||||||
lastmatchrestart = pp;
|
lastmatchrestart = pp;
|
||||||
|
count++;
|
||||||
}
|
}
|
||||||
hyphenpending = TRUE;
|
if (count > 0) hyphenpending = TRUE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1769,6 +1774,33 @@ return result != 0;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Read a portion of the file into buffer *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
static int
|
||||||
|
fill_buffer(void *handle, int frtype, char *buffer, int length,
|
||||||
|
BOOL input_line_buffered)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_LIBZ
|
||||||
|
if (frtype == FR_LIBZ)
|
||||||
|
return gzread((gzFile)handle, buffer, length);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SUPPORT_LIBBZ2
|
||||||
|
if (frtype == FR_LIBBZ2)
|
||||||
|
return BZ2_bzread((BZFILE *)handle, buffer, length);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return (input_line_buffered ?
|
||||||
|
read_one_line(buffer, length, (FILE *)handle) :
|
||||||
|
fread(buffer, 1, length, (FILE *)handle));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Grep an individual file *
|
* Grep an individual file *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -1813,49 +1845,24 @@ BOOL endhyphenpending = FALSE;
|
||||||
BOOL input_line_buffered = line_buffered;
|
BOOL input_line_buffered = line_buffered;
|
||||||
FILE *in = NULL; /* Ensure initialized */
|
FILE *in = NULL; /* Ensure initialized */
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBZ
|
|
||||||
gzFile ingz = NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBBZ2
|
|
||||||
BZFILE *inbz2 = NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Do the first read into the start of the buffer and set up the pointer to end
|
/* Do the first read into the start of the buffer and set up the pointer to end
|
||||||
of what we have. In the case of libz, a non-zipped .gz file will be read as a
|
of what we have. In the case of libz, a non-zipped .gz file will be read as a
|
||||||
plain file. However, if a .bz2 file isn't actually bzipped, the first read will
|
plain file. However, if a .bz2 file isn't actually bzipped, the first read will
|
||||||
fail. */
|
fail. */
|
||||||
|
|
||||||
(void)frtype;
|
if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBZ
|
|
||||||
if (frtype == FR_LIBZ)
|
|
||||||
{
|
|
||||||
ingz = (gzFile)handle;
|
|
||||||
bufflength = gzread (ingz, main_buffer, bufsize);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBBZ2
|
|
||||||
if (frtype == FR_LIBBZ2)
|
|
||||||
{
|
|
||||||
inbz2 = (BZFILE *)handle;
|
|
||||||
bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
|
|
||||||
if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
|
|
||||||
} /* without the cast it is unsigned. */
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
|
|
||||||
{
|
{
|
||||||
in = (FILE *)handle;
|
in = (FILE *)handle;
|
||||||
if (is_file_tty(in)) input_line_buffered = TRUE;
|
if (is_file_tty(in)) input_line_buffered = TRUE;
|
||||||
bufflength = input_line_buffered?
|
|
||||||
read_one_line(main_buffer, bufsize, in) :
|
|
||||||
fread(main_buffer, 1, bufsize, in);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
|
||||||
|
input_line_buffered);
|
||||||
|
|
||||||
|
#ifdef SUPPORT_LIBBZ2
|
||||||
|
if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; without the cast it is unsigned. */
|
||||||
|
#endif
|
||||||
|
|
||||||
endptr = main_buffer + bufflength;
|
endptr = main_buffer + bufflength;
|
||||||
|
|
||||||
/* Unless binary-files=text, see if we have a binary file. This uses the same
|
/* Unless binary-files=text, see if we have a binary file. This uses the same
|
||||||
|
@ -1899,18 +1906,61 @@ while (ptr < endptr)
|
||||||
|
|
||||||
/* Check to see if the line we are looking at extends right to the very end
|
/* Check to see if the line we are looking at extends right to the very end
|
||||||
of the buffer without a line terminator. This means the line is too long to
|
of the buffer without a line terminator. This means the line is too long to
|
||||||
handle. */
|
handle at the current buffer size. Until the buffer reaches its maximum size,
|
||||||
|
try doubling it and reading more data. */
|
||||||
|
|
||||||
if (endlinelength == 0 && t == main_buffer + bufsize)
|
if (endlinelength == 0 && t == main_buffer + bufsize)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n"
|
if (bufthird < max_bufthird)
|
||||||
"pcre2grep: the buffer size is %d\n"
|
{
|
||||||
"pcre2grep: use the --buffer-size option to change it\n",
|
char *new_buffer;
|
||||||
linenumber,
|
int new_bufthird = 2*bufthird;
|
||||||
(filename == NULL)? "" : " of file ",
|
|
||||||
(filename == NULL)? "" : filename,
|
if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
|
||||||
bufthird);
|
new_buffer = (char *)malloc(3*new_bufthird);
|
||||||
return 2;
|
|
||||||
|
if (new_buffer == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
"pcre2grep: line %d%s%s is too long for the internal buffer\n"
|
||||||
|
"pcre2grep: not enough memory to increase the buffer size to %d\n",
|
||||||
|
linenumber,
|
||||||
|
(filename == NULL)? "" : " of file ",
|
||||||
|
(filename == NULL)? "" : filename,
|
||||||
|
new_bufthird);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy the data and adjust pointers to the new buffer location. */
|
||||||
|
|
||||||
|
memcpy(new_buffer, main_buffer, bufsize);
|
||||||
|
bufthird = new_bufthird;
|
||||||
|
bufsize = 3*bufthird;
|
||||||
|
ptr = new_buffer + (ptr - main_buffer);
|
||||||
|
lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
|
||||||
|
free(main_buffer);
|
||||||
|
main_buffer = new_buffer;
|
||||||
|
|
||||||
|
/* Read more data into the buffer and then try to find the line ending
|
||||||
|
again. */
|
||||||
|
|
||||||
|
bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
|
||||||
|
bufsize - bufflength, input_line_buffered);
|
||||||
|
endptr = main_buffer + bufflength;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
"pcre2grep: line %d%s%s is too long for the internal buffer\n"
|
||||||
|
"pcre2grep: the maximum buffer size is %d\n"
|
||||||
|
"pcre2grep: use the --max-buffer-size option to change it\n",
|
||||||
|
linenumber,
|
||||||
|
(filename == NULL)? "" : " of file ",
|
||||||
|
(filename == NULL)? "" : filename,
|
||||||
|
bufthird);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Extra processing for Jeffrey Friedl's debugging. */
|
/* Extra processing for Jeffrey Friedl's debugging. */
|
||||||
|
@ -2320,8 +2370,9 @@ while (ptr < endptr)
|
||||||
lastmatchnumber > 0 &&
|
lastmatchnumber > 0 &&
|
||||||
lastmatchrestart < main_buffer + bufthird)
|
lastmatchrestart < main_buffer + bufthird)
|
||||||
{
|
{
|
||||||
|
|
||||||
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
||||||
lastmatchnumber = 0;
|
lastmatchnumber = 0; /* Indicates no after lines pending */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now do the shuffle */
|
/* Now do the shuffle */
|
||||||
|
@ -2329,24 +2380,8 @@ while (ptr < endptr)
|
||||||
memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
|
memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
|
||||||
ptr -= bufthird;
|
ptr -= bufthird;
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBZ
|
bufflength = 2*bufthird + fill_buffer(handle, frtype,
|
||||||
if (frtype == FR_LIBZ)
|
main_buffer + 2*bufthird, bufthird, input_line_buffered);
|
||||||
bufflength = 2*bufthird +
|
|
||||||
gzread (ingz, main_buffer + 2*bufthird, bufthird);
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBBZ2
|
|
||||||
if (frtype == FR_LIBBZ2)
|
|
||||||
bufflength = 2*bufthird +
|
|
||||||
BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
|
|
||||||
bufflength = 2*bufthird +
|
|
||||||
(input_line_buffered?
|
|
||||||
read_one_line(main_buffer + 2*bufthird, bufthird, in) :
|
|
||||||
fread(main_buffer + 2*bufthird, 1, bufthird, in));
|
|
||||||
endptr = main_buffer + bufflength;
|
endptr = main_buffer + bufflength;
|
||||||
|
|
||||||
/* Adjust any last match point */
|
/* Adjust any last match point */
|
||||||
|
@ -3427,6 +3462,12 @@ if (jfriedl_XT != 0 || jfriedl_XR != 0)
|
||||||
|
|
||||||
/* Get memory for the main buffer. */
|
/* Get memory for the main buffer. */
|
||||||
|
|
||||||
|
if (bufthird <= 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
|
||||||
|
goto EXIT2;
|
||||||
|
}
|
||||||
|
|
||||||
bufsize = 3*bufthird;
|
bufsize = 3*bufthird;
|
||||||
main_buffer = (char *)malloc(bufsize);
|
main_buffer = (char *)malloc(bufsize);
|
||||||
|
|
||||||
|
|
|
@ -637,8 +637,8 @@ RC=0
|
||||||
RC=0
|
RC=0
|
||||||
---------------------------- Test 83 -----------------------------
|
---------------------------- Test 83 -----------------------------
|
||||||
pcre2grep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer
|
pcre2grep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer
|
||||||
pcre2grep: the buffer size is 100
|
pcre2grep: the maximum buffer size is 100
|
||||||
pcre2grep: use the --buffer-size option to change it
|
pcre2grep: use the --max-buffer-size option to change it
|
||||||
RC=2
|
RC=2
|
||||||
---------------------------- Test 84 -----------------------------
|
---------------------------- Test 84 -----------------------------
|
||||||
testdata/grepinputv:fox jumps
|
testdata/grepinputv:fox jumps
|
||||||
|
|
Loading…
Reference in New Issue