pcre2grep: fix \r\r\n problem for Windows, and give buffer size in error message.
This commit is contained in:
Philip.Hazel 2016-04-01 09:15:38 +00:00
parent 3e486da3de
commit ddcedf0338
3 changed files with 64 additions and 31 deletions

View File

@ -89,6 +89,14 @@ older MSVC compilers.
19. Applied Chris Wilson's patch (Bugzilla #1681) to CMakeLists.txt for MSVC
static compilation.
20. Updated pcre2grep to set stdout as binary when run under Windows, so as not
to convert \r\n at the ends of reflected lines into \r\r\n. This required
ensuring that other output that is written to stdout (e.g. file names) uses the
appropriate line terminator: \r\n for Windows, \n otherwise.
21. When a line is too long for pcre2grep's internal buffer, show the maximum
length in the error message.
Version 10.21 12-January-2016
-----------------------------

View File

@ -121,6 +121,17 @@ apply to fprintf(). */
#define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {} #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
/* On Windows stdout has to be put into binary mode, otherwise a \r\n that is
already present at the end of an output line gets expanded to \r\r\n. The
consequence is that every message written to stdout must supply its own \r\n
line terminator there. STDOUT_NL is the newline string to use for such
messages: "\r\n" when building for Windows, "\n" everywhere else. */
#if !defined(_WIN32) && !defined(WIN32)
#define STDOUT_NL "\n"
#else
#define STDOUT_NL "\r\n"
#endif
/************************************************* /*************************************************
@ -885,27 +896,27 @@ help(void)
{ {
option_item *op; option_item *op;
printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
printf("Search for PATTERN in each FILE or standard input.\n"); printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
printf("PATTERN must be present if neither -e nor -f is used.\n"); printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
printf("\"-\" can be used as a file name to mean STDIN.\n"); printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
#ifdef SUPPORT_LIBZ #ifdef SUPPORT_LIBZ
printf("Files whose names end in .gz are read using zlib.\n"); printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
#endif #endif
#ifdef SUPPORT_LIBBZ2 #ifdef SUPPORT_LIBBZ2
printf("Files whose names end in .bz2 are read using bzlib2.\n"); printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
#endif #endif
#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
printf("Other files and the standard input are read as plain files.\n\n"); printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
#else #else
printf("All files are read as plain files, without any interpretation.\n\n"); printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
#endif #endif
printf("Example: pcre2grep -i 'hello.*world' menu.h main.c\n\n"); printf("Example: pcre2grep -i 'hello.*world' menu.h main.c" STDOUT_NL STDOUT_NL);
printf("Options:\n"); printf("Options:" STDOUT_NL);
for (op = optionlist; op->one_char != 0; op++) for (op = optionlist; op->one_char != 0; op++)
{ {
@ -922,17 +933,17 @@ for (op = optionlist; op->one_char != 0; op++)
} }
if (n < 1) n = 1; if (n < 1) n = 1;
printf("%.*s%s\n", n, " ", op->help_text); printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
} }
printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n"); printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --buffer-size=100K." STDOUT_NL);
printf("The default value for --buffer-size is %d.\n", PCRE2GREP_BUFSIZE); printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
printf("When reading patterns or file names from a file, trailing white\n"); printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
printf("space is removed and blank lines are ignored.\n"); printf("space is removed and blank lines are ignored." STDOUT_NL);
printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN); printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n"); printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
} }
@ -1609,10 +1620,12 @@ while (ptr < endptr)
if (endlinelength == 0 && t == main_buffer + bufsize) if (endlinelength == 0 && t == main_buffer + bufsize)
{ {
fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n" fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n"
"pcre2grep: check the --buffer-size option\n", "pcre2grep: the buffer size is %d\n"
"pcre2grep: use the --buffer-size option to change it\n",
linenumber, linenumber,
(filename == NULL)? "" : " of file ", (filename == NULL)? "" : " of file ",
(filename == NULL)? "" : filename); (filename == NULL)? "" : filename,
bufthird);
return 2; return 2;
} }
@ -1705,7 +1718,7 @@ while (ptr < endptr)
else if (binary) else if (binary)
{ {
fprintf(stdout, "Binary file %s matches\n", filename); fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
return 0; return 0;
} }
@ -1714,7 +1727,7 @@ while (ptr < endptr)
else if (filenames == FN_MATCH_ONLY) else if (filenames == FN_MATCH_ONLY)
{ {
fprintf(stdout, "%s\n", printname); fprintf(stdout, "%s" STDOUT_NL, printname);
return 0; return 0;
} }
@ -1739,13 +1752,13 @@ while (ptr < endptr)
/* Handle --line-offsets */ /* Handle --line-offsets */
if (line_offsets) if (line_offsets)
fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), fprintf(stdout, "%d,%d" STDOUT_NL, (int)(matchptr + offsets[0] - ptr),
(int)(offsets[1] - offsets[0])); (int)(offsets[1] - offsets[0]));
/* Handle --file-offsets */ /* Handle --file-offsets */
else if (file_offsets) else if (file_offsets)
fprintf(stdout, "%d,%d\n", fprintf(stdout, "%d,%d" STDOUT_NL,
(int)(filepos + matchptr + offsets[0] - ptr), (int)(filepos + matchptr + offsets[0] - ptr),
(int)(offsets[1] - offsets[0])); (int)(offsets[1] - offsets[0]));
@ -1773,7 +1786,8 @@ while (ptr < endptr)
} }
} }
if (printed || printname != NULL || number) fprintf(stdout, "\n"); if (printed || printname != NULL || number)
fprintf(stdout, STDOUT_NL);
} }
/* Prepare to repeat to find the next match. If the pattern contained a /* Prepare to repeat to find the next match. If the pattern contained a
@ -1838,7 +1852,7 @@ while (ptr < endptr)
if (hyphenpending) if (hyphenpending)
{ {
fprintf(stdout, "--\n"); fprintf(stdout, "--" STDOUT_NL);
hyphenpending = FALSE; hyphenpending = FALSE;
hyphenprinted = TRUE; hyphenprinted = TRUE;
} }
@ -1859,7 +1873,7 @@ while (ptr < endptr)
} }
if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
fprintf(stdout, "--\n"); fprintf(stdout, "--" STDOUT_NL);
while (p < ptr) while (p < ptr)
{ {
@ -2063,7 +2077,7 @@ were none. If we found a match, we won't have got this far. */
if (filenames == FN_NOMATCH_ONLY) if (filenames == FN_NOMATCH_ONLY)
{ {
fprintf(stdout, "%s\n", printname); fprintf(stdout, "%s" STDOUT_NL, printname);
return 0; return 0;
} }
@ -2075,7 +2089,7 @@ if (count_only && !quiet)
{ {
if (printname != NULL && filenames != FN_NONE) if (printname != NULL && filenames != FN_NONE)
fprintf(stdout, "%s:", printname); fprintf(stdout, "%s:", printname);
fprintf(stdout, "%d\n", count); fprintf(stdout, "%d" STDOUT_NL, count);
} }
} }
@ -2396,7 +2410,7 @@ switch(letter)
{ {
unsigned char buffer[128]; unsigned char buffer[128];
(void)pcre2_config(PCRE2_CONFIG_VERSION, buffer); (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
fprintf(stdout, "pcre2grep version %s\n", buffer); fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
} }
pcre2grep_exit(0); pcre2grep_exit(0);
break; break;
@ -2623,6 +2637,16 @@ const char *locale_from = "--locale";
pcre2_jit_stack *jit_stack = NULL; pcre2_jit_stack *jit_stack = NULL;
#endif #endif
/* In Windows, stdout is set up as a text stream, which means that \n is
converted to \r\n. This causes output lines that are copied from the input to
change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
that stdout is a binary stream. Note that this means all other output to stdout
must use STDOUT_NL to terminate lines. */
#if defined(_WIN32) || defined(WIN32)
setmode(fileno(stdout), O_BINARY);
#endif
/* Set up a default compile and match contexts and a match data block. */ /* Set up a default compile and match contexts and a match data block. */
compile_context = pcre2_compile_context_create(NULL); compile_context = pcre2_compile_context_create(NULL);

3
testdata/grepoutput vendored
View File

@ -637,7 +637,8 @@ RC=0
RC=0 RC=0
---------------------------- Test 83 ----------------------------- ---------------------------- Test 83 -----------------------------
pcre2grep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer pcre2grep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer
pcre2grep: check the --buffer-size option pcre2grep: the buffer size is 100
pcre2grep: use the --buffer-size option to change it
RC=2 RC=2
---------------------------- Test 84 ----------------------------- ---------------------------- Test 84 -----------------------------
testdata/grepinputv:fox jumps testdata/grepinputv:fox jumps