diff --git a/ChangeLog b/ChangeLog index b739b50..c9fe550 100644 --- a/ChangeLog +++ b/ChangeLog @@ -226,6 +226,9 @@ overflow. make it easier to test long repetitive patterns. The tests for 63 above are converted to use the new feature. +66. In the POSIX wrapper, if regerror() was given too small a buffer, it could +misbehave. + Version 10.20 30-June-2015 -------------------------- diff --git a/doc/pcre2posix.3 b/doc/pcre2posix.3 index 5d5164b..9d1b96d 100644 --- a/doc/pcre2posix.3 +++ b/doc/pcre2posix.3 @@ -1,4 +1,4 @@ -.TH PCRE2POSIX 3 "03 September 2015" "PCRE2 10.21" +.TH PCRE2POSIX 3 "30 October 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "SYNOPSIS" @@ -240,9 +240,11 @@ header file, of which REG_NOMATCH is the "expected" failure code. The \fBregerror()\fP function maps a non-zero errorcode from either \fBregcomp()\fP or \fBregexec()\fP to a printable message. If \fIpreg\fP is not NULL, the error should have arisen from the use of that structure. A message -terminated by a binary zero is placed in \fIerrbuf\fP. The length of the -message, including the zero, is limited to \fIerrbuf_size\fP. The yield of the -function is the size of buffer needed to hold the whole message. +terminated by a binary zero is placed in \fIerrbuf\fP. If the buffer is too +short, only the first \fIerrbuf_size\fP - 1 characters of the error message are +used. The yield of the function is the size of buffer needed to hold the whole +message, including the terminating zero. This value is greater than +\fIerrbuf_size\fP if the message was truncated. . . .SH MEMORY USAGE @@ -267,6 +269,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 03 September 2015 +Last updated: 30 October 2015 Copyright (c) 1997-2015 University of Cambridge. .fi diff --git a/src/pcre2posix.c b/src/pcre2posix.c index 778d985..134a0c1 100644 --- a/src/pcre2posix.c +++ b/src/pcre2posix.c @@ -144,29 +144,23 @@ static const char *const pstring[] = { PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) { -const char *message, *addmessage; -size_t length, addlength; +int used; +const char *message; message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? "unknown error code" : pstring[errcode]; -length = strlen(message) + 1; -addmessage = " at offset "; -addlength = (preg != NULL && (int)preg->re_erroffset != -1)? - strlen(addmessage) + 6 : 0; - -if (errbuf_size > 0) +if (preg != NULL && (int)preg->re_erroffset != -1) { - if (addlength > 0 && errbuf_size >= length + addlength) - sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); - else - { - strncpy(errbuf, message, errbuf_size - 1); - errbuf[errbuf_size-1] = 0; - } + used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message, + (int)preg->re_erroffset); } - -return length + addlength; +else + { + used = snprintf(errbuf, errbuf_size, "%s", message); + } + +return used + 1; } diff --git a/src/pcre2test.c b/src/pcre2test.c index a556a7a..04491ef 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -445,6 +445,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */ uint32_t jit; uint32_t stackguard_test; uint32_t tables_id; + uint32_t regerror_buffsize; uint8_t locale[LOCALESIZE]; } patctl; @@ -566,6 +567,7 @@ static modstruct modlist[] = { { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) }, + { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) }, { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, @@ -774,7 +776,7 @@ buffer is where all input lines are read. Its size is the same as pbuffer8. Pattern lines are always copied to pbuffer8 for use in callouts, even if they are actually compiled from pbuffer16 or pbuffer32. */ -static int pbuffer8_size = 50000; /* Initial size, bytes */ +static size_t pbuffer8_size = 50000; /* Initial size, bytes */ static uint8_t *pbuffer8 = NULL; static uint8_t *buffer = NULL; @@ -4575,8 +4577,21 @@ if ((pat_patctl.control & CTL_POSIX) != 0) rc = regcomp(&preg, (char *)pbuffer8, cflags); if (rc != 0) /* Failure */ { - (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size); + size_t bsize, usize; + + bsize = (pat_patctl.regerror_buffsize != 0)? + pat_patctl.regerror_buffsize : pbuffer8_size; + if (bsize + 8 < pbuffer8_size) + memcpy(pbuffer8 + bsize, "DEADBEEF", 8); + usize = regerror(rc, &preg, (char *)pbuffer8, bsize); + fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, pbuffer8); + if (usize > bsize) + { + fprintf(outfile, "** regerror() message truncated\n"); + if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0) + fprintf(outfile, "** regerror() buffer overflow\n"); + } return PR_SKIP; } return PR_OK; diff --git a/testdata/testinput18 b/testdata/testinput18 index 5f30940..c75b842 100644 --- a/testdata/testinput18 +++ b/testdata/testinput18 @@ -94,4 +94,8 @@ /abcd/substitute_extended +/\[A]{1000000}**/expand,regerror_buffsize=31 + +/\[A]{1000000}**/expand,regerror_buffsize=32 + # End of testdata/testinput18 diff --git a/testdata/testoutput18 b/testdata/testoutput18 index 9a5620e..0a5ffff 100644 --- a/testdata/testoutput18 +++ b/testdata/testoutput18 @@ -143,4 +143,11 @@ Failed: POSIX code 3: pattern error at offset 2 /abcd/substitute_extended ** Ignored with POSIX interface: substitute_extended +/\[A]{1000000}**/expand,regerror_buffsize=31 +Failed: POSIX code 4: ? * + invalid at offset 100000 +** regerror() message truncated + +/\[A]{1000000}**/expand,regerror_buffsize=32 +Failed: POSIX code 4: ? * + invalid at offset 1000001 + # End of testdata/testinput18