Fix callout string read overrun; do better with catching these when using

zero-terminated patterns under valgrind.
This commit is contained in:
Philip.Hazel 2016-11-01 17:45:54 +00:00
parent 4fd8feaa50
commit 6a15c1cbcc
2 changed files with 63 additions and 34 deletions

View File

@ -21,14 +21,22 @@ some minor bugs and Perl incompatibilities were fixed, including:
(a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead
of giving an invalid quantifier error.
(b) {0} can now be used after a group in a lookbehind assertion; previously
this caused an "assertion is not fixed length" error.
(c) Perl always treats (?(DEFINE) as a "define" group, even if a group with
the name "DEFINE" exists. PCRE2 now does likewise.
(d) A recursion condition test such as (?(R2)...) must now refer to an
existing subpattern.
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a
group whose name began with "R".
(f) When testing zero-terminated patterns under valgrind, the terminating
zero is now marked "no access". This catches bugs that would otherwise
show up only with non-zero-terminated patterns.
One effect of the refactoring is that some error numbers and messages have
changed, and the pattern offset given for compiling errors is not always the
@ -63,6 +71,9 @@ Some bugs in the refactored code were subsequently fixed before release:
(f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
"{2,2") could cause reading beyond the pattern.
(g) When reading a callout string, if the end delimiter was at the end of the
pattern one further code unit was read.
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the

View File

@ -3436,7 +3436,8 @@ while (ptr < ptrend)
ptr = startptr; /* To give a more useful message */
goto FAILED;
}
if (*ptr == delimiter && (++ptr > ptrend || *ptr != delimiter)) break;
if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
break;
}
calloutlength = (PCRE2_SIZE)(ptr - startptr);
@ -7653,7 +7654,7 @@ do {
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
{
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
@ -7678,7 +7679,7 @@ do {
else if (op == OP_COND)
{
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
@ -8825,36 +8826,37 @@ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{
BOOL utf; /* Set TRUE for UTF mode */
BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
pcre2_real_code *re = NULL; /* What we will return */
compile_block cb; /* "Static" compile-time data */
const uint8_t *tables; /* Char tables base pointer */
BOOL utf; /* Set TRUE for UTF mode */
BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
pcre2_real_code *re = NULL; /* What we will return */
compile_block cb; /* "Static" compile-time data */
const uint8_t *tables; /* Char tables base pointer */
PCRE2_UCHAR *code; /* Current pointer in compiled code */
PCRE2_SPTR codestart; /* Start of compiled code */
PCRE2_SPTR ptr; /* Current pointer in pattern */
uint32_t *pptr; /* Current pointer in parsed pattern */
PCRE2_UCHAR *code; /* Current pointer in compiled code */
PCRE2_SPTR codestart; /* Start of compiled code */
PCRE2_SPTR ptr; /* Current pointer in pattern */
uint32_t *pptr; /* Current pointer in parsed pattern */
PCRE2_SIZE length = 1; /* Allow for final END opcode */
PCRE2_SIZE usedlength; /* Actual length used */
PCRE2_SIZE re_blocksize; /* Size of memory block */
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
PCRE2_SIZE length = 1; /* Allow for final END opcode */
PCRE2_SIZE usedlength; /* Actual length used */
PCRE2_SIZE re_blocksize; /* Size of memory block */
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
uint32_t firstcu, reqcu; /* Value of first/req code unit */
uint32_t setflags = 0; /* NL and BSR set flags */
int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
uint32_t firstcu, reqcu; /* Value of first/req code unit */
uint32_t setflags = 0; /* NL and BSR set flags */
uint32_t skipatstart; /* When checking (*UTF) etc */
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
uint32_t skipatstart; /* When checking (*UTF) etc */
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
uint32_t limit_recursion = UINT32_MAX;
int newline = 0; /* Unset; can be set by the pattern */
int bsr = 0; /* Unset; can be set by the pattern */
int errorcode = 0; /* Initialize to avoid compiler warn */
int newline = 0; /* Unset; can be set by the pattern */
int bsr = 0; /* Unset; can be set by the pattern */
int errorcode = 0; /* Initialize to avoid compiler warn */
uint32_t i; /* Local loop counter */
uint32_t i; /* Local loop counter */
/* Comments at the head of this file explain about these variables. */
@ -8901,13 +8903,19 @@ if (ccontext == NULL)
/* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern);
if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED)))
patlen = PRIV(strlen)(pattern);
if (patlen > ccontext->max_pattern_length)
{
*errorptr = ERR88;
return NULL;
}
/* From here on, all returns from this function should end up going via the
EXIT label. */
/* ------------ Initialize the "static" compile data -------------- */
tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
@ -8966,7 +8974,13 @@ for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET;
/* --------------- Start looking at the pattern --------------- */
/* Check for global one-time option settings at the start of the pattern, and
remember the offset to the actual regex. */
remember the offset to the actual regex. With valgrind support, make the
terminator of a zero-terminated pattern inaccessible. This catches bugs that
would otherwise only show up for non-zero-terminated patterns. */
#ifdef SUPPORT_VALGRIND
if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
#endif
ptr = pattern;
skipatstart = 0;
@ -9148,14 +9162,14 @@ if ((options & PCRE2_AUTO_CALLOUT) != 0)
if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
{
cb.parsed_pattern = ccontext->memctl.malloc(
(parsed_size_needed + 1) * sizeof(uint32_t),
ccontext->memctl.memory_data);
if (cb.parsed_pattern == NULL)
uint32_t *heap_parsed_pattern = ccontext->memctl.malloc(
(parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data);
if (heap_parsed_pattern == NULL)
{
*errorptr = ERR21;
return NULL;
goto EXIT;
}
cb.parsed_pattern = heap_parsed_pattern;
}
/* Do the parsing scan. */
@ -9547,12 +9561,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
goto HAD_CB_ERROR;
}
/* Control ends up here in all cases. If memory was obtained for the parsed
/* Control ends up here in all cases. When running under valgrind, make a
pattern's terminating zero defined again. If memory was obtained for the parsed
version of the pattern, free it before returning. Also free the list of named
groups if a larger one had to be obtained, and likewise the group information
vector. */
EXIT:
#ifdef SUPPORT_VALGRIND
if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1));
#endif
if (cb.parsed_pattern != stack_parsed_pattern)
ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data);
if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)