Fix callout string read overrun; do better with catching these when using

zero-terminated patterns under valgrind.
This commit is contained in:
Philip.Hazel 2016-11-01 17:45:54 +00:00
parent 4fd8feaa50
commit 6a15c1cbcc
2 changed files with 63 additions and 34 deletions

View File

@ -21,15 +21,23 @@ some minor bugs and Perl incompatibilities were fixed, including:
(a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead (a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead
of giving an invalid quantifier error. of giving an invalid quantifier error.
(b) {0} can now be used after a group in a lookbehind assertion; previously (b) {0} can now be used after a group in a lookbehind assertion; previously
this caused an "assertion is not fixed length" error. this caused an "assertion is not fixed length" error.
(c) Perl always treats (?(DEFINE) as a "define" group, even if a group with (c) Perl always treats (?(DEFINE) as a "define" group, even if a group with
the name "DEFINE" exists. PCRE2 now does likewise. the name "DEFINE" exists. PCRE2 now does likewise.
(d) A recursion condition test such as (?(R2)...) must now refer to an (d) A recursion condition test such as (?(R2)...) must now refer to an
existing subpattern. existing subpattern.
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a (e) A conditional recursion test such as (?(R)...) misbehaved if there was a
group whose name began with "R". group whose name began with "R".
(f) When testing zero-terminated patterns under valgrind, the terminating
zero is now marked "no access". This catches bugs that would otherwise
show up only with non-zero-terminated patterns.
One effect of the refactoring is that some error numbers and messages have One effect of the refactoring is that some error numbers and messages have
changed, and the pattern offset given for compiling errors is not always the changed, and the pattern offset given for compiling errors is not always the
right-most character that has been read. In particular, for a variable-length right-most character that has been read. In particular, for a variable-length
@ -64,6 +72,9 @@ Some bugs in the refactored code were subsequently fixed before release:
(f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g. (f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
"{2,2") could cause reading beyond the pattern. "{2,2") could cause reading beyond the pattern.
(g) When reading a callout string, if the end delimiter was at the end of the
pattern one further code unit was read.
4. Back references are now permitted in lookbehind assertions when there are 4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the no duplicated group numbers (that is, (?| has not been used), and, if the
reference is by name, there is only one group of that name. The referenced reference is by name, there is only one group of that name. The referenced

View File

@ -3436,7 +3436,8 @@ while (ptr < ptrend)
ptr = startptr; /* To give a more useful message */ ptr = startptr; /* To give a more useful message */
goto FAILED; goto FAILED;
} }
if (*ptr == delimiter && (++ptr > ptrend || *ptr != delimiter)) break; if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
break;
} }
calloutlength = (PCRE2_SIZE)(ptr - startptr); calloutlength = (PCRE2_SIZE)(ptr - startptr);
@ -8827,6 +8828,7 @@ pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
{ {
BOOL utf; /* Set TRUE for UTF mode */ BOOL utf; /* Set TRUE for UTF mode */
BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */ BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
pcre2_real_code *re = NULL; /* What we will return */ pcre2_real_code *re = NULL; /* What we will return */
compile_block cb; /* "Static" compile-time data */ compile_block cb; /* "Static" compile-time data */
const uint8_t *tables; /* Char tables base pointer */ const uint8_t *tables; /* Char tables base pointer */
@ -8901,13 +8903,19 @@ if (ccontext == NULL)
/* A zero-terminated pattern is indicated by the special length value /* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */ PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED)))
patlen = PRIV(strlen)(pattern);
if (patlen > ccontext->max_pattern_length) if (patlen > ccontext->max_pattern_length)
{ {
*errorptr = ERR88; *errorptr = ERR88;
return NULL; return NULL;
} }
/* From here on, all returns from this function should end up going via the
EXIT label. */
/* ------------ Initialize the "static" compile data -------------- */ /* ------------ Initialize the "static" compile data -------------- */
tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables); tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
@ -8966,7 +8974,13 @@ for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET;
/* --------------- Start looking at the pattern --------------- */ /* --------------- Start looking at the pattern --------------- */
/* Check for global one-time option settings at the start of the pattern, and /* Check for global one-time option settings at the start of the pattern, and
remember the offset to the actual regex. */ remember the offset to the actual regex. With valgrind support, make the
terminator of a zero-terminated pattern inaccessible. This catches bugs that
would otherwise only show up for non-zero-terminated patterns. */
#ifdef SUPPORT_VALGRIND
if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
#endif
ptr = pattern; ptr = pattern;
skipatstart = 0; skipatstart = 0;
@ -9148,14 +9162,14 @@ if ((options & PCRE2_AUTO_CALLOUT) != 0)
if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE) if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
{ {
cb.parsed_pattern = ccontext->memctl.malloc( uint32_t *heap_parsed_pattern = ccontext->memctl.malloc(
(parsed_size_needed + 1) * sizeof(uint32_t), (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data);
ccontext->memctl.memory_data); if (heap_parsed_pattern == NULL)
if (cb.parsed_pattern == NULL)
{ {
*errorptr = ERR21; *errorptr = ERR21;
return NULL; goto EXIT;
} }
cb.parsed_pattern = heap_parsed_pattern;
} }
/* Do the parsing scan. */ /* Do the parsing scan. */
@ -9547,12 +9561,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
goto HAD_CB_ERROR; goto HAD_CB_ERROR;
} }
/* Control ends up here in all cases. If memory was obtained for the parsed /* Control ends up here in all cases. When running under valgrind, make a
pattern's terminating zero defined again. If memory was obtained for the parsed
version of the pattern, free it before returning. Also free the list of named version of the pattern, free it before returning. Also free the list of named
groups if a larger one had to be obtained, and likewise the group information groups if a larger one had to be obtained, and likewise the group information
vector. */ vector. */
EXIT: EXIT:
#ifdef SUPPORT_VALGRIND
if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1));
#endif
if (cb.parsed_pattern != stack_parsed_pattern) if (cb.parsed_pattern != stack_parsed_pattern)
ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data); ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data);
if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE) if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)