Fix callout string read overrun; do better with catching these when using
zero-terminated patterns under valgrind.
This commit is contained in:
parent
4fd8feaa50
commit
6a15c1cbcc
11
ChangeLog
11
ChangeLog
|
@ -21,15 +21,23 @@ some minor bugs and Perl incompatibilities were fixed, including:
|
||||||
|
|
||||||
(a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead
|
(a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead
|
||||||
of giving an invalid quantifier error.
|
of giving an invalid quantifier error.
|
||||||
|
|
||||||
(b) {0} can now be used after a group in a lookbehind assertion; previously
|
(b) {0} can now be used after a group in a lookbehind assertion; previously
|
||||||
this caused an "assertion is not fixed length" error.
|
this caused an "assertion is not fixed length" error.
|
||||||
|
|
||||||
(c) Perl always treats (?(DEFINE) as a "define" group, even if a group with
|
(c) Perl always treats (?(DEFINE) as a "define" group, even if a group with
|
||||||
the name "DEFINE" exists. PCRE2 now does likewise.
|
the name "DEFINE" exists. PCRE2 now does likewise.
|
||||||
|
|
||||||
(d) A recursion condition test such as (?(R2)...) must now refer to an
|
(d) A recursion condition test such as (?(R2)...) must now refer to an
|
||||||
existing subpattern.
|
existing subpattern.
|
||||||
|
|
||||||
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a
|
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a
|
||||||
group whose name began with "R".
|
group whose name began with "R".
|
||||||
|
|
||||||
|
(f) When testing zero-terminated patterns under valgrind, the terminating
|
||||||
|
zero is now marked "no access". This catches bugs that would otherwise
|
||||||
|
show up only with non-zero-terminated patterns.
|
||||||
|
|
||||||
One effect of the refactoring is that some error numbers and messages have
|
One effect of the refactoring is that some error numbers and messages have
|
||||||
changed, and the pattern offset given for compiling errors is not always the
|
changed, and the pattern offset given for compiling errors is not always the
|
||||||
right-most character that has been read. In particular, for a variable-length
|
right-most character that has been read. In particular, for a variable-length
|
||||||
|
@ -64,6 +72,9 @@ Some bugs in the refactored code were subsequently fixed before release:
|
||||||
(f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
|
(f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
|
||||||
"{2,2") could cause reading beyond the pattern.
|
"{2,2") could cause reading beyond the pattern.
|
||||||
|
|
||||||
|
(g) When reading a callout string, if the end delimiter was at the end of the
|
||||||
|
pattern one further code unit was read.
|
||||||
|
|
||||||
4. Back references are now permitted in lookbehind assertions when there are
|
4. Back references are now permitted in lookbehind assertions when there are
|
||||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
reference is by name, there is only one group of that name. The referenced
|
reference is by name, there is only one group of that name. The referenced
|
||||||
|
|
|
@ -3436,7 +3436,8 @@ while (ptr < ptrend)
|
||||||
ptr = startptr; /* To give a more useful message */
|
ptr = startptr; /* To give a more useful message */
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
if (*ptr == delimiter && (++ptr > ptrend || *ptr != delimiter)) break;
|
if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
calloutlength = (PCRE2_SIZE)(ptr - startptr);
|
calloutlength = (PCRE2_SIZE)(ptr - startptr);
|
||||||
|
@ -8825,36 +8826,37 @@ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
|
||||||
pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
|
pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
|
||||||
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
|
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
|
||||||
{
|
{
|
||||||
BOOL utf; /* Set TRUE for UTF mode */
|
BOOL utf; /* Set TRUE for UTF mode */
|
||||||
BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
|
BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
|
||||||
pcre2_real_code *re = NULL; /* What we will return */
|
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
|
||||||
compile_block cb; /* "Static" compile-time data */
|
pcre2_real_code *re = NULL; /* What we will return */
|
||||||
const uint8_t *tables; /* Char tables base pointer */
|
compile_block cb; /* "Static" compile-time data */
|
||||||
|
const uint8_t *tables; /* Char tables base pointer */
|
||||||
|
|
||||||
PCRE2_UCHAR *code; /* Current pointer in compiled code */
|
PCRE2_UCHAR *code; /* Current pointer in compiled code */
|
||||||
PCRE2_SPTR codestart; /* Start of compiled code */
|
PCRE2_SPTR codestart; /* Start of compiled code */
|
||||||
PCRE2_SPTR ptr; /* Current pointer in pattern */
|
PCRE2_SPTR ptr; /* Current pointer in pattern */
|
||||||
uint32_t *pptr; /* Current pointer in parsed pattern */
|
uint32_t *pptr; /* Current pointer in parsed pattern */
|
||||||
|
|
||||||
PCRE2_SIZE length = 1; /* Allow for final END opcode */
|
PCRE2_SIZE length = 1; /* Allow for final END opcode */
|
||||||
PCRE2_SIZE usedlength; /* Actual length used */
|
PCRE2_SIZE usedlength; /* Actual length used */
|
||||||
PCRE2_SIZE re_blocksize; /* Size of memory block */
|
PCRE2_SIZE re_blocksize; /* Size of memory block */
|
||||||
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
|
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
|
||||||
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
|
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
|
||||||
|
|
||||||
int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
|
int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
|
||||||
uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
||||||
uint32_t setflags = 0; /* NL and BSR set flags */
|
uint32_t setflags = 0; /* NL and BSR set flags */
|
||||||
|
|
||||||
uint32_t skipatstart; /* When checking (*UTF) etc */
|
uint32_t skipatstart; /* When checking (*UTF) etc */
|
||||||
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
|
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
|
||||||
uint32_t limit_recursion = UINT32_MAX;
|
uint32_t limit_recursion = UINT32_MAX;
|
||||||
|
|
||||||
int newline = 0; /* Unset; can be set by the pattern */
|
int newline = 0; /* Unset; can be set by the pattern */
|
||||||
int bsr = 0; /* Unset; can be set by the pattern */
|
int bsr = 0; /* Unset; can be set by the pattern */
|
||||||
int errorcode = 0; /* Initialize to avoid compiler warn */
|
int errorcode = 0; /* Initialize to avoid compiler warn */
|
||||||
|
|
||||||
uint32_t i; /* Local loop counter */
|
uint32_t i; /* Local loop counter */
|
||||||
|
|
||||||
/* Comments at the head of this file explain about these variables. */
|
/* Comments at the head of this file explain about these variables. */
|
||||||
|
|
||||||
|
@ -8901,13 +8903,19 @@ if (ccontext == NULL)
|
||||||
/* A zero-terminated pattern is indicated by the special length value
|
/* A zero-terminated pattern is indicated by the special length value
|
||||||
PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */
|
PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */
|
||||||
|
|
||||||
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern);
|
if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED)))
|
||||||
|
patlen = PRIV(strlen)(pattern);
|
||||||
|
|
||||||
if (patlen > ccontext->max_pattern_length)
|
if (patlen > ccontext->max_pattern_length)
|
||||||
{
|
{
|
||||||
*errorptr = ERR88;
|
*errorptr = ERR88;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* From here on, all returns from this function should end up going via the
|
||||||
|
EXIT label. */
|
||||||
|
|
||||||
|
|
||||||
/* ------------ Initialize the "static" compile data -------------- */
|
/* ------------ Initialize the "static" compile data -------------- */
|
||||||
|
|
||||||
tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
|
tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
|
||||||
|
@ -8966,7 +8974,13 @@ for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET;
|
||||||
/* --------------- Start looking at the pattern --------------- */
|
/* --------------- Start looking at the pattern --------------- */
|
||||||
|
|
||||||
/* Check for global one-time option settings at the start of the pattern, and
|
/* Check for global one-time option settings at the start of the pattern, and
|
||||||
remember the offset to the actual regex. */
|
remember the offset to the actual regex. With valgrind support, make the
|
||||||
|
terminator of a zero-terminated pattern inaccessible. This catches bugs that
|
||||||
|
would otherwise only show up for non-zero-terminated patterns. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_VALGRIND
|
||||||
|
if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
|
||||||
|
#endif
|
||||||
|
|
||||||
ptr = pattern;
|
ptr = pattern;
|
||||||
skipatstart = 0;
|
skipatstart = 0;
|
||||||
|
@ -9148,14 +9162,14 @@ if ((options & PCRE2_AUTO_CALLOUT) != 0)
|
||||||
|
|
||||||
if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
|
if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
|
||||||
{
|
{
|
||||||
cb.parsed_pattern = ccontext->memctl.malloc(
|
uint32_t *heap_parsed_pattern = ccontext->memctl.malloc(
|
||||||
(parsed_size_needed + 1) * sizeof(uint32_t),
|
(parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data);
|
||||||
ccontext->memctl.memory_data);
|
if (heap_parsed_pattern == NULL)
|
||||||
if (cb.parsed_pattern == NULL)
|
|
||||||
{
|
{
|
||||||
*errorptr = ERR21;
|
*errorptr = ERR21;
|
||||||
return NULL;
|
goto EXIT;
|
||||||
}
|
}
|
||||||
|
cb.parsed_pattern = heap_parsed_pattern;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Do the parsing scan. */
|
/* Do the parsing scan. */
|
||||||
|
@ -9547,12 +9561,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||||
goto HAD_CB_ERROR;
|
goto HAD_CB_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Control ends up here in all cases. If memory was obtained for the parsed
|
/* Control ends up here in all cases. When running under valgrind, make a
|
||||||
|
pattern's terminating zero defined again. If memory was obtained for the parsed
|
||||||
version of the pattern, free it before returning. Also free the list of named
|
version of the pattern, free it before returning. Also free the list of named
|
||||||
groups if a larger one had to be obtained, and likewise the group information
|
groups if a larger one had to be obtained, and likewise the group information
|
||||||
vector. */
|
vector. */
|
||||||
|
|
||||||
EXIT:
|
EXIT:
|
||||||
|
#ifdef SUPPORT_VALGRIND
|
||||||
|
if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1));
|
||||||
|
#endif
|
||||||
if (cb.parsed_pattern != stack_parsed_pattern)
|
if (cb.parsed_pattern != stack_parsed_pattern)
|
||||||
ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data);
|
ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data);
|
||||||
if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)
|
if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)
|
||||||
|
|
Loading…
Reference in New Issue