Fix incorrect size calculation when a reference to a duplicate name occurs

in a part of the pattern where PCRE2_DUPNAMES is not set.
This commit is contained in:
Philip.Hazel 2015-01-27 17:21:32 +00:00
parent ca77bdd5c4
commit 911b800984
5 changed files with 40 additions and 32 deletions

View File

@ -31,6 +31,15 @@ in order to avoid accessing uninitialized data when serializing.
5. The (*NO_JIT) feature is implemented. 5. The (*NO_JIT) feature is implemented.
6. If a bug that caused pcre2_compile() to use more memory than allocated was
triggered when using valgrind, the code in (3) above passed a stupidly large
value to valgrind. This caused a crash instead of an "internal error" return.
7. A reference to a duplicated named group (either a back reference or a test
for being set in a conditional) that occurred in a part of the pattern where
PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern
to be incorrectly calculated, leading to overwriting.
Version 10.00 05-January-2015 Version 10.00 05-January-2015
----------------------------- -----------------------------

View File

@ -5296,9 +5296,10 @@ for (;; ptr++)
} }
/* Otherwise we expect to read a name; anything else is an error. When /* Otherwise we expect to read a name; anything else is an error. When
a name is one of a number of duplicates, a different opcode is used and the referenced name is one of a number of duplicates, a different
it needs more memory. Unfortunately we cannot tell whether a name is a opcode is used and it needs more memory. Unfortunately we cannot tell
duplicate in the first pass, so we have to allow for more memory. */ whether this is the case in the first pass, so we have to allow for
more memory always. */
else else
{ {
@ -5318,8 +5319,7 @@ for (;; ptr++)
ptr++; ptr++;
} }
namelen = (int)(ptr - name); namelen = (int)(ptr - name);
if (lengthptr != NULL && (options & PCRE2_DUPNAMES) != 0) if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
*lengthptr += IMM2_SIZE;
} }
/* Check the terminator */ /* Check the terminator */
@ -5736,15 +5736,15 @@ for (;; ptr++)
} }
recno = (i < cb->names_found)? ng->number : 0; recno = (i < cb->names_found)? ng->number : 0;
/* Count named back references. */
if (!is_recurse) cb->namedrefcount++;
/* If duplicate names are permitted, we have to allow for a named /* If duplicate names are permitted, we have to allow for a named
reference to a duplicated name (this cannot be determined until the reference to a duplicated name (this cannot be determined until the
second pass). This needs an extra 16-bit data item. */ second pass). This needs an extra data item. Counting named back
references and incrementing the count at the end does not work
because it does not account for duplication of groups containing such
references. Nor does checking for PCRE2_DUPNAMES because that need
not be set at the point of reference. */
if ((options & PCRE2_DUPNAMES) != 0) *lengthptr += IMM2_SIZE; *lengthptr += IMM2_SIZE;
} }
/* In the real compile, search the name table. We check the name /* In the real compile, search the name table. We check the name
@ -6874,7 +6874,7 @@ for (;;)
} }
/* Fill in the ket */ /* Fill in the ket */
*code = OP_KET; *code = OP_KET;
PUT(code, 1, (int)(code - start_bracket)); PUT(code, 1, (int)(code - start_bracket));
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
@ -7479,7 +7479,6 @@ cb.name_entry_size = 0;
cb.name_table = NULL; cb.name_table = NULL;
cb.named_groups = named_groups; cb.named_groups = named_groups;
cb.named_group_list_size = NAMED_GROUP_LIST_SIZE; cb.named_group_list_size = NAMED_GROUP_LIST_SIZE;
cb.namedrefcount = 0;
cb.names_found = 0; cb.names_found = 0;
cb.open_caps = NULL; cb.open_caps = NULL;
cb.parens_depth = 0; cb.parens_depth = 0;
@ -7668,13 +7667,6 @@ if (length > MAX_PATTERN_SIZE)
goto HAD_ERROR; goto HAD_ERROR;
} }
/* If there are groups with duplicate names and there are also references by
name, we must allow for the possibility of named references to duplicated
groups. These require an extra data item each. */
if (cb.dupnames && cb.namedrefcount > 0)
length += cb.namedrefcount * IMM2_SIZE * sizeof(PCRE2_UCHAR);
/* Compute the size of, and then get and initialize, the data block for storing /* Compute the size of, and then get and initialize, the data block for storing
the compiled pattern and names table. Integer overflow should no longer be the compiled pattern and names table. Integer overflow should no longer be
possible because nowadays we limit the maximum value of cb.names_found and possible because nowadays we limit the maximum value of cb.names_found and
@ -7689,7 +7681,7 @@ if (re == NULL)
errorcode = ERR21; errorcode = ERR21;
goto HAD_ERROR; goto HAD_ERROR;
} }
re->memctl = ccontext->memctl; re->memctl = ccontext->memctl;
re->tables = tables; re->tables = tables;
re->executable_jit = NULL; re->executable_jit = NULL;
@ -7775,22 +7767,22 @@ if (cb.had_accept)
} }
/* If we have not reached end of pattern after a successful compile, there's an /* If we have not reached end of pattern after a successful compile, there's an
excess bracket. Otherwise, fill in the final opcode and check for disastrous excess bracket. Fill in the final opcode and check for disastrous overflow.
overflow. */ If no overflow, but the estimated length exceeds the really used length, adjust
the value of re->blocksize, and if valgrind support is configured, mark the
extra allocated memory as unaddressable, so that any out-of-bound reads can be
detected. */
if (errorcode == 0 && ptr < cb.end_pattern) errorcode = ERR22; if (errorcode == 0 && ptr < cb.end_pattern) errorcode = ERR22;
*code++ = OP_END; *code++ = OP_END;
usedlength = code - codestart; usedlength = code - codestart;
if (usedlength > length) errorcode = ERR23; if (usedlength > length) errorcode = ERR23; else
{
/* If the estimated length exceeds the really used length, adjust the value of re->blocksize -= CU2BYTES(length - usedlength);
re->blocksize, and if valgrind support is configured, mark the extra allocated
memory as unaddressable, so that any out-of-bound reads can be detected. */
re->blocksize -= CU2BYTES(length - usedlength);
#ifdef SUPPORT_VALGRIND #ifdef SUPPORT_VALGRIND
VALGRIND_MAKE_MEM_NOACCESS(code, CU2BYTES(length - usedlength)); VALGRIND_MAKE_MEM_NOACCESS(code, CU2BYTES(length - usedlength));
#endif #endif
}
/* Fill in any forward references that are required. There may be repeated /* Fill in any forward references that are required. There may be repeated
references; optimize for them, as searching a large regex takes time. */ references; optimize for them, as searching a large regex takes time. */

View File

@ -677,7 +677,6 @@ typedef struct compile_block {
uint32_t final_bracount; /* Saved value after first pass */ uint32_t final_bracount; /* Saved value after first pass */
uint32_t top_backref; /* Maximum back reference */ uint32_t top_backref; /* Maximum back reference */
uint32_t backref_map; /* Bitmap of low back refs */ uint32_t backref_map; /* Bitmap of low back refs */
uint32_t namedrefcount; /* Number of backreferences by name */
uint32_t nltype; /* Newline type */ uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */ uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ PCRE2_UCHAR nl[4]; /* Newline string when fixed length */

4
testdata/testinput2 vendored
View File

@ -4134,4 +4134,8 @@ a random value. /Ix
aa123\=ovector=2 aa123\=ovector=2
aa123\=ovector=3 aa123\=ovector=3
/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
/(?<N>(?J)(?<N>))(?-J)\k<N>/
# End of testinput2 # End of testinput2

View File

@ -13893,4 +13893,8 @@ Matched, but too many substrings
1: <unset> 1: <unset>
2: a 2: a
/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
/(?<N>(?J)(?<N>))(?-J)\k<N>/
# End of testinput2 # End of testinput2