Fix "internal error" bug caused by patterns like "((?2){0,1999}())?".
This commit is contained in:
parent
67800ba810
commit
ea03932668
|
@ -97,6 +97,14 @@ only on Windows.
|
||||||
21. "make distclean" was not removing config.h, a file that is created for use
|
21. "make distclean" was not removing config.h, a file that is created for use
|
||||||
with CMake.
|
with CMake.
|
||||||
|
|
||||||
|
22. A pattern such as "((?2){0,1999}())?", which has a group containing a
|
||||||
|
forward reference repeated a large (but limited) number of times within a
|
||||||
|
repeated outer group that has a zero minimum quantifier, caused incorrect code
|
||||||
|
to be compiled, leading to the error "internal error: previously-checked
|
||||||
|
referenced subpattern not found" when an incorrect memory address was read.
|
||||||
|
This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's
|
||||||
|
FortiGuard Labs.
|
||||||
|
|
||||||
|
|
||||||
Version 10.00 05-January-2015
|
Version 10.00 05-January-2015
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
|
@ -2586,18 +2586,18 @@ the current group is on this list, it adjusts the offset in the list, not the
|
||||||
value in the reference (which is a group number).
|
value in the reference (which is a group number).
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
group points to the start of the group
|
group points to the start of the group
|
||||||
adjust the amount by which the group is to be moved
|
adjust the amount by which the group is to be moved
|
||||||
utf TRUE in UTF mode
|
utf TRUE in UTF mode
|
||||||
cb compile data
|
cb compile data
|
||||||
save_hwm the hwm forward reference pointer at the start of the group
|
save_hwm_offset the hwm forward reference offset at the start of the group
|
||||||
|
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb,
|
adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb,
|
||||||
PCRE2_UCHAR *save_hwm)
|
size_t save_hwm_offset)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *ptr = group;
|
PCRE2_UCHAR *ptr = group;
|
||||||
|
|
||||||
|
@ -2609,7 +2609,8 @@ while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
|
||||||
/* See if this recursion is on the forward reference list. If so, adjust the
|
/* See if this recursion is on the forward reference list. If so, adjust the
|
||||||
reference. */
|
reference. */
|
||||||
|
|
||||||
for (hc = save_hwm; hc < cb->hwm; hc += LINK_SIZE)
|
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
|
||||||
|
hc += LINK_SIZE)
|
||||||
{
|
{
|
||||||
offset = (int)GET(hc, 0);
|
offset = (int)GET(hc, 0);
|
||||||
if (cb->start_code + offset == ptr + 1)
|
if (cb->start_code + offset == ptr + 1)
|
||||||
|
@ -3093,7 +3094,7 @@ PCRE2_SPTR tempptr;
|
||||||
PCRE2_SPTR nestptr = NULL;
|
PCRE2_SPTR nestptr = NULL;
|
||||||
PCRE2_UCHAR *previous = NULL;
|
PCRE2_UCHAR *previous = NULL;
|
||||||
PCRE2_UCHAR *previous_callout = NULL;
|
PCRE2_UCHAR *previous_callout = NULL;
|
||||||
PCRE2_UCHAR *save_hwm = NULL;
|
size_t save_hwm_offset = 0;
|
||||||
uint8_t classbits[32];
|
uint8_t classbits[32];
|
||||||
|
|
||||||
/* We can fish out the UTF setting once and for all into a BOOL, but we must
|
/* We can fish out the UTF setting once and for all into a BOOL, but we must
|
||||||
|
@ -4565,7 +4566,7 @@ for (;; ptr++)
|
||||||
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
|
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
|
||||||
{
|
{
|
||||||
*code = OP_END;
|
*code = OP_END;
|
||||||
adjust_recurse(previous, 1, utf, cb, save_hwm);
|
adjust_recurse(previous, 1, utf, cb, save_hwm_offset);
|
||||||
memmove(previous + 1, previous, CU2BYTES(len));
|
memmove(previous + 1, previous, CU2BYTES(len));
|
||||||
code++;
|
code++;
|
||||||
if (repeat_max == 0)
|
if (repeat_max == 0)
|
||||||
|
@ -4589,7 +4590,7 @@ for (;; ptr++)
|
||||||
{
|
{
|
||||||
int offset;
|
int offset;
|
||||||
*code = OP_END;
|
*code = OP_END;
|
||||||
adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, save_hwm);
|
adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, save_hwm_offset);
|
||||||
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
|
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
|
||||||
code += 2 + LINK_SIZE;
|
code += 2 + LINK_SIZE;
|
||||||
*previous++ = OP_BRAZERO + repeat_type;
|
*previous++ = OP_BRAZERO + repeat_type;
|
||||||
|
@ -4652,26 +4653,25 @@ for (;; ptr++)
|
||||||
for (i = 1; i < repeat_min; i++)
|
for (i = 1; i < repeat_min; i++)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *hc;
|
PCRE2_UCHAR *hc;
|
||||||
PCRE2_UCHAR *this_hwm = cb->hwm;
|
size_t this_hwm_offset = cb->hwm - cb->start_workspace;
|
||||||
memcpy(code, previous, CU2BYTES(len));
|
memcpy(code, previous, CU2BYTES(len));
|
||||||
|
|
||||||
while (cb->hwm > cb->start_workspace + cb->workspace_size -
|
while (cb->hwm > cb->start_workspace + cb->workspace_size -
|
||||||
WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
|
WORK_SIZE_SAFETY_MARGIN -
|
||||||
|
(this_hwm_offset - save_hwm_offset))
|
||||||
{
|
{
|
||||||
size_t save_offset = save_hwm - cb->start_workspace;
|
|
||||||
size_t this_offset = this_hwm - cb->start_workspace;
|
|
||||||
*errorcodeptr = expand_workspace(cb);
|
*errorcodeptr = expand_workspace(cb);
|
||||||
if (*errorcodeptr != 0) goto FAILED;
|
if (*errorcodeptr != 0) goto FAILED;
|
||||||
save_hwm = (PCRE2_UCHAR *)cb->start_workspace + save_offset;
|
|
||||||
this_hwm = (PCRE2_UCHAR *)cb->start_workspace + this_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
|
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset;
|
||||||
|
hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset;
|
||||||
|
hc += LINK_SIZE)
|
||||||
{
|
{
|
||||||
PUT(cb->hwm, 0, GET(hc, 0) + len);
|
PUT(cb->hwm, 0, GET(hc, 0) + len);
|
||||||
cb->hwm += LINK_SIZE;
|
cb->hwm += LINK_SIZE;
|
||||||
}
|
}
|
||||||
save_hwm = this_hwm;
|
save_hwm_offset = this_hwm_offset;
|
||||||
code += len;
|
code += len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4716,7 +4716,7 @@ for (;; ptr++)
|
||||||
else for (i = repeat_max - 1; i >= 0; i--)
|
else for (i = repeat_max - 1; i >= 0; i--)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *hc;
|
PCRE2_UCHAR *hc;
|
||||||
PCRE2_UCHAR *this_hwm = cb->hwm;
|
size_t this_hwm_offset = cb->hwm - cb->start_workspace;
|
||||||
|
|
||||||
*code++ = OP_BRAZERO + repeat_type;
|
*code++ = OP_BRAZERO + repeat_type;
|
||||||
|
|
||||||
|
@ -4738,22 +4738,21 @@ for (;; ptr++)
|
||||||
copying them. */
|
copying them. */
|
||||||
|
|
||||||
while (cb->hwm > cb->start_workspace + cb->workspace_size -
|
while (cb->hwm > cb->start_workspace + cb->workspace_size -
|
||||||
WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
|
WORK_SIZE_SAFETY_MARGIN -
|
||||||
|
(this_hwm_offset - save_hwm_offset))
|
||||||
{
|
{
|
||||||
size_t save_offset = save_hwm - cb->start_workspace;
|
|
||||||
size_t this_offset = this_hwm - cb->start_workspace;
|
|
||||||
*errorcodeptr = expand_workspace(cb);
|
*errorcodeptr = expand_workspace(cb);
|
||||||
if (*errorcodeptr != 0) goto FAILED;
|
if (*errorcodeptr != 0) goto FAILED;
|
||||||
save_hwm = (PCRE2_UCHAR *)cb->start_workspace + save_offset;
|
|
||||||
this_hwm = (PCRE2_UCHAR *)cb->start_workspace + this_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
|
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset;
|
||||||
|
hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset;
|
||||||
|
hc += LINK_SIZE)
|
||||||
{
|
{
|
||||||
PUT(cb->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
|
PUT(cb->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
|
||||||
cb->hwm += LINK_SIZE;
|
cb->hwm += LINK_SIZE;
|
||||||
}
|
}
|
||||||
save_hwm = this_hwm;
|
save_hwm_offset = this_hwm_offset;
|
||||||
code += len;
|
code += len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4849,7 +4848,7 @@ for (;; ptr++)
|
||||||
{
|
{
|
||||||
int nlen = (int)(code - bracode);
|
int nlen = (int)(code - bracode);
|
||||||
*code = OP_END;
|
*code = OP_END;
|
||||||
adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, save_hwm);
|
adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
|
||||||
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
|
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
|
||||||
code += 1 + LINK_SIZE;
|
code += 1 + LINK_SIZE;
|
||||||
nlen += 1 + LINK_SIZE;
|
nlen += 1 + LINK_SIZE;
|
||||||
|
@ -4984,7 +4983,7 @@ for (;; ptr++)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*code = OP_END;
|
*code = OP_END;
|
||||||
adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, save_hwm);
|
adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
|
||||||
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
|
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
|
||||||
code += 1 + LINK_SIZE;
|
code += 1 + LINK_SIZE;
|
||||||
len += 1 + LINK_SIZE;
|
len += 1 + LINK_SIZE;
|
||||||
|
@ -5009,13 +5008,17 @@ for (;; ptr++)
|
||||||
/* ===================================================================*/
|
/* ===================================================================*/
|
||||||
/* Start of nested parenthesized sub-expression, or comment or lookahead or
|
/* Start of nested parenthesized sub-expression, or comment or lookahead or
|
||||||
lookbehind or option setting or condition or all the other extended
|
lookbehind or option setting or condition or all the other extended
|
||||||
parenthesis forms. */
|
parenthesis forms. We must save the current high-water-mark for the
|
||||||
|
forward reference list so that we know where the references for this group start.
|
||||||
|
However, because the list may be extended when there are very many forward
|
||||||
|
references (usually the result of a replicated inner group), we must use
|
||||||
|
an offset rather than an absolute address. */
|
||||||
|
|
||||||
case CHAR_LEFT_PARENTHESIS:
|
case CHAR_LEFT_PARENTHESIS:
|
||||||
newoptions = options;
|
newoptions = options;
|
||||||
skipbytes = 0;
|
skipbytes = 0;
|
||||||
bravalue = OP_CBRA;
|
bravalue = OP_CBRA;
|
||||||
save_hwm = cb->hwm;
|
save_hwm_offset = cb->hwm - cb->start_workspace;
|
||||||
reset_bracount = FALSE;
|
reset_bracount = FALSE;
|
||||||
|
|
||||||
/* First deal with various "verbs" that can be introduced by '*'. */
|
/* First deal with various "verbs" that can be introduced by '*'. */
|
||||||
|
@ -5972,7 +5975,8 @@ for (;; ptr++)
|
||||||
|
|
||||||
/* Fudge the value of "called" so that when it is inserted as an
|
/* Fudge the value of "called" so that when it is inserted as an
|
||||||
offset below, what is actually inserted is the reference number
|
offset below, what is actually inserted is the reference number
|
||||||
of the group. Then remember the forward reference. */
|
of the group. Then remember the forward reference, expanding the
|
||||||
|
working space where the list is kept if necessary. */
|
||||||
|
|
||||||
called = cb->start_code + recno;
|
called = cb->start_code + recno;
|
||||||
if (cb->hwm >= cb->start_workspace + cb->workspace_size -
|
if (cb->hwm >= cb->start_workspace + cb->workspace_size -
|
||||||
|
@ -6395,7 +6399,9 @@ for (;; ptr++)
|
||||||
PCRE2_SPTR p;
|
PCRE2_SPTR p;
|
||||||
uint32_t cf;
|
uint32_t cf;
|
||||||
|
|
||||||
save_hwm = cb->hwm; /* Normally this is set when '(' is read */
|
/* Normally save_hwm_offset is set when '(' is read */
|
||||||
|
|
||||||
|
save_hwm_offset = cb->hwm - cb->start_workspace;
|
||||||
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
|
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
|
||||||
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
|
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
|
||||||
|
|
||||||
|
@ -6933,7 +6939,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
*code = OP_END;
|
*code = OP_END;
|
||||||
adjust_recurse(start_bracket, 1 + LINK_SIZE,
|
adjust_recurse(start_bracket, 1 + LINK_SIZE,
|
||||||
(options & PCRE2_UTF) != 0, cb, cb->hwm);
|
(options & PCRE2_UTF) != 0, cb, cb->hwm - cb->start_workspace);
|
||||||
memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
|
memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
|
||||||
CU2BYTES(code - start_bracket));
|
CU2BYTES(code - start_bracket));
|
||||||
*start_bracket = OP_ONCE;
|
*start_bracket = OP_ONCE;
|
||||||
|
|
|
@ -4171,5 +4171,9 @@ a random value. /Ix
|
||||||
'^(?:a)*+(\w)'
|
'^(?:a)*+(\w)'
|
||||||
g
|
g
|
||||||
g\=ovector=1
|
g\=ovector=1
|
||||||
|
|
||||||
|
# This pattern showed up a compile-time bug
|
||||||
|
|
||||||
|
"((?2){0,1999}())?"
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -13949,5 +13949,9 @@ Matched, but too many substrings
|
||||||
g\=ovector=1
|
g\=ovector=1
|
||||||
Matched, but too many substrings
|
Matched, but too many substrings
|
||||||
0: g
|
0: g
|
||||||
|
|
||||||
|
# This pattern showed up a compile-time bug
|
||||||
|
|
||||||
|
"((?2){0,1999}())?"
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue