Fix overrun bug caused by conditional with assertion using too much memory.
This commit is contained in:
parent
21c084125c
commit
e0cccdd028
|
@ -83,6 +83,13 @@ copied).
|
||||||
(i) An insufficient memory size was being computed for compiling with
|
(i) An insufficient memory size was being computed for compiling with
|
||||||
PCRE2_AUTO_CALLOUT.
|
PCRE2_AUTO_CALLOUT.
|
||||||
|
|
||||||
|
(j) A conditional group with an assertion condition used more memory than was
|
||||||
|
allowed for it during parsing, so too many of them could
|
||||||
|
overrun a buffer.
|
||||||
|
|
||||||
|
(k) If parsing a pattern exactly filled the buffer, the internal test for
|
||||||
|
overrun did not check when the final META_END item was added.
|
||||||
|
|
||||||
4. Back references are now permitted in lookbehind assertions when there are
|
4. Back references are now permitted in lookbehind assertions when there are
|
||||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
reference is by name, there is only one group of that name. The referenced
|
reference is by name, there is only one group of that name. The referenced
|
||||||
|
|
4
HACKING
4
HACKING
|
@ -186,6 +186,7 @@ META_CLASS_EMPTY_NOT [^] negative empty class - ditto
|
||||||
META_CLASS_END ] end of non-empty class
|
META_CLASS_END ] end of non-empty class
|
||||||
META_CLASS_NOT [^ start non-empty negative class
|
META_CLASS_NOT [^ start non-empty negative class
|
||||||
META_COMMIT (*COMMIT)
|
META_COMMIT (*COMMIT)
|
||||||
|
META_COND_ASSERT (?(?assertion)
|
||||||
META_DOLLAR $ metacharacter
|
META_DOLLAR $ metacharacter
|
||||||
META_DOT . metacharacter
|
META_DOT . metacharacter
|
||||||
META_END End of pattern (this value is 0x80000000)
|
META_END End of pattern (this value is 0x80000000)
|
||||||
|
@ -274,9 +275,8 @@ This one is followed by an offset, for use in error messages, then a number:
|
||||||
|
|
||||||
META_COND_NUMBER (?([+-]digits)
|
META_COND_NUMBER (?([+-]digits)
|
||||||
|
|
||||||
The following are followed just by an offset, for use in error messages:
|
The following is followed just by an offset, for use in error messages:
|
||||||
|
|
||||||
META_COND_ASSERT (?(?assertion)
|
|
||||||
META_COND_DEFINE (?(DEFINE)
|
META_COND_DEFINE (?(DEFINE)
|
||||||
|
|
||||||
In fact, META_COND_ASSERT is used for any group starting (?( that does not
|
In fact, META_COND_ASSERT is used for any group starting (?( that does not
|
||||||
|
|
|
@ -856,6 +856,7 @@ for (;;)
|
||||||
|
|
||||||
case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
|
case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
|
||||||
case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
|
case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
|
||||||
|
case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break;
|
||||||
case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
|
case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
|
||||||
case META_DOT: fprintf(stderr, "META_DOT"); break;
|
case META_DOT: fprintf(stderr, "META_DOT"); break;
|
||||||
case META_ASTERISK: fprintf(stderr, "META *"); break;
|
case META_ASTERISK: fprintf(stderr, "META *"); break;
|
||||||
|
@ -949,12 +950,6 @@ for (;;)
|
||||||
fprintf(stderr, "%zd", offset);
|
fprintf(stderr, "%zd", offset);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case META_COND_ASSERT:
|
|
||||||
fprintf(stderr, "META_COND_ASSERT offset=");
|
|
||||||
GETOFFSET(offset, pptr);
|
|
||||||
fprintf(stderr, "%zd", offset);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case META_COND_VERSION:
|
case META_COND_VERSION:
|
||||||
fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">=");
|
fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">=");
|
||||||
fprintf(stderr, "%d.", *pptr++);
|
fprintf(stderr, "%d.", *pptr++);
|
||||||
|
@ -2368,6 +2363,42 @@ while (ptr < ptrend)
|
||||||
parsed_pattern, cb);
|
parsed_pattern, cb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If expect_cond_assert is 2, we have just passed (?( and are expecting an
|
||||||
|
assertion, possibly preceded by a callout. If the value is 1, we have just
|
||||||
|
had the callout and expect an assertion. There must be at least 3 more
|
||||||
|
characters in all cases. We know that the current character is an opening
|
||||||
|
parenthesis, as otherwise we wouldn't be here. Note that expect_cond_assert
|
||||||
|
may be negative, since all callouts just decrement it. */
|
||||||
|
|
||||||
|
if (expect_cond_assert > 0)
|
||||||
|
{
|
||||||
|
BOOL ok = ptrend - ptr >= 3 && ptr[0] == CHAR_QUESTION_MARK;
|
||||||
|
if (ok) switch(ptr[1])
|
||||||
|
{
|
||||||
|
case CHAR_C:
|
||||||
|
ok = expect_cond_assert == 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CHAR_EQUALS_SIGN:
|
||||||
|
case CHAR_EXCLAMATION_MARK:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CHAR_LESS_THAN_SIGN:
|
||||||
|
ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ok = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ok)
|
||||||
|
{
|
||||||
|
ptr--; /* Adjust error offset */
|
||||||
|
errorcode = ERR28;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Remember whether we are expecting a conditional assertion, and set the
|
/* Remember whether we are expecting a conditional assertion, and set the
|
||||||
default for this item. */
|
default for this item. */
|
||||||
|
|
||||||
|
@ -3519,18 +3550,16 @@ while (ptr < ptrend)
|
||||||
nest_depth++;
|
nest_depth++;
|
||||||
|
|
||||||
/* If the next character is ? there must be an assertion next (optionally
|
/* If the next character is ? there must be an assertion next (optionally
|
||||||
preceded by a callout). We do not check this here, but instead we just
|
preceded by a callout). We do not check this here, but instead we set
|
||||||
preserve the offset so that the later check can give a sensible error
|
expect_cond_assert to 2. If this is still greater than zero (callouts
|
||||||
message. Pull back the pointer to the start of the assertion (or
|
decrement it) when the next assertion is read, it will be marked as a
|
||||||
callout), and set expect_cond_assert to 2. If this is still greater than
|
condition that must not be repeated. A value greater than zero also
|
||||||
zero (callouts decrement it) when the next assertion is read, it will be
|
causes checking that an assertion (possibly with callout) follows. */
|
||||||
marked as a condition that must not be repeated. */
|
|
||||||
|
|
||||||
if (*ptr == CHAR_QUESTION_MARK)
|
if (*ptr == CHAR_QUESTION_MARK)
|
||||||
{
|
{
|
||||||
*parsed_pattern++ = META_COND_ASSERT;
|
*parsed_pattern++ = META_COND_ASSERT;
|
||||||
offset = (PCRE2_SIZE)(--ptr - cb->start_pattern - 2);
|
ptr--; /* Pull pointer back to the opening parenthesis. */
|
||||||
PUTOFFSET(offset, parsed_pattern);
|
|
||||||
expect_cond_assert = 2;
|
expect_cond_assert = 2;
|
||||||
break; /* End of conditional */
|
break; /* End of conditional */
|
||||||
}
|
}
|
||||||
|
@ -3902,6 +3931,11 @@ parsed_pattern = manage_callouts(ptr, &previous_callout, options,
|
||||||
/* Terminate the parsed pattern, then return success if all groups are closed.
|
/* Terminate the parsed pattern, then return success if all groups are closed.
|
||||||
Otherwise we have unclosed parentheses. */
|
Otherwise we have unclosed parentheses. */
|
||||||
|
|
||||||
|
if (parsed_pattern >= parsed_pattern_end)
|
||||||
|
{
|
||||||
|
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
*parsed_pattern = META_END;
|
*parsed_pattern = META_END;
|
||||||
if (nest_depth == 0) return 0;
|
if (nest_depth == 0) return 0;
|
||||||
|
|
||||||
|
@ -5806,23 +5840,10 @@ for (;; pptr++)
|
||||||
pptr += 3;
|
pptr += 3;
|
||||||
goto GROUP_PROCESS;
|
goto GROUP_PROCESS;
|
||||||
|
|
||||||
/* The condition is alleged to be an assertion, possibly preceded by a
|
/* The condition is an assertion, possibly preceded by a callout. */
|
||||||
callout, because it's not one of the others, and began with (?(?. This is
|
|
||||||
where the check for the next thing being an assertion (with optional
|
|
||||||
callout) is done. */
|
|
||||||
|
|
||||||
case META_COND_ASSERT:
|
case META_COND_ASSERT:
|
||||||
bravalue = OP_COND;
|
bravalue = OP_COND;
|
||||||
GETPLUSOFFSET(offset, pptr);
|
|
||||||
i = (pptr[1] == META_CALLOUT_NUMBER)? 5 :
|
|
||||||
(pptr[1] == META_CALLOUT_STRING)? (5 + SIZEOFFSET) : 1;
|
|
||||||
if (META_CODE(pptr[i]) < META_LOOKAHEAD ||
|
|
||||||
META_CODE(pptr[i]) > META_LOOKBEHINDNOT)
|
|
||||||
{
|
|
||||||
*errorcodeptr = ERR28; /* Assertion expected */
|
|
||||||
cb->erroroffset = offset + 2; /* Point after initial (? */
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
goto GROUP_PROCESS;
|
goto GROUP_PROCESS;
|
||||||
|
|
||||||
|
|
||||||
|
@ -8563,7 +8584,7 @@ for (;; pptr++)
|
||||||
goto CHECK_GROUP;
|
goto CHECK_GROUP;
|
||||||
|
|
||||||
case META_COND_ASSERT:
|
case META_COND_ASSERT:
|
||||||
pptr += 1 + SIZEOFFSET;
|
pptr += 1;
|
||||||
goto CHECK_GROUP;
|
goto CHECK_GROUP;
|
||||||
|
|
||||||
case META_COND_VERSION:
|
case META_COND_VERSION:
|
||||||
|
@ -8733,6 +8754,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||||
case META_CLASS_END:
|
case META_CLASS_END:
|
||||||
case META_CLASS_NOT:
|
case META_CLASS_NOT:
|
||||||
case META_COMMIT:
|
case META_COMMIT:
|
||||||
|
case META_COND_ASSERT:
|
||||||
case META_DOLLAR:
|
case META_DOLLAR:
|
||||||
case META_DOT:
|
case META_DOT:
|
||||||
case META_FAIL:
|
case META_FAIL:
|
||||||
|
@ -8753,7 +8775,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||||
case META_THEN:
|
case META_THEN:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case META_COND_ASSERT:
|
|
||||||
case META_RECURSE:
|
case META_RECURSE:
|
||||||
pptr += SIZEOFFSET;
|
pptr += SIZEOFFSET;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -4912,4 +4912,8 @@ a)"xI
|
||||||
\=get=i00000000000000000000000000000000
|
\=get=i00000000000000000000000000000000
|
||||||
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
|
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
|
||||||
|
|
||||||
|
"(?(?C))"
|
||||||
|
|
||||||
|
/(?(?(?(?(?(?))))))/
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -145,7 +145,7 @@ Failed: POSIX code 9: bad escape sequence at offset 4
|
||||||
Failed: POSIX code 11: unbalanced () at offset 6
|
Failed: POSIX code 11: unbalanced () at offset 6
|
||||||
|
|
||||||
"(?(?C))"
|
"(?(?C))"
|
||||||
Failed: POSIX code 3: pattern error at offset 2
|
Failed: POSIX code 3: pattern error at offset 6
|
||||||
|
|
||||||
/abcd/substitute_extended
|
/abcd/substitute_extended
|
||||||
** Ignored with POSIX interface: substitute_extended
|
** Ignored with POSIX interface: substitute_extended
|
||||||
|
|
|
@ -556,7 +556,7 @@ Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
|
||||||
Failed: error 115 at offset 3: reference to non-existent subpattern
|
Failed: error 115 at offset 3: reference to non-existent subpattern
|
||||||
|
|
||||||
/(?(?<ab))/
|
/(?(?<ab))/
|
||||||
Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
|
Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
|
||||||
|
|
||||||
/((?s)blah)\s+\1/I
|
/((?s)blah)\s+\1/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
@ -15361,6 +15361,12 @@ Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed,
|
||||||
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
|
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
|
||||||
** Too many characters in named 'get' modifiers
|
** Too many characters in named 'get' modifiers
|
||||||
|
|
||||||
|
"(?(?C))"
|
||||||
|
Failed: error 128 at offset 6: assertion expected after (?( or (?(?C)
|
||||||
|
|
||||||
|
/(?(?(?(?(?(?))))))/
|
||||||
|
Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue