Fix overrun bug caused by conditional with assertion using too much memory.

This commit is contained in:
Philip.Hazel 2016-11-18 18:59:37 +00:00
parent 21c084125c
commit e0cccdd028
6 changed files with 79 additions and 41 deletions

View File

@ -83,6 +83,13 @@ copied).
(i) An insufficient memory size was being computed for compiling with (i) An insufficient memory size was being computed for compiling with
PCRE2_AUTO_CALLOUT. PCRE2_AUTO_CALLOUT.
(j) A conditional group with an assertion condition used more memory than was
allowed for it during parsing, so too many of them could therefore
overrun a buffer.
(k) If parsing a pattern exactly filled the buffer, the internal test for
overrun did not check when the final META_END item was added.
4. Back references are now permitted in lookbehind assertions when there are 4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the no duplicated group numbers (that is, (?| has not been used), and, if the
reference is by name, there is only one group of that name. The referenced reference is by name, there is only one group of that name. The referenced

View File

@ -186,6 +186,7 @@ META_CLASS_EMPTY_NOT [^] negative empty class - ditto
META_CLASS_END ] end of non-empty class META_CLASS_END ] end of non-empty class
META_CLASS_NOT [^ start non-empty negative class META_CLASS_NOT [^ start non-empty negative class
META_COMMIT (*COMMIT) META_COMMIT (*COMMIT)
META_COND_ASSERT (?(?assertion)
META_DOLLAR $ metacharacter META_DOLLAR $ metacharacter
META_DOT . metacharacter META_DOT . metacharacter
META_END End of pattern (this value is 0x80000000) META_END End of pattern (this value is 0x80000000)
@ -274,9 +275,8 @@ This one is followed by an offset, for use in error messages, then a number:
META_COND_NUMBER (?([+-]digits) META_COND_NUMBER (?([+-]digits)
The following are followed just by an offset, for use in error messages: The following is followed just by an offset, for use in error messages:
META_COND_ASSERT (?(?assertion)
META_COND_DEFINE (?(DEFINE) META_COND_DEFINE (?(DEFINE)
In fact, META_COND_ASSERT is used for any group starting (?( that does not In fact, META_COND_ASSERT is used for any group starting (?( that does not

View File

@ -856,6 +856,7 @@ for (;;)
case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break; case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break; case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break;
case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break; case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
case META_DOT: fprintf(stderr, "META_DOT"); break; case META_DOT: fprintf(stderr, "META_DOT"); break;
case META_ASTERISK: fprintf(stderr, "META *"); break; case META_ASTERISK: fprintf(stderr, "META *"); break;
@ -949,12 +950,6 @@ for (;;)
fprintf(stderr, "%zd", offset); fprintf(stderr, "%zd", offset);
break; break;
case META_COND_ASSERT:
fprintf(stderr, "META_COND_ASSERT offset=");
GETOFFSET(offset, pptr);
fprintf(stderr, "%zd", offset);
break;
case META_COND_VERSION: case META_COND_VERSION:
fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">="); fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">=");
fprintf(stderr, "%d.", *pptr++); fprintf(stderr, "%d.", *pptr++);
@ -2368,6 +2363,42 @@ while (ptr < ptrend)
parsed_pattern, cb); parsed_pattern, cb);
} }
/* If expect_cond_assert is 2, we have just passed (?( and are expecting an
assertion, possibly preceded by a callout. If the value is 1, we have just
had the callout and expect an assertion. There must be at least 3 more
characters in all cases. We know that the current character is an opening
parenthesis, as otherwise we wouldn't be here. Note that expect_cond_assert
may be negative, since all callouts just decrement it. */
if (expect_cond_assert > 0)
{
BOOL ok = ptrend - ptr >= 3 && ptr[0] == CHAR_QUESTION_MARK;
if (ok) switch(ptr[1])
{
case CHAR_C:
ok = expect_cond_assert == 2;
break;
case CHAR_EQUALS_SIGN:
case CHAR_EXCLAMATION_MARK:
break;
case CHAR_LESS_THAN_SIGN:
ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
break;
default:
ok = FALSE;
}
if (!ok)
{
ptr--; /* Adjust error offset */
errorcode = ERR28;
goto FAILED;
}
}
/* Remember whether we are expecting a conditional assertion, and set the /* Remember whether we are expecting a conditional assertion, and set the
default for this item. */ default for this item. */
@ -3519,18 +3550,16 @@ while (ptr < ptrend)
nest_depth++; nest_depth++;
/* If the next character is ? there must be an assertion next (optionally /* If the next character is ? there must be an assertion next (optionally
preceded by a callout). We do not check this here, but instead we just preceded by a callout). We do not check this here, but instead we set
preserve the offset so that the later check can give a sensible error expect_cond_assert to 2. If this is still greater than zero (callouts
message. Pull back the pointer to the start of the assertion (or decrement it) when the next assertion is read, it will be marked as a
callout), and set expect_cond_assert to 2. If this is still greater than condition that must not be repeated. A value greater than zero also
zero (callouts decrement it) when the next assertion is read, it will be causes checking that an assertion (possibly with callout) follows. */
marked as a condition that must not be repeated. */
if (*ptr == CHAR_QUESTION_MARK) if (*ptr == CHAR_QUESTION_MARK)
{ {
*parsed_pattern++ = META_COND_ASSERT; *parsed_pattern++ = META_COND_ASSERT;
offset = (PCRE2_SIZE)(--ptr - cb->start_pattern - 2); ptr--; /* Pull pointer back to the opening parenthesis. */
PUTOFFSET(offset, parsed_pattern);
expect_cond_assert = 2; expect_cond_assert = 2;
break; /* End of conditional */ break; /* End of conditional */
} }
@ -3902,6 +3931,11 @@ parsed_pattern = manage_callouts(ptr, &previous_callout, options,
/* Terminate the parsed pattern, then return success if all groups are closed. /* Terminate the parsed pattern, then return success if all groups are closed.
Otherwise we have unclosed parentheses. */ Otherwise we have unclosed parentheses. */
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
goto FAILED;
}
*parsed_pattern = META_END; *parsed_pattern = META_END;
if (nest_depth == 0) return 0; if (nest_depth == 0) return 0;
@ -5806,23 +5840,10 @@ for (;; pptr++)
pptr += 3; pptr += 3;
goto GROUP_PROCESS; goto GROUP_PROCESS;
/* The condition is alleged to be an assertion, possibly preceded by a /* The condition is an assertion, possibly preceded by a callout. */
callout, because it's not one of the others, and began with (?(?. This is
where the check for the next thing being an assertion (with optional
callout) is done. */
case META_COND_ASSERT: case META_COND_ASSERT:
bravalue = OP_COND; bravalue = OP_COND;
GETPLUSOFFSET(offset, pptr);
i = (pptr[1] == META_CALLOUT_NUMBER)? 5 :
(pptr[1] == META_CALLOUT_STRING)? (5 + SIZEOFFSET) : 1;
if (META_CODE(pptr[i]) < META_LOOKAHEAD ||
META_CODE(pptr[i]) > META_LOOKBEHINDNOT)
{
*errorcodeptr = ERR28; /* Assertion expected */
cb->erroroffset = offset + 2; /* Point after initial (? */
return FALSE;
}
goto GROUP_PROCESS; goto GROUP_PROCESS;
@ -8563,7 +8584,7 @@ for (;; pptr++)
goto CHECK_GROUP; goto CHECK_GROUP;
case META_COND_ASSERT: case META_COND_ASSERT:
pptr += 1 + SIZEOFFSET; pptr += 1;
goto CHECK_GROUP; goto CHECK_GROUP;
case META_COND_VERSION: case META_COND_VERSION:
@ -8733,6 +8754,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_CLASS_END: case META_CLASS_END:
case META_CLASS_NOT: case META_CLASS_NOT:
case META_COMMIT: case META_COMMIT:
case META_COND_ASSERT:
case META_DOLLAR: case META_DOLLAR:
case META_DOT: case META_DOT:
case META_FAIL: case META_FAIL:
@ -8753,7 +8775,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_THEN: case META_THEN:
break; break;
case META_COND_ASSERT:
case META_RECURSE: case META_RECURSE:
pptr += SIZEOFFSET; pptr += SIZEOFFSET;
break; break;

4
testdata/testinput2 vendored
View File

@ -4912,4 +4912,8 @@ a)"xI
\=get=i00000000000000000000000000000000 \=get=i00000000000000000000000000000000
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 \=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
"(?(?C))"
/(?(?(?(?(?(?))))))/
# End of testinput2 # End of testinput2

View File

@ -145,7 +145,7 @@ Failed: POSIX code 9: bad escape sequence at offset 4
Failed: POSIX code 11: unbalanced () at offset 6 Failed: POSIX code 11: unbalanced () at offset 6
"(?(?C))" "(?(?C))"
Failed: POSIX code 3: pattern error at offset 2 Failed: POSIX code 3: pattern error at offset 6
/abcd/substitute_extended /abcd/substitute_extended
** Ignored with POSIX interface: substitute_extended ** Ignored with POSIX interface: substitute_extended

View File

@ -556,7 +556,7 @@ Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
Failed: error 115 at offset 3: reference to non-existent subpattern Failed: error 115 at offset 3: reference to non-existent subpattern
/(?(?<ab))/ /(?(?<ab))/
Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator) Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
/((?s)blah)\s+\1/I /((?s)blah)\s+\1/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -15361,6 +15361,12 @@ Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed,
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 \=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
** Too many characters in named 'get' modifiers ** Too many characters in named 'get' modifiers
"(?(?C))"
Failed: error 128 at offset 6: assertion expected after (?( or (?(?C)
/(?(?(?(?(?(?))))))/
Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
# End of testinput2 # End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data Error -62: bad serialized data