Give an internal error for a bad opcode during auto-possessification. This can

stop a loop when compiling an invalid UTF string with PCRE2_NO_UTF_CHECK.
This commit is contained in:
Philip.Hazel 2015-02-06 16:47:15 +00:00
parent a398ae3bef
commit ad452f4036
5 changed files with 40 additions and 24 deletions

View File

@ -55,6 +55,13 @@ patterns.
10. The error message for an invalid quantifier has been changed from "nothing 10. The error message for an invalid quantifier has been changed from "nothing
to repeat" to "quantifier does not follow a repeatable item". to repeat" to "quantifier does not follow a repeatable item".
11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but
scanning the compiled pattern in subsequent auto-possessification can get out
of step and lead to an unknown opcode. Previously this could have caused an
infinite loop. Now it generates an "internal error" error. This is a tidy-up,
not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an
undefined outcome.
Version 10.00 05-January-2015 Version 10.00 05-January-2015
----------------------------- -----------------------------

View File

@ -1090,17 +1090,20 @@ but some compilers complain about an unreachable statement. */
*************************************************/ *************************************************/
/* Replaces single character iterations with their possessive alternatives /* Replaces single character iterations with their possessive alternatives
if appropriate. This function modifies the compiled opcode! if appropriate. This function modifies the compiled opcode! Hitting a
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
bad UTF string was compiled with PCRE2_NO_UTF_CHECK.
Arguments: Arguments:
code points to start of the byte code code points to start of the byte code
utf TRUE in UTF mode utf TRUE in UTF mode
cb compile data block cb compile data block
Returns: nothing Returns: 0 for success
-1 if a non-existent opcode is encountered
*/ */
void int
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb) PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
{ {
register PCRE2_UCHAR c; register PCRE2_UCHAR c;
@ -1112,6 +1115,8 @@ for (;;)
{ {
c = *code; c = *code;
if (c > OP_TABLE_LENGTH) return -1; /* Something gone wrong */
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
{ {
c -= get_repeat_base(c) - OP_STAR; c -= get_repeat_base(c) - OP_STAR;
@ -1207,7 +1212,7 @@ for (;;)
switch(c) switch(c)
{ {
case OP_END: case OP_END:
return; return 0;
case OP_TYPESTAR: case OP_TYPESTAR:
case OP_TYPEMINSTAR: case OP_TYPEMINSTAR:

View File

@ -573,7 +573,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79 }; ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such /* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -7802,7 +7802,9 @@ if (usedlength > length) errorcode = ERR23; else
} }
/* Fill in any forward references that are required. There may be repeated /* Fill in any forward references that are required. There may be repeated
references; optimize for them, as searching a large regex takes time. */ references; optimize for them, as searching a large regex takes time. The
test of errorcode inside the loop means that nothing is done if it is already
non-zero. */
if (cb.hwm > cb.start_workspace) if (cb.hwm > cb.start_workspace)
{ {
@ -7832,22 +7834,22 @@ if (cb.workspace_size > COMPILE_WORK_SIZE)
ccontext->memctl.memory_data); ccontext->memctl.memory_data);
cb.start_workspace = NULL; cb.start_workspace = NULL;
/* Give an error if there's back reference to a non-existent capturing /* After a successful compile, give an error if there's back reference to a
subpattern. */ non-existent capturing subpattern. Then, unless disabled, check whether any
single character iterators can be auto-possessified. The function overwrites
the appropriate opcode values, so the type of the pointer must be cast. NOTE:
the intermediate variable "temp" is used in this code because at least one
compiler gives a warning about loss of "const" attribute if the cast
(PCRE2_UCHAR *)codestart is used directly in the function call. */
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; if (errorcode == 0)
{
/* Unless disabled, check whether any single character iterators can be if (re->top_backref > re->top_bracket) errorcode = ERR15;
auto-possessified. The function overwrites the appropriate opcode values, so else if ((options & PCRE2_NO_AUTO_POSSESS) == 0)
the type of the pointer must be cast. NOTE: the intermediate variable "temp" is
used in this code because at least one compiler gives a warning about loss of
"const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the
function call. */
if ((options & PCRE2_NO_AUTO_POSSESS) == 0)
{ {
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart; PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
PRIV(auto_possessify)(temp, utf, &cb); if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
}
} }
/* If there were any lookbehind assertions that contained OP_RECURSE /* If there were any lookbehind assertions that contained OP_RECURSE
@ -7858,7 +7860,7 @@ OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
exceptional ones forgo this. We scan the pattern to check that they are fixed exceptional ones forgo this. We scan the pattern to check that they are fixed
length, and set their lengths. */ length, and set their lengths. */
if (cb.check_lookbehind) if (errorcode == 0 && cb.check_lookbehind)
{ {
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart; PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart;

View File

@ -159,6 +159,8 @@ static const char compile_error_texts[] =
"character code point value in \\u.... sequence is too large\0" "character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0" "digits missing in \\x{} or \\o{}\0"
"syntax error in (?(VERSION condition\0" "syntax error in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
; ;
/* Match-time and UTF error texts are in the same format. */ /* Match-time and UTF error texts are in the same format. */

View File

@ -1882,7 +1882,7 @@ is available. */
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_) #define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_) #define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
const compile_block *); const compile_block *);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,