Give an internal error for a bad opcode during auto-possessification. This can
stop a loop when compiling an invalid UTF string with PCRE2_NO_UTF_CHECK.
This commit is contained in:
parent
a398ae3bef
commit
ad452f4036
|
@ -55,6 +55,13 @@ patterns.
|
|||
10. The error message for an invalid quantifier has been changed from "nothing
|
||||
to repeat" to "quantifier does not follow a repeatable item".
|
||||
|
||||
11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but
|
||||
scanning the compiled pattern in subsequent auto-possessification can get out
|
||||
of step and lead to an unknown opcode. Previously this could have caused an
|
||||
infinite loop. Now it generates an "internal error" error. This is a tidyup,
|
||||
not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an
|
||||
undefined outcome.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
|
|
@ -1090,17 +1090,20 @@ but some compilers complain about an unreachable statement. */
|
|||
*************************************************/
|
||||
|
||||
/* Replaces single character iterations with their possessive alternatives
|
||||
if appropriate. This function modifies the compiled opcode!
|
||||
if appropriate. This function modifies the compiled opcode! Hitting a
|
||||
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
|
||||
bad UTF string was compiled with PCRE2_NO_UTF_CHECK.
|
||||
|
||||
Arguments:
|
||||
code points to start of the byte code
|
||||
utf TRUE in UTF mode
|
||||
cb compile data block
|
||||
|
||||
Returns: nothing
|
||||
Returns: 0 for success
|
||||
-1 if a non-existent opcode is encountered
|
||||
*/
|
||||
|
||||
void
|
||||
int
|
||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
|
||||
{
|
||||
register PCRE2_UCHAR c;
|
||||
|
@ -1111,7 +1114,9 @@ uint32_t list[8];
|
|||
for (;;)
|
||||
{
|
||||
c = *code;
|
||||
|
||||
|
||||
if (c > OP_TABLE_LENGTH) return -1; /* Something gone wrong */
|
||||
|
||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||
{
|
||||
c -= get_repeat_base(c) - OP_STAR;
|
||||
|
@ -1207,7 +1212,7 @@ for (;;)
|
|||
switch(c)
|
||||
{
|
||||
case OP_END:
|
||||
return;
|
||||
return 0;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
|
|
|
@ -573,7 +573,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79 };
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80 };
|
||||
|
||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||
|
@ -7802,7 +7802,9 @@ if (usedlength > length) errorcode = ERR23; else
|
|||
}
|
||||
|
||||
/* Fill in any forward references that are required. There may be repeated
|
||||
references; optimize for them, as searching a large regex takes time. */
|
||||
references; optimize for them, as searching a large regex takes time. The
|
||||
test of errorcode inside the loop means that nothing is done if it is already
|
||||
non-zero. */
|
||||
|
||||
if (cb.hwm > cb.start_workspace)
|
||||
{
|
||||
|
@ -7832,23 +7834,23 @@ if (cb.workspace_size > COMPILE_WORK_SIZE)
|
|||
ccontext->memctl.memory_data);
|
||||
cb.start_workspace = NULL;
|
||||
|
||||
/* Give an error if there's a back reference to a non-existent capturing
|
||||
subpattern. */
|
||||
/* After a successful compile, give an error if there's a back reference to a
|
||||
non-existent capturing subpattern. Then, unless disabled, check whether any
|
||||
single character iterators can be auto-possessified. The function overwrites
|
||||
the appropriate opcode values, so the type of the pointer must be cast. NOTE:
|
||||
the intermediate variable "temp" is used in this code because at least one
|
||||
compiler gives a warning about loss of "const" attribute if the cast
|
||||
(PCRE2_UCHAR *)codestart is used directly in the function call. */
|
||||
|
||||
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
|
||||
|
||||
/* Unless disabled, check whether any single character iterators can be
|
||||
auto-possessified. The function overwrites the appropriate opcode values, so
|
||||
the type of the pointer must be cast. NOTE: the intermediate variable "temp" is
|
||||
used in this code because at least one compiler gives a warning about loss of
|
||||
"const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the
|
||||
function call. */
|
||||
|
||||
if ((options & PCRE2_NO_AUTO_POSSESS) == 0)
|
||||
if (errorcode == 0)
|
||||
{
|
||||
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
|
||||
PRIV(auto_possessify)(temp, utf, &cb);
|
||||
}
|
||||
if (re->top_backref > re->top_bracket) errorcode = ERR15;
|
||||
else if ((options & PCRE2_NO_AUTO_POSSESS) == 0)
|
||||
{
|
||||
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
|
||||
if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there were any lookbehind assertions that contained OP_RECURSE
|
||||
(recursions or subroutine calls), a flag is set for them to be checked here,
|
||||
|
@ -7858,7 +7860,7 @@ OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
|
|||
exceptional ones forgo this. We scan the pattern to check that they are fixed
|
||||
length, and set their lengths. */
|
||||
|
||||
if (cb.check_lookbehind)
|
||||
if (errorcode == 0 && cb.check_lookbehind)
|
||||
{
|
||||
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart;
|
||||
|
||||
|
|
|
@ -159,6 +159,8 @@ static const char compile_error_texts[] =
|
|||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"syntax error in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -1882,7 +1882,7 @@ is available. */
|
|||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
||||
|
||||
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||
extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||
const compile_block *);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
|
|
Loading…
Reference in New Issue