Give an internal error for a bad opcode during auto-possessification. This can
stop a loop when compiling an invalid UTF string with PCRE2_NO_UTF_CHECK.
This commit is contained in:
parent
a398ae3bef
commit
ad452f4036
|
@ -55,6 +55,13 @@ patterns.
|
||||||
10. The error message for an invalid quantifier has been changed from "nothing
|
10. The error message for an invalid quantifier has been changed from "nothing
|
||||||
to repeat" to "quantifier does not follow a repeatable item".
|
to repeat" to "quantifier does not follow a repeatable item".
|
||||||
|
|
||||||
|
11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but
|
||||||
|
scanning the compiled pattern in subsequent auto-possessification can get out
|
||||||
|
of step and lead to an unknown opcode. Previously this could have caused an
|
||||||
|
infinite loop. Now it generates an "internal error" error. This is a tidyup,
|
||||||
|
not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an
|
||||||
|
undefined outcome.
|
||||||
|
|
||||||
|
|
||||||
Version 10.00 05-January-2015
|
Version 10.00 05-January-2015
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
|
@ -1090,17 +1090,20 @@ but some compilers complain about an unreachable statement. */
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* Replaces single character iterations with their possessive alternatives
|
/* Replaces single character iterations with their possessive alternatives
|
||||||
if appropriate. This function modifies the compiled opcode!
|
if appropriate. This function modifies the compiled opcode! Hitting a
|
||||||
|
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
|
||||||
|
bad UTF string was compiled with PCRE2_NO_UTF_CHECK.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
code points to start of the byte code
|
code points to start of the byte code
|
||||||
utf TRUE in UTF mode
|
utf TRUE in UTF mode
|
||||||
cb compile data block
|
cb compile data block
|
||||||
|
|
||||||
Returns: nothing
|
Returns: 0 for success
|
||||||
|
-1 if a non-existent opcode is encountered
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void
|
int
|
||||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
|
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
|
||||||
{
|
{
|
||||||
register PCRE2_UCHAR c;
|
register PCRE2_UCHAR c;
|
||||||
|
@ -1111,7 +1114,9 @@ uint32_t list[8];
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
c = *code;
|
c = *code;
|
||||||
|
|
||||||
|
if (c > OP_TABLE_LENGTH) return -1; /* Something gone wrong */
|
||||||
|
|
||||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||||
{
|
{
|
||||||
c -= get_repeat_base(c) - OP_STAR;
|
c -= get_repeat_base(c) - OP_STAR;
|
||||||
|
@ -1207,7 +1212,7 @@ for (;;)
|
||||||
switch(c)
|
switch(c)
|
||||||
{
|
{
|
||||||
case OP_END:
|
case OP_END:
|
||||||
return;
|
return 0;
|
||||||
|
|
||||||
case OP_TYPESTAR:
|
case OP_TYPESTAR:
|
||||||
case OP_TYPEMINSTAR:
|
case OP_TYPEMINSTAR:
|
||||||
|
|
|
@ -573,7 +573,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
||||||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79 };
|
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80 };
|
||||||
|
|
||||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||||
|
@ -7802,7 +7802,9 @@ if (usedlength > length) errorcode = ERR23; else
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fill in any forward references that are required. There may be repeated
|
/* Fill in any forward references that are required. There may be repeated
|
||||||
references; optimize for them, as searching a large regex takes time. */
|
references; optimize for them, as searching a large regex takes time. The
|
||||||
|
test of errorcode inside the loop means that nothing is done if it is already
|
||||||
|
non-zero. */
|
||||||
|
|
||||||
if (cb.hwm > cb.start_workspace)
|
if (cb.hwm > cb.start_workspace)
|
||||||
{
|
{
|
||||||
|
@ -7832,23 +7834,23 @@ if (cb.workspace_size > COMPILE_WORK_SIZE)
|
||||||
ccontext->memctl.memory_data);
|
ccontext->memctl.memory_data);
|
||||||
cb.start_workspace = NULL;
|
cb.start_workspace = NULL;
|
||||||
|
|
||||||
/* Give an error if there's back reference to a non-existent capturing
|
/* After a successful compile, give an error if there's a back reference to a
|
||||||
subpattern. */
|
non-existent capturing subpattern. Then, unless disabled, check whether any
|
||||||
|
single character iterators can be auto-possessified. The function overwrites
|
||||||
|
the appropriate opcode values, so the type of the pointer must be cast. NOTE:
|
||||||
|
the intermediate variable "temp" is used in this code because at least one
|
||||||
|
compiler gives a warning about loss of "const" attribute if the cast
|
||||||
|
(PCRE2_UCHAR *)codestart is used directly in the function call. */
|
||||||
|
|
||||||
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
|
if (errorcode == 0)
|
||||||
|
|
||||||
/* Unless disabled, check whether any single character iterators can be
|
|
||||||
auto-possessified. The function overwrites the appropriate opcode values, so
|
|
||||||
the type of the pointer must be cast. NOTE: the intermediate variable "temp" is
|
|
||||||
used in this code because at least one compiler gives a warning about loss of
|
|
||||||
"const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the
|
|
||||||
function call. */
|
|
||||||
|
|
||||||
if ((options & PCRE2_NO_AUTO_POSSESS) == 0)
|
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
|
if (re->top_backref > re->top_bracket) errorcode = ERR15;
|
||||||
PRIV(auto_possessify)(temp, utf, &cb);
|
else if ((options & PCRE2_NO_AUTO_POSSESS) == 0)
|
||||||
}
|
{
|
||||||
|
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
|
||||||
|
if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* If there were any lookbehind assertions that contained OP_RECURSE
|
/* If there were any lookbehind assertions that contained OP_RECURSE
|
||||||
(recursions or subroutine calls), a flag is set for them to be checked here,
|
(recursions or subroutine calls), a flag is set for them to be checked here,
|
||||||
|
@ -7858,7 +7860,7 @@ OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The
|
||||||
exceptional ones forgo this. We scan the pattern to check that they are fixed
|
exceptional ones forgo this. We scan the pattern to check that they are fixed
|
||||||
length, and set their lengths. */
|
length, and set their lengths. */
|
||||||
|
|
||||||
if (cb.check_lookbehind)
|
if (errorcode == 0 && cb.check_lookbehind)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart;
|
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart;
|
||||||
|
|
||||||
|
|
|
@ -159,6 +159,8 @@ static const char compile_error_texts[] =
|
||||||
"character code point value in \\u.... sequence is too large\0"
|
"character code point value in \\u.... sequence is too large\0"
|
||||||
"digits missing in \\x{} or \\o{}\0"
|
"digits missing in \\x{} or \\o{}\0"
|
||||||
"syntax error in (?(VERSION condition\0"
|
"syntax error in (?(VERSION condition\0"
|
||||||
|
/* 80 */
|
||||||
|
"internal error: unknown opcode in auto_possessify()\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
|
|
@ -1882,7 +1882,7 @@ is available. */
|
||||||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||||
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
||||||
|
|
||||||
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||||
const compile_block *);
|
const compile_block *);
|
||||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||||
|
|
Loading…
Reference in New Issue