Fix bad offset value in invalid UTF pattern error.
This commit is contained in:
parent
2eb24e2dac
commit
aec5c96cf5
|
@@ -332,7 +332,10 @@ because it sets the "startoffset" parameter for pcre2_match().
|
|||
|
||||
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
|
||||
an item and its qualifier (for example, A(?#comment)?B) pcre2_compile()
|
||||
misbehaved.
|
||||
misbehaved. This bug was found by the LLVM fuzzer.
|
||||
|
||||
100. The error for an invalid UTF pattern string always gave the code unit
|
||||
offset as zero instead of where the invalidity was found.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
|
|
|
@@ -8468,7 +8468,7 @@ if (utf)
|
|||
}
|
||||
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
|
||||
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
|
||||
goto HAD_ERROR;
|
||||
goto HAD_UTF_ERROR;
|
||||
}
|
||||
|
||||
/* Check UCP lockout. */
|
||||
|
@@ -8849,10 +8849,11 @@ via the dreaded goto. */
|
|||
if (errorcode != 0)
|
||||
{
|
||||
HAD_ERROR:
|
||||
*erroroffset = (int)(ptr - pattern);
|
||||
HAD_UTF_ERROR:
|
||||
*errorptr = errorcode;
|
||||
pcre2_code_free(re);
|
||||
re = NULL;
|
||||
*errorptr = errorcode;
|
||||
*erroroffset = (int)(ptr - pattern);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
|
|
|
@@ -204,7 +204,7 @@ static const char match_error_texts[] =
|
|||
/* 20 */
|
||||
"UTF-8 error: overlong 5-byte sequence\0"
|
||||
"UTF-8 error: overlong 6-byte sequence\0"
|
||||
"UTF-8 error: isolated 0x80 byte\0"
|
||||
"UTF-8 error: isolated byte with 0x80 bit set\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
|
|
|
@@ -1,7 +1,7 @@
|
|||
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||
# relevance only for the 8-bit library.
|
||||
|
||||
# The next 3 patterns have UTF-8 errors
|
||||
# The next 4 patterns have UTF-8 errors
|
||||
|
||||
/[Ã]/utf
|
||||
|
||||
|
@@ -9,6 +9,8 @@
|
|||
|
||||
/ÃÃÃxxx/utf
|
||||
|
||||
/‚‚‚‚‚‚‚Ã/utf
|
||||
|
||||
# Now test subjects
|
||||
|
||||
/badutf/utf
|
||||
|
|
|
@@ -1,10 +1,10 @@
|
|||
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||
# relevance only for the 8-bit library.
|
||||
|
||||
# The next 3 patterns have UTF-8 errors
|
||||
# The next 4 patterns have UTF-8 errors
|
||||
|
||||
/[Ã]/utf
|
||||
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
|
||||
Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
|
||||
|
||||
/Ã/utf
|
||||
Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
|
||||
|
@@ -12,6 +12,9 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
|
|||
/ÃÃÃxxx/utf
|
||||
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
|
||||
|
||||
/‚‚‚‚‚‚‚Ã/utf
|
||||
Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
|
||||
|
||||
# Now test subjects
|
||||
|
||||
/badutf/utf
|
||||
|
@@ -89,7 +92,7 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
|
|||
\xfc\x80\x80\x80\x80\x8f
|
||||
Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
|
||||
\x80
|
||||
Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0
|
||||
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
|
||||
\xfe
|
||||
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
|
||||
\xff
|
||||
|
@@ -1534,6 +1537,6 @@ Options: utf
|
|||
First code unit = 'x'
|
||||
Subject length lower bound = 1
|
||||
a\x80zx\=offset=3
|
||||
Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 1
|
||||
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
|
||||
|
||||
# End of testinput10
|
||||
|
|
Loading…
Reference in New Issue