Fix bad offset value in invalid UTF pattern error.
This commit is contained in:
parent
2eb24e2dac
commit
aec5c96cf5
|
@ -332,7 +332,10 @@ because it sets the "startoffset" parameter for pcre2_match().
|
||||||
|
|
||||||
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
|
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
|
||||||
an item and its qualifier (for example, A(?#comment)?B) pcre2_compile()
|
an item and its qualifier (for example, A(?#comment)?B) pcre2_compile()
|
||||||
misbehaved.
|
misbehaved. This bug was found by the LLVM fuzzer.
|
||||||
|
|
||||||
|
100. The error for an invalid UTF pattern string always gave the code unit
|
||||||
|
offset as zero instead of the offset at which the invalidity was found.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
|
|
|
@ -8468,7 +8468,7 @@ if (utf)
|
||||||
}
|
}
|
||||||
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
|
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
|
||||||
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
|
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
|
||||||
goto HAD_ERROR;
|
goto HAD_UTF_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check UCP lockout. */
|
/* Check UCP lockout. */
|
||||||
|
@ -8849,10 +8849,11 @@ via the dreaded goto. */
|
||||||
if (errorcode != 0)
|
if (errorcode != 0)
|
||||||
{
|
{
|
||||||
HAD_ERROR:
|
HAD_ERROR:
|
||||||
|
*erroroffset = (int)(ptr - pattern);
|
||||||
|
HAD_UTF_ERROR:
|
||||||
|
*errorptr = errorcode;
|
||||||
pcre2_code_free(re);
|
pcre2_code_free(re);
|
||||||
re = NULL;
|
re = NULL;
|
||||||
*errorptr = errorcode;
|
|
||||||
*erroroffset = (int)(ptr - pattern);
|
|
||||||
goto EXIT;
|
goto EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -204,7 +204,7 @@ static const char match_error_texts[] =
|
||||||
/* 20 */
|
/* 20 */
|
||||||
"UTF-8 error: overlong 5-byte sequence\0"
|
"UTF-8 error: overlong 5-byte sequence\0"
|
||||||
"UTF-8 error: overlong 6-byte sequence\0"
|
"UTF-8 error: overlong 6-byte sequence\0"
|
||||||
"UTF-8 error: isolated 0x80 byte\0"
|
"UTF-8 error: isolated byte with 0x80 bit set\0"
|
||||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||||
"UTF-16 error: missing low surrogate at end\0"
|
"UTF-16 error: missing low surrogate at end\0"
|
||||||
/* 25 */
|
/* 25 */
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# This set of tests is for UTF-8 support and Unicode property support, with
|
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||||
# relevance only for the 8-bit library.
|
# relevance only for the 8-bit library.
|
||||||
|
|
||||||
# The next 3 patterns have UTF-8 errors
|
# The next 4 patterns have UTF-8 errors
|
||||||
|
|
||||||
/[Ã]/utf
|
/[Ã]/utf
|
||||||
|
|
||||||
|
@ -9,6 +9,8 @@
|
||||||
|
|
||||||
/ÃÃÃxxx/utf
|
/ÃÃÃxxx/utf
|
||||||
|
|
||||||
|
/‚‚‚‚‚‚‚Ã/utf
|
||||||
|
|
||||||
# Now test subjects
|
# Now test subjects
|
||||||
|
|
||||||
/badutf/utf
|
/badutf/utf
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
# This set of tests is for UTF-8 support and Unicode property support, with
|
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||||
# relevance only for the 8-bit library.
|
# relevance only for the 8-bit library.
|
||||||
|
|
||||||
# The next 3 patterns have UTF-8 errors
|
# The next 4 patterns have UTF-8 errors
|
||||||
|
|
||||||
/[Ã]/utf
|
/[Ã]/utf
|
||||||
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
|
Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
|
||||||
|
|
||||||
/Ã/utf
|
/Ã/utf
|
||||||
Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
|
Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
|
||||||
|
@ -12,6 +12,9 @@ Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
|
||||||
/ÃÃÃxxx/utf
|
/ÃÃÃxxx/utf
|
||||||
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
|
Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
|
||||||
|
|
||||||
|
/‚‚‚‚‚‚‚Ã/utf
|
||||||
|
Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
|
||||||
|
|
||||||
# Now test subjects
|
# Now test subjects
|
||||||
|
|
||||||
/badutf/utf
|
/badutf/utf
|
||||||
|
@ -89,7 +92,7 @@ Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
|
||||||
\xfc\x80\x80\x80\x80\x8f
|
\xfc\x80\x80\x80\x80\x8f
|
||||||
Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
|
Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
|
||||||
\x80
|
\x80
|
||||||
Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 0
|
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
|
||||||
\xfe
|
\xfe
|
||||||
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
|
Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
|
||||||
\xff
|
\xff
|
||||||
|
@ -1534,6 +1537,6 @@ Options: utf
|
||||||
First code unit = 'x'
|
First code unit = 'x'
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
a\x80zx\=offset=3
|
a\x80zx\=offset=3
|
||||||
Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 1
|
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
Loading…
Reference in New Issue