Fix error offset bug introduced at 1176.

This commit is contained in:
Philip.Hazel 2019-10-16 17:12:13 +00:00
parent f768448fd3
commit 7ecc9cdfaf
3 changed files with 20 additions and 0 deletions

View File

@ -6184,6 +6184,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
}
match_data->subject = NULL;
/* Zero the error offset in case the first code unit is invalid UTF. */
match_data->startchar = 0;
/* ============================= JIT matching ============================== */

View File

@ -579,4 +579,10 @@
/(?:\x{ff}|\x{3000})/I,utf
/x/utf
abxyz
\x80\=startchar
abc\x80\=startchar
abc\x80\=startchar,offset=3
# End of testinput10

10
testdata/testoutput10 vendored
View File

@ -1803,4 +1803,14 @@ Options: utf
Starting code units: \xc3 \xe3
Subject length lower bound = 1
/x/utf
abxyz
0: x
\x80\=startchar
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
abc\x80\=startchar
Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
abc\x80\=startchar,offset=3
Error -36 (bad UTF-8 offset)
# End of testinput10