Consume unmatched character correctly

We must consume the whole UTF-8 character, not just a single byte.
This commit is contained in:
Guldoman 2021-12-11 03:43:33 +01:00
parent 1944f8b9e5
commit 4f079918c9
No known key found for this signature in database
GPG Key ID: C08A498EC7F1AFDD
1 changed file with 7 additions and 2 deletions

View File

@@ -237,8 +237,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
-- consume character if we didn't match -- consume character if we didn't match
if not matched then if not matched then
push_token(res, "normal", text:sub(i, i)) local n = 0
i = i + 1 -- reach the next character
while text:byte(i + n + 1) and common.is_utf8_cont(text, i + n + 1) do
n = n + 1
end
push_token(res, "normal", text:sub(i, i + n))
i = i + n + 1
end end
end end