Consume unmatched character correctly
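We must consume the whole UTF-8 character (the lead byte plus any continuation bytes that follow it), not just a single byte; otherwise a multi-byte character is split in the middle.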
This commit is contained in:
parent
994c62b64a
commit
4faaf089ef
|
@ -237,8 +237,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
|
|
||||||
-- consume character if we didn't match
|
-- consume character if we didn't match
|
||||||
if not matched then
|
if not matched then
|
||||||
push_token(res, "normal", text:sub(i, i))
|
local n = 0
|
||||||
i = i + 1
|
-- reach the next character
|
||||||
|
while text:byte(i + n + 1) and common.is_utf8_cont(text, i + n + 1) do
|
||||||
|
n = n + 1
|
||||||
|
end
|
||||||
|
push_token(res, "normal", text:sub(i, i + n))
|
||||||
|
i = i + n + 1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue