Correctly identify the start of the next character in `tokenizer`
When moving to the next character, we have to consider that the current one might be multi-byte, so we skip any UTF-8 continuation bytes before resuming the search.
This commit is contained in:
parent
1872e82141
commit
8a516d35ce
|
@ -1,4 +1,5 @@
|
||||||
local syntax = require "core.syntax"
|
local syntax = require "core.syntax"
|
||||||
|
local common = require "core.common"
|
||||||
|
|
||||||
local tokenizer = {}
|
local tokenizer = {}
|
||||||
|
|
||||||
|
@ -142,8 +143,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
code = p._regex
|
code = p._regex
|
||||||
end
|
end
|
||||||
repeat
|
repeat
|
||||||
res = p.pattern and { text:find(at_start and "^" .. code or code, res[2]+1) }
|
local next = res[2] + 1
|
||||||
or { regex.match(code, text, res[2]+1, at_start and regex.ANCHORED or 0) }
|
-- go to the start of the next utf-8 character
|
||||||
|
while common.is_utf8_cont(text, next) do
|
||||||
|
next = next + 1
|
||||||
|
end
|
||||||
|
res = p.pattern and { text:find(at_start and "^" .. code or code, next) }
|
||||||
|
or { regex.match(code, text, next, at_start and regex.ANCHORED or 0) }
|
||||||
if res[1] and close and target[3] then
|
if res[1] and close and target[3] then
|
||||||
local count = 0
|
local count = 0
|
||||||
for i = res[1] - 1, 1, -1 do
|
for i = res[1] - 1, 1, -1 do
|
||||||
|
|
Loading…
Reference in New Issue