Allow `tokenizer` to pause and resume in the middle of a line (#1444)

This commit is contained in:
Guldoman 2023-04-01 18:12:39 +02:00 committed by George Sokianos
parent b30ea9e9ef
commit ca6fedd3f7
2 changed files with 50 additions and 11 deletions

View File

@ -19,25 +19,34 @@ function Highlighter:start()
if self.running then return end
self.running = true
core.add_thread(function()
while self.first_invalid_line < self.max_wanted_line do
while self.first_invalid_line <= self.max_wanted_line do
local max = math.min(self.first_invalid_line + 40, self.max_wanted_line)
local retokenized_from
for i = self.first_invalid_line, max do
local state = (i > 1) and self.lines[i - 1].state
local line = self.lines[i]
if not (line and line.init_state == state and line.text == self.doc.lines[i]) then
if line and line.resume and (line.init_state ~= state or line.text ~= self.doc.lines[i]) then
-- Reset the progress if no longer valid
line.resume = nil
end
if not (line and line.init_state == state and line.text == self.doc.lines[i] and not line.resume) then
retokenized_from = retokenized_from or i
self.lines[i] = self:tokenize_line(i, state)
self.lines[i] = self:tokenize_line(i, state, line and line.resume)
if self.lines[i].resume then
self.first_invalid_line = i
goto yield
end
elseif retokenized_from then
self:update_notify(retokenized_from, i - retokenized_from - 1)
retokenized_from = nil
end
end
self.first_invalid_line = max + 1
::yield::
if retokenized_from then
self:update_notify(retokenized_from, max - retokenized_from)
end
self.first_invalid_line = max + 1
core.redraw = true
coroutine.yield()
end
@ -48,7 +57,7 @@ end
-- Records `amount` as the highest line number the highlighter is asked to
-- cover, then starts the highlighter when the first invalid line lies within
-- that range.
-- NOTE(review): this text is a diff rendering without +/- markers. The two
-- consecutive `if` lines below look like the pre-change (`<`) and post-change
-- (`<=`) forms of the same condition, not nested conditions -- confirm
-- against the repository before reading this as runnable code.
local function set_max_wanted_lines(self, amount)
self.max_wanted_line = amount
if self.first_invalid_line < self.max_wanted_line then
if self.first_invalid_line <= self.max_wanted_line then
-- Delegates to the `start` method of `self`.
self:start()
end
end
@ -91,11 +100,11 @@ function Highlighter:update_notify(line, n)
end
-- Builds the highlight record for line `idx` of the document.
--   idx    : index into self.doc.lines of the line to tokenize
--   state  : tokenizer state the line starts with; stored as `init_state`
--   resume : (three-parameter form only) value forwarded as the fourth
--            argument of tokenizer.tokenize
-- Returns a table holding `init_state`, `text`, `tokens`, `state` and, in the
-- three-parameter form, `resume`.
-- NOTE(review): this text is a diff rendering without +/- markers. The two
-- `function` lines and the two `tokenizer.tokenize` calls look like the
-- pre-change and post-change forms of the same statement -- confirm against
-- the repository before reading this as runnable code.
function Highlighter:tokenize_line(idx, state)
function Highlighter:tokenize_line(idx, state, resume)
local res = {}
res.init_state = state
-- Keep the exact text that was tokenized alongside the tokens.
res.text = self.doc.lines[idx]
res.tokens, res.state = tokenizer.tokenize(self.doc.syntax, res.text, state)
res.tokens, res.state, res.resume = tokenizer.tokenize(self.doc.syntax, res.text, state, resume)
return res
end

View File

@ -1,5 +1,6 @@
local core = require "core"
local syntax = require "core.syntax"
local config = require "core.config"
local tokenizer = {}
local bad_patterns = {}
@ -9,7 +10,7 @@ local function push_token(t, type, text)
type = type or "normal"
local prev_type = t[#t-1]
local prev_text = t[#t]
if prev_type and (prev_type == type or prev_text:ufind("^%s*$")) then
if prev_type and (prev_type == type or (prev_text:ufind("^%s*$") and type ~= "incomplete")) then
t[#t-1] = type
t[#t] = prev_text .. text
else
@ -128,8 +129,8 @@ end
---@param incoming_syntax table
---@param text string
---@param state string
function tokenizer.tokenize(incoming_syntax, text, state)
local res = {}
function tokenizer.tokenize(incoming_syntax, text, state, resume)
local res
local i = 1
if #incoming_syntax.patterns == 0 then
@ -137,6 +138,20 @@ function tokenizer.tokenize(incoming_syntax, text, state)
end
state = state or string.char(0)
if resume then
res = resume.res
-- Remove "incomplete" tokens
while res[#res-1] == "incomplete" do
table.remove(res, #res)
table.remove(res, #res)
end
i = resume.i
state = resume.state
end
res = res or {}
-- incoming_syntax : the parent syntax of the file.
-- state : a string of bytes representing syntax state (see above)
@ -246,7 +261,22 @@ function tokenizer.tokenize(incoming_syntax, text, state)
end
local text_len = text:ulen()
local start_time = system.get_time()
local starting_i = i
while i <= text_len do
-- Every 200 chars, check if we're out of time
if i - starting_i > 200 then
starting_i = i
if system.get_time() - start_time > 0.5 / config.fps then
-- We're out of time
push_token(res, "incomplete", string.usub(text, i))
return res, string.char(0), {
res = res,
i = i,
state = state
}
end
end
-- continue trying to match the end pattern of a pair if we have a state set
if current_pattern_idx > 0 then
local p = current_syntax.patterns[current_pattern_idx]