A small fix for the case where the text length is nil: skip the tokenizer loop when text:ulen() returns nil

This commit is contained in:
George Sokianos 2023-01-08 20:42:19 +00:00
parent 789004ea2c
commit 0cb20ab7b2
1 changed file with 81 additions and 79 deletions

View File

@@ -225,95 +225,97 @@ function tokenizer.tokenize(incoming_syntax, text, state)
end end
local text_len = text:ulen() local text_len = text:ulen()
while i <= text_len do if text_len ~= nil then
-- continue trying to match the end pattern of a pair if we have a state set while i <= text_len do
if current_pattern_idx > 0 then -- continue trying to match the end pattern of a pair if we have a state set
local p = current_syntax.patterns[current_pattern_idx] if current_pattern_idx > 0 then
local s, e = find_text(text, p, i, false, true) local p = current_syntax.patterns[current_pattern_idx]
local s, e = find_text(text, p, i, false, true)
local cont = true local cont = true
-- If we're in subsyntax mode, always check to see if we end our syntax -- If we're in subsyntax mode, always check to see if we end our syntax
-- first, before the found delimeter, as ending the subsyntax takes -- first, before the found delimeter, as ending the subsyntax takes
-- precedence over ending the delimiter in the subsyntax. -- precedence over ending the delimiter in the subsyntax.
if subsyntax_info then if subsyntax_info then
local ss, se = find_text(text, subsyntax_info, i, false, true) local ss, se = find_text(text, subsyntax_info, i, false, true)
-- If we find that we end the subsyntax before the -- If we find that we end the subsyntax before the
-- delimiter, push the token, and signal we shouldn't -- delimiter, push the token, and signal we shouldn't
-- treat the bit after as a token to be normally parsed -- treat the bit after as a token to be normally parsed
-- (as it's the syntax delimiter). -- (as it's the syntax delimiter).
if ss and (s == nil or ss < s) then if ss and (s == nil or ss < s) then
push_token(res, p.type, text:usub(i, ss - 1)) push_token(res, p.type, text:usub(i, ss - 1))
i = ss i = ss
cont = false cont = false
end
end
-- If we don't have any concerns about syntax delimiters,
-- continue on as normal.
if cont then
if s then
push_token(res, p.type, text:usub(i, e))
set_subsyntax_pattern_idx(0)
i = e + 1
else
push_token(res, p.type, text:usub(i))
break
end
end end
end end
-- If we don't have any concerns about syntax delimiters, -- General end of syntax check. Applies in the case where
-- continue on as normal. -- we're ending early in the middle of a delimiter, or
if cont then -- just normally, upon finding a token.
if subsyntax_info then
local s, e = find_text(text, subsyntax_info, i, true, true)
if s then if s then
push_token(res, p.type, text:usub(i, e)) push_token(res, subsyntax_info.type, text:usub(i, e))
set_subsyntax_pattern_idx(0) -- On finding unescaped delimiter, pop it.
pop_subsyntax()
i = e + 1 i = e + 1
else end
push_token(res, p.type, text:usub(i)) end
-- find matching pattern
local matched = false
for n, p in ipairs(current_syntax.patterns) do
local find_results = { find_text(text, p, i, true, false) }
if find_results[1] then
local type_is_table = type(p.type) == "table"
local n_types = type_is_table and #p.type or 1
if #find_results == 2 and type_is_table then
report_bad_pattern(core.warn, current_syntax, n,
"Token type is a table, but a string was expected.")
p.type = p.type[1]
elseif #find_results - 1 > n_types then
report_bad_pattern(core.error, current_syntax, n,
"Not enough token types: got %d needed %d.", n_types, #find_results - 1)
elseif #find_results - 1 < n_types then
report_bad_pattern(core.warn, current_syntax, n,
"Too many token types: got %d needed %d.", n_types, #find_results - 1)
end
-- matched pattern; make and add tokens
push_tokens(res, current_syntax, p, text, find_results)
-- update state if this was a start|end pattern pair
if type(p.pattern or p.regex) == "table" then
-- If we have a subsyntax, push that onto the subsyntax stack.
if p.syntax then
push_subsyntax(p, n)
else
set_subsyntax_pattern_idx(n)
end
end
-- move cursor past this token
i = find_results[2] + 1
matched = true
break break
end end
end end
end
-- General end of syntax check. Applies in the case where
-- we're ending early in the middle of a delimiter, or
-- just normally, upon finding a token.
if subsyntax_info then
local s, e = find_text(text, subsyntax_info, i, true, true)
if s then
push_token(res, subsyntax_info.type, text:usub(i, e))
-- On finding unescaped delimiter, pop it.
pop_subsyntax()
i = e + 1
end
end
-- find matching pattern -- consume character if we didn't match
local matched = false if not matched then
for n, p in ipairs(current_syntax.patterns) do push_token(res, "normal", text:usub(i, i))
local find_results = { find_text(text, p, i, true, false) } i = i + 1
if find_results[1] then
local type_is_table = type(p.type) == "table"
local n_types = type_is_table and #p.type or 1
if #find_results == 2 and type_is_table then
report_bad_pattern(core.warn, current_syntax, n,
"Token type is a table, but a string was expected.")
p.type = p.type[1]
elseif #find_results - 1 > n_types then
report_bad_pattern(core.error, current_syntax, n,
"Not enough token types: got %d needed %d.", n_types, #find_results - 1)
elseif #find_results - 1 < n_types then
report_bad_pattern(core.warn, current_syntax, n,
"Too many token types: got %d needed %d.", n_types, #find_results - 1)
end
-- matched pattern; make and add tokens
push_tokens(res, current_syntax, p, text, find_results)
-- update state if this was a start|end pattern pair
if type(p.pattern or p.regex) == "table" then
-- If we have a subsyntax, push that onto the subsyntax stack.
if p.syntax then
push_subsyntax(p, n)
else
set_subsyntax_pattern_idx(n)
end
end
-- move cursor past this token
i = find_results[2] + 1
matched = true
break
end end
end end
-- consume character if we didn't match
if not matched then
push_token(res, "normal", text:usub(i, i))
i = i + 1
end
end end
return res, state return res, state