a small fix in case the text is nil
This commit is contained in:
parent
789004ea2c
commit
0cb20ab7b2
|
@ -225,97 +225,99 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
end
|
end
|
||||||
|
|
||||||
local text_len = text:ulen()
|
local text_len = text:ulen()
|
||||||
while i <= text_len do
|
if text_len ~= nil then
|
||||||
-- continue trying to match the end pattern of a pair if we have a state set
|
while i <= text_len do
|
||||||
if current_pattern_idx > 0 then
|
-- continue trying to match the end pattern of a pair if we have a state set
|
||||||
local p = current_syntax.patterns[current_pattern_idx]
|
if current_pattern_idx > 0 then
|
||||||
local s, e = find_text(text, p, i, false, true)
|
local p = current_syntax.patterns[current_pattern_idx]
|
||||||
|
local s, e = find_text(text, p, i, false, true)
|
||||||
|
|
||||||
local cont = true
|
local cont = true
|
||||||
-- If we're in subsyntax mode, always check to see if we end our syntax
|
-- If we're in subsyntax mode, always check to see if we end our syntax
|
||||||
-- first, before the found delimeter, as ending the subsyntax takes
|
-- first, before the found delimeter, as ending the subsyntax takes
|
||||||
-- precedence over ending the delimiter in the subsyntax.
|
-- precedence over ending the delimiter in the subsyntax.
|
||||||
if subsyntax_info then
|
if subsyntax_info then
|
||||||
local ss, se = find_text(text, subsyntax_info, i, false, true)
|
local ss, se = find_text(text, subsyntax_info, i, false, true)
|
||||||
-- If we find that we end the subsyntax before the
|
-- If we find that we end the subsyntax before the
|
||||||
-- delimiter, push the token, and signal we shouldn't
|
-- delimiter, push the token, and signal we shouldn't
|
||||||
-- treat the bit after as a token to be normally parsed
|
-- treat the bit after as a token to be normally parsed
|
||||||
-- (as it's the syntax delimiter).
|
-- (as it's the syntax delimiter).
|
||||||
if ss and (s == nil or ss < s) then
|
if ss and (s == nil or ss < s) then
|
||||||
push_token(res, p.type, text:usub(i, ss - 1))
|
push_token(res, p.type, text:usub(i, ss - 1))
|
||||||
i = ss
|
i = ss
|
||||||
cont = false
|
cont = false
|
||||||
|
end
|
||||||
|
end
|
||||||
|
-- If we don't have any concerns about syntax delimiters,
|
||||||
|
-- continue on as normal.
|
||||||
|
if cont then
|
||||||
|
if s then
|
||||||
|
push_token(res, p.type, text:usub(i, e))
|
||||||
|
set_subsyntax_pattern_idx(0)
|
||||||
|
i = e + 1
|
||||||
|
else
|
||||||
|
push_token(res, p.type, text:usub(i))
|
||||||
|
break
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
-- If we don't have any concerns about syntax delimiters,
|
-- General end of syntax check. Applies in the case where
|
||||||
-- continue on as normal.
|
-- we're ending early in the middle of a delimiter, or
|
||||||
if cont then
|
-- just normally, upon finding a token.
|
||||||
|
if subsyntax_info then
|
||||||
|
local s, e = find_text(text, subsyntax_info, i, true, true)
|
||||||
if s then
|
if s then
|
||||||
push_token(res, p.type, text:usub(i, e))
|
push_token(res, subsyntax_info.type, text:usub(i, e))
|
||||||
set_subsyntax_pattern_idx(0)
|
-- On finding unescaped delimiter, pop it.
|
||||||
|
pop_subsyntax()
|
||||||
i = e + 1
|
i = e + 1
|
||||||
else
|
end
|
||||||
push_token(res, p.type, text:usub(i))
|
end
|
||||||
|
|
||||||
|
-- find matching pattern
|
||||||
|
local matched = false
|
||||||
|
for n, p in ipairs(current_syntax.patterns) do
|
||||||
|
local find_results = { find_text(text, p, i, true, false) }
|
||||||
|
if find_results[1] then
|
||||||
|
local type_is_table = type(p.type) == "table"
|
||||||
|
local n_types = type_is_table and #p.type or 1
|
||||||
|
if #find_results == 2 and type_is_table then
|
||||||
|
report_bad_pattern(core.warn, current_syntax, n,
|
||||||
|
"Token type is a table, but a string was expected.")
|
||||||
|
p.type = p.type[1]
|
||||||
|
elseif #find_results - 1 > n_types then
|
||||||
|
report_bad_pattern(core.error, current_syntax, n,
|
||||||
|
"Not enough token types: got %d needed %d.", n_types, #find_results - 1)
|
||||||
|
elseif #find_results - 1 < n_types then
|
||||||
|
report_bad_pattern(core.warn, current_syntax, n,
|
||||||
|
"Too many token types: got %d needed %d.", n_types, #find_results - 1)
|
||||||
|
end
|
||||||
|
-- matched pattern; make and add tokens
|
||||||
|
push_tokens(res, current_syntax, p, text, find_results)
|
||||||
|
-- update state if this was a start|end pattern pair
|
||||||
|
if type(p.pattern or p.regex) == "table" then
|
||||||
|
-- If we have a subsyntax, push that onto the subsyntax stack.
|
||||||
|
if p.syntax then
|
||||||
|
push_subsyntax(p, n)
|
||||||
|
else
|
||||||
|
set_subsyntax_pattern_idx(n)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
-- move cursor past this token
|
||||||
|
i = find_results[2] + 1
|
||||||
|
matched = true
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
-- General end of syntax check. Applies in the case where
|
|
||||||
-- we're ending early in the middle of a delimiter, or
|
|
||||||
-- just normally, upon finding a token.
|
|
||||||
if subsyntax_info then
|
|
||||||
local s, e = find_text(text, subsyntax_info, i, true, true)
|
|
||||||
if s then
|
|
||||||
push_token(res, subsyntax_info.type, text:usub(i, e))
|
|
||||||
-- On finding unescaped delimiter, pop it.
|
|
||||||
pop_subsyntax()
|
|
||||||
i = e + 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
-- find matching pattern
|
-- consume character if we didn't match
|
||||||
local matched = false
|
if not matched then
|
||||||
for n, p in ipairs(current_syntax.patterns) do
|
push_token(res, "normal", text:usub(i, i))
|
||||||
local find_results = { find_text(text, p, i, true, false) }
|
i = i + 1
|
||||||
if find_results[1] then
|
|
||||||
local type_is_table = type(p.type) == "table"
|
|
||||||
local n_types = type_is_table and #p.type or 1
|
|
||||||
if #find_results == 2 and type_is_table then
|
|
||||||
report_bad_pattern(core.warn, current_syntax, n,
|
|
||||||
"Token type is a table, but a string was expected.")
|
|
||||||
p.type = p.type[1]
|
|
||||||
elseif #find_results - 1 > n_types then
|
|
||||||
report_bad_pattern(core.error, current_syntax, n,
|
|
||||||
"Not enough token types: got %d needed %d.", n_types, #find_results - 1)
|
|
||||||
elseif #find_results - 1 < n_types then
|
|
||||||
report_bad_pattern(core.warn, current_syntax, n,
|
|
||||||
"Too many token types: got %d needed %d.", n_types, #find_results - 1)
|
|
||||||
end
|
|
||||||
-- matched pattern; make and add tokens
|
|
||||||
push_tokens(res, current_syntax, p, text, find_results)
|
|
||||||
-- update state if this was a start|end pattern pair
|
|
||||||
if type(p.pattern or p.regex) == "table" then
|
|
||||||
-- If we have a subsyntax, push that onto the subsyntax stack.
|
|
||||||
if p.syntax then
|
|
||||||
push_subsyntax(p, n)
|
|
||||||
else
|
|
||||||
set_subsyntax_pattern_idx(n)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
-- move cursor past this token
|
|
||||||
i = find_results[2] + 1
|
|
||||||
matched = true
|
|
||||||
break
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- consume character if we didn't match
|
|
||||||
if not matched then
|
|
||||||
push_token(res, "normal", text:usub(i, i))
|
|
||||||
i = i + 1
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return res, state
|
return res, state
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue