Nested Syntax Highlighting (#160)
This commit is contained in:
parent
a72431ace7
commit
3fe6665b9a
|
@ -1,5 +1,6 @@
|
||||||
local tokenizer = {}
|
local syntax = require "core.syntax"
|
||||||
|
|
||||||
|
local tokenizer = {}
|
||||||
|
|
||||||
local function push_token(t, type, text)
|
local function push_token(t, type, text)
|
||||||
local prev_type = t[#t-1]
|
local prev_type = t[#t-1]
|
||||||
|
@ -37,45 +38,127 @@ local function find_non_escaped(text, pattern, offset, esc)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- State is a 32-bit number that is four separate bytes, illustrating how many
|
||||||
|
-- different delimiters we have open, and which subsyntaxes we have active.
|
||||||
|
-- At most, there are 3 subsyntaxes active at the same time. Beyond that,
|
||||||
|
-- the tokenizer does not support further highlighting.
|
||||||
|
local function retrieve_syntax_state(incoming_syntax, state)
|
||||||
|
local current_syntax, subsyntax_info, current_state, current_level =
|
||||||
|
incoming_syntax, nil, state, 0
|
||||||
|
if state > 0 and (state > 255 or current_syntax.patterns[state].syntax) then
|
||||||
|
-- If we have higher bits, then decode them one at a time, and find which
|
||||||
|
-- syntax we're using. Rather than walking the bytes, and calling into
|
||||||
|
-- `syntax` each time, we could probably cache this in a single table.
|
||||||
|
for i=0,2 do
|
||||||
|
local target = bit32.extract(state, i*8, 8)
|
||||||
|
if target ~= 0 then
|
||||||
|
if current_syntax.patterns[target].syntax then
|
||||||
|
subsyntax_info = current_syntax.patterns[target]
|
||||||
|
current_syntax = type(subsyntax_info.syntax) == "table" and
|
||||||
|
subsyntax_info.syntax or syntax.get(subsyntax_info.syntax)
|
||||||
|
current_state = 0
|
||||||
|
current_level = i+1
|
||||||
|
else
|
||||||
|
current_state = target
|
||||||
|
break
|
||||||
|
end
|
||||||
|
else
|
||||||
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
return current_syntax, subsyntax_info, current_state, current_level
|
||||||
|
end
|
||||||
|
|
||||||
function tokenizer.tokenize(syntax, text, state)
|
function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
local res = {}
|
local res = {}
|
||||||
local i = 1
|
local i = 1
|
||||||
|
|
||||||
if #syntax.patterns == 0 then
|
if #incoming_syntax.patterns == 0 then
|
||||||
return { "normal", text }
|
return { "normal", text }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
state = state or 0
|
||||||
|
local current_syntax, subsyntax_info, current_state, current_level =
|
||||||
|
retrieve_syntax_state(incoming_syntax, state)
|
||||||
while i <= #text do
|
while i <= #text do
|
||||||
-- continue trying to match the end pattern of a pair if we have a state set
|
-- continue trying to match the end pattern of a pair if we have a state set
|
||||||
if state then
|
if current_state > 0 then
|
||||||
local p = syntax.patterns[state]
|
local p = current_syntax.patterns[current_state]
|
||||||
local s, e = find_non_escaped(text, p.pattern[2], i, p.pattern[3])
|
local s, e = find_non_escaped(text, p.pattern[2], i, p.pattern[3])
|
||||||
|
|
||||||
|
local cont = true
|
||||||
|
-- If we're in subsyntax mode, always check to see if we end our syntax
|
||||||
|
-- first.
|
||||||
|
if subsyntax_info then
|
||||||
|
local ss, se = find_non_escaped(
|
||||||
|
text,
|
||||||
|
subsyntax_info.pattern[2],
|
||||||
|
i,
|
||||||
|
subsyntax_info.pattern[3]
|
||||||
|
)
|
||||||
|
if ss and (s == nil or ss < s) then
|
||||||
|
push_token(res, p.type, text:sub(i, ss - 1))
|
||||||
|
i = ss
|
||||||
|
cont = false
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if cont then
|
||||||
|
if s then
|
||||||
|
push_token(res, p.type, text:sub(i, e))
|
||||||
|
current_state = 0
|
||||||
|
state = bit32.replace(state, 0, current_level*8, 8)
|
||||||
|
i = e + 1
|
||||||
|
else
|
||||||
|
push_token(res, p.type, text:sub(i))
|
||||||
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
-- Check for end of syntax.
|
||||||
|
if subsyntax_info then
|
||||||
|
local s, e = find_non_escaped(
|
||||||
|
text,
|
||||||
|
"^" .. subsyntax_info.pattern[2],
|
||||||
|
i,
|
||||||
|
nil
|
||||||
|
)
|
||||||
if s then
|
if s then
|
||||||
push_token(res, p.type, text:sub(i, e))
|
push_token(res, subsyntax_info.type, text:sub(i, e))
|
||||||
state = nil
|
current_level = current_level - 1
|
||||||
|
-- Zero out the state above us, as well as our new current state.
|
||||||
|
state = bit32.replace(state, 0, current_level*8, 16)
|
||||||
|
current_syntax, subsyntax_info, current_state, current_level =
|
||||||
|
retrieve_syntax_state(incoming_syntax, state)
|
||||||
i = e + 1
|
i = e + 1
|
||||||
else
|
|
||||||
push_token(res, p.type, text:sub(i))
|
|
||||||
break
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- find matching pattern
|
-- find matching pattern
|
||||||
local matched = false
|
local matched = false
|
||||||
for n, p in ipairs(syntax.patterns) do
|
for n, p in ipairs(current_syntax.patterns) do
|
||||||
local pattern = (type(p.pattern) == "table") and p.pattern[1] or p.pattern
|
local pattern = (type(p.pattern) == "table") and p.pattern[1] or p.pattern
|
||||||
local s, e = text:find("^" .. pattern, i)
|
local s, e = text:find("^" .. pattern, i)
|
||||||
|
|
||||||
if s then
|
if s then
|
||||||
-- matched pattern; make and add token
|
-- matched pattern; make and add token
|
||||||
local t = text:sub(s, e)
|
local t = text:sub(s, e)
|
||||||
push_token(res, syntax.symbols[t] or p.type, t)
|
|
||||||
|
|
||||||
|
push_token(res, current_syntax.symbols[t] or p.type, t)
|
||||||
-- update state if this was a start|end pattern pair
|
-- update state if this was a start|end pattern pair
|
||||||
if type(p.pattern) == "table" then
|
if type(p.pattern) == "table" then
|
||||||
state = n
|
state = bit32.replace(state, n, current_level*8, 8)
|
||||||
|
-- If we've found a new subsyntax, bump our level, and set the
|
||||||
|
-- appropriate variables.
|
||||||
|
if p.syntax then
|
||||||
|
current_level = current_level + 1
|
||||||
|
subsyntax_info = p
|
||||||
|
current_syntax = type(p.syntax) == "table" and
|
||||||
|
p.syntax or syntax.get(p.syntax)
|
||||||
|
current_state = 0
|
||||||
|
else
|
||||||
|
current_state = n
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- move cursor past this token
|
-- move cursor past this token
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
-- lite-xl 1.16
|
||||||
|
local syntax = require "core.syntax"
|
||||||
|
|
||||||
|
syntax.add {
|
||||||
|
files = { "%.html?$" },
|
||||||
|
patterns = {
|
||||||
|
{
|
||||||
|
pattern = { "<script type=['\"]%a+/javascript['\"]>", "</script>" },
|
||||||
|
syntax = ".js",
|
||||||
|
type = "function"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern = { "<script>", "</script>" },
|
||||||
|
syntax = ".js",
|
||||||
|
type = "function"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern = { "<style[^>]*>", "</style>" },
|
||||||
|
syntax = ".css",
|
||||||
|
type = "function"
|
||||||
|
},
|
||||||
|
{ pattern = { "<!%-%-", "%-%->" }, type = "comment" },
|
||||||
|
{ pattern = { '%f[^>][^<]', '%f[<]' }, type = "normal" },
|
||||||
|
{ pattern = { '"', '"', '\\' }, type = "string" },
|
||||||
|
{ pattern = { "'", "'", '\\' }, type = "string" },
|
||||||
|
{ pattern = "0x[%da-fA-F]+", type = "number" },
|
||||||
|
{ pattern = "-?%d+[%d%.]*f?", type = "number" },
|
||||||
|
{ pattern = "-?%.?%d+f?", type = "number" },
|
||||||
|
{ pattern = "%f[^<]![%a_][%w_]*", type = "keyword2" },
|
||||||
|
{ pattern = "%f[^<][%a_][%w_]*", type = "function" },
|
||||||
|
{ pattern = "%f[^<]/[%a_][%w_]*", type = "function" },
|
||||||
|
{ pattern = "[%a_][%w_]*", type = "keyword" },
|
||||||
|
{ pattern = "[/<>=]", type = "operator" },
|
||||||
|
},
|
||||||
|
symbols = {},
|
||||||
|
}
|
|
@ -2,7 +2,7 @@
|
||||||
local syntax = require "core.syntax"
|
local syntax = require "core.syntax"
|
||||||
|
|
||||||
syntax.add {
|
syntax.add {
|
||||||
files = { "%.xml$", "%.html?$" },
|
files = { "%.xml$" },
|
||||||
headers = "<%?xml",
|
headers = "<%?xml",
|
||||||
patterns = {
|
patterns = {
|
||||||
{ pattern = { "<!%-%-", "%-%->" }, type = "comment" },
|
{ pattern = { "<!%-%-", "%-%->" }, type = "comment" },
|
||||||
|
|
Loading…
Reference in New Issue