support for multiple groups in one pattern (#196)
This commit is contained in:
parent
ba4fbde33d
commit
86a7037ed9
|
@ -15,6 +15,39 @@ local function push_token(t, type, text)
|
|||
end
|
||||
|
||||
|
||||
local function push_tokens(t, syn, pattern, full_text, find_results)
|
||||
if #find_results > 2 then
|
||||
-- We do some manipulation with find_results so that it's arranged
|
||||
-- like this:
|
||||
-- { start, end, i_1, i_2, i_3, …, i_last }
|
||||
-- Each position spans characters from i_n to ((i_n+1) - 1), to form
|
||||
-- consecutive spans of text.
|
||||
--
|
||||
-- If i_1 is not equal to start, start is automatically inserted at
|
||||
-- that index.
|
||||
if find_results[3] ~= find_results[1] then
|
||||
table.insert(find_results, 3, find_results[1])
|
||||
end
|
||||
-- Copy the ending index to the end of the table, so that an ending index
|
||||
-- always follows a starting index after position 3 in the table.
|
||||
table.insert(find_results, find_results[2] + 1)
|
||||
-- Then, we just iterate over our modified table.
|
||||
for i = 3, #find_results - 1 do
|
||||
local start = find_results[i]
|
||||
local fin = find_results[i + 1] - 1
|
||||
local type = pattern.type[i - 2]
|
||||
-- ↑ (i - 2) to convert from [3; n] to [1; n]
|
||||
local text = full_text:sub(start, fin)
|
||||
push_token(t, syn.symbols[text] or type, text)
|
||||
end
|
||||
else
|
||||
local start, fin = find_results[1], find_results[2]
|
||||
local text = full_text:sub(start, fin)
|
||||
push_token(t, syn.symbols[text] or pattern.type, text)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
local function is_escaped(text, idx, esc)
|
||||
local byte = esc:byte()
|
||||
local count = 0
|
||||
|
@ -49,7 +82,7 @@ local function retrieve_syntax_state(incoming_syntax, state)
|
|||
-- If we have higher bits, then decode them one at a time, and find which
|
||||
-- syntax we're using. Rather than walking the bytes, and calling into
|
||||
-- `syntax` each time, we could probably cache this in a single table.
|
||||
for i=0,2 do
|
||||
for i = 0, 2 do
|
||||
local target = bit32.extract(state, i*8, 8)
|
||||
if target ~= 0 then
|
||||
if current_syntax.patterns[target].syntax then
|
||||
|
@ -138,13 +171,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
|||
local matched = false
|
||||
for n, p in ipairs(current_syntax.patterns) do
|
||||
local pattern = (type(p.pattern) == "table") and p.pattern[1] or p.pattern
|
||||
local s, e = text:find("^" .. pattern, i)
|
||||
local find_results = { text:find("^" .. pattern, i) }
|
||||
local start, fin = find_results[1], find_results[2]
|
||||
|
||||
if s then
|
||||
-- matched pattern; make and add token
|
||||
local t = text:sub(s, e)
|
||||
if start then
|
||||
-- matched pattern; make and add tokens
|
||||
push_tokens(res, current_syntax, p, text, find_results)
|
||||
|
||||
push_token(res, current_syntax.symbols[t] or p.type, t)
|
||||
-- update state if this was a start|end pattern pair
|
||||
if type(p.pattern) == "table" then
|
||||
state = bit32.replace(state, n, current_level*8, 8)
|
||||
|
@ -162,7 +195,7 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
|||
end
|
||||
|
||||
-- move cursor past this token
|
||||
i = e + 1
|
||||
i = fin + 1
|
||||
matched = true
|
||||
break
|
||||
end
|
||||
|
|
|
@ -5,17 +5,20 @@ syntax.add {
|
|||
files = { "%.c$", "%.h$", "%.inl$", "%.cpp$", "%.hpp$" },
|
||||
comment = "//",
|
||||
patterns = {
|
||||
{ pattern = "//.-\n", type = "comment" },
|
||||
{ pattern = { "/%*", "%*/" }, type = "comment" },
|
||||
{ pattern = { "#", "[^\\]\n" }, type = "comment" },
|
||||
{ pattern = { '"', '"', '\\' }, type = "string" },
|
||||
{ pattern = { "'", "'", '\\' }, type = "string" },
|
||||
{ pattern = "-?0x%x+", type = "number" },
|
||||
{ pattern = "-?%d+[%d%.eE]*f?", type = "number" },
|
||||
{ pattern = "-?%.?%d+f?", type = "number" },
|
||||
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
|
||||
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
|
||||
{ pattern = "[%a_][%w_]*", type = "symbol" },
|
||||
{ pattern = "//.-\n", type = "comment" },
|
||||
{ pattern = { "/%*", "%*/" }, type = "comment" },
|
||||
{ pattern = { '"', '"', '\\' }, type = "string" },
|
||||
{ pattern = { "'", "'", '\\' }, type = "string" },
|
||||
{ pattern = "0x%x+", type = "number" },
|
||||
{ pattern = "%d+[%d%.eE]*f?", type = "number" },
|
||||
{ pattern = "%.?%d+f?", type = "number" },
|
||||
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
|
||||
{ pattern = "struct%s()[%a_][%w_]*", type = {"keyword", "keyword2"} },
|
||||
{ pattern = "union%s()[%a_][%w_]*", type = {"keyword", "keyword2"} },
|
||||
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
|
||||
{ pattern = "[%a_][%w_]*", type = "symbol" },
|
||||
{ pattern = "#include%s()<.->", type = {"keyword", "string"} },
|
||||
{ pattern = "#[%a_][%w_]*", type = "keyword" },
|
||||
},
|
||||
symbols = {
|
||||
["if"] = "keyword",
|
||||
|
@ -29,8 +32,6 @@ syntax.add {
|
|||
["continue"] = "keyword",
|
||||
["return"] = "keyword",
|
||||
["goto"] = "keyword",
|
||||
["struct"] = "keyword",
|
||||
["union"] = "keyword",
|
||||
["typedef"] = "keyword",
|
||||
["enum"] = "keyword",
|
||||
["extern"] = "keyword",
|
||||
|
@ -42,7 +43,6 @@ syntax.add {
|
|||
["case"] = "keyword",
|
||||
["default"] = "keyword",
|
||||
["auto"] = "keyword",
|
||||
["const"] = "keyword",
|
||||
["void"] = "keyword",
|
||||
["int"] = "keyword2",
|
||||
["short"] = "keyword2",
|
||||
|
|
Loading…
Reference in New Issue