support for multiple groups in one pattern (#196)

This commit is contained in:
liquidev 2021-05-19 22:35:28 +02:00 committed by GitHub
parent ba4fbde33d
commit 86a7037ed9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 54 additions and 21 deletions

View File

@ -15,6 +15,39 @@ local function push_token(t, type, text)
end
local function push_tokens(t, syn, pattern, full_text, find_results)
if #find_results > 2 then
-- We do some manipulation with find_results so that it's arranged
-- like this:
-- { start, end, i_1, i_2, i_3, …, i_last }
-- Each position spans characters from i_n to ((i_n+1) - 1), to form
-- consecutive spans of text.
--
-- If i_1 is not equal to start, start is automatically inserted at
-- that index.
if find_results[3] ~= find_results[1] then
table.insert(find_results, 3, find_results[1])
end
-- Copy the ending index to the end of the table, so that an ending index
-- always follows a starting index after position 3 in the table.
table.insert(find_results, find_results[2] + 1)
-- Then, we just iterate over our modified table.
for i = 3, #find_results - 1 do
local start = find_results[i]
local fin = find_results[i + 1] - 1
local type = pattern.type[i - 2]
-- ↑ (i - 2) to convert from [3; n] to [1; n]
local text = full_text:sub(start, fin)
push_token(t, syn.symbols[text] or type, text)
end
else
local start, fin = find_results[1], find_results[2]
local text = full_text:sub(start, fin)
push_token(t, syn.symbols[text] or pattern.type, text)
end
end
local function is_escaped(text, idx, esc)
local byte = esc:byte()
local count = 0
@ -49,7 +82,7 @@ local function retrieve_syntax_state(incoming_syntax, state)
-- If we have higher bits, then decode them one at a time, and find which
-- syntax we're using. Rather than walking the bytes, and calling into
-- `syntax` each time, we could probably cache this in a single table.
for i=0,2 do
for i = 0, 2 do
local target = bit32.extract(state, i*8, 8)
if target ~= 0 then
if current_syntax.patterns[target].syntax then
@ -138,13 +171,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
local matched = false
for n, p in ipairs(current_syntax.patterns) do
local pattern = (type(p.pattern) == "table") and p.pattern[1] or p.pattern
local s, e = text:find("^" .. pattern, i)
local find_results = { text:find("^" .. pattern, i) }
local start, fin = find_results[1], find_results[2]
if s then
-- matched pattern; make and add token
local t = text:sub(s, e)
if start then
-- matched pattern; make and add tokens
push_tokens(res, current_syntax, p, text, find_results)
push_token(res, current_syntax.symbols[t] or p.type, t)
-- update state if this was a start|end pattern pair
if type(p.pattern) == "table" then
state = bit32.replace(state, n, current_level*8, 8)
@ -162,7 +195,7 @@ function tokenizer.tokenize(incoming_syntax, text, state)
end
-- move cursor past this token
i = e + 1
i = fin + 1
matched = true
break
end

View File

@ -5,17 +5,20 @@ syntax.add {
files = { "%.c$", "%.h$", "%.inl$", "%.cpp$", "%.hpp$" },
comment = "//",
patterns = {
{ pattern = "//.-\n", type = "comment" },
{ pattern = { "/%*", "%*/" }, type = "comment" },
{ pattern = { "#", "[^\\]\n" }, type = "comment" },
{ pattern = { '"', '"', '\\' }, type = "string" },
{ pattern = { "'", "'", '\\' }, type = "string" },
{ pattern = "-?0x%x+", type = "number" },
{ pattern = "-?%d+[%d%.eE]*f?", type = "number" },
{ pattern = "-?%.?%d+f?", type = "number" },
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
{ pattern = "[%a_][%w_]*", type = "symbol" },
{ pattern = "//.-\n", type = "comment" },
{ pattern = { "/%*", "%*/" }, type = "comment" },
{ pattern = { '"', '"', '\\' }, type = "string" },
{ pattern = { "'", "'", '\\' }, type = "string" },
{ pattern = "0x%x+", type = "number" },
{ pattern = "%d+[%d%.eE]*f?", type = "number" },
{ pattern = "%.?%d+f?", type = "number" },
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
{ pattern = "struct%s()[%a_][%w_]*", type = {"keyword", "keyword2"} },
{ pattern = "union%s()[%a_][%w_]*", type = {"keyword", "keyword2"} },
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
{ pattern = "[%a_][%w_]*", type = "symbol" },
{ pattern = "#include%s()<.->", type = {"keyword", "string"} },
{ pattern = "#[%a_][%w_]*", type = "keyword" },
},
symbols = {
["if"] = "keyword",
@ -29,8 +32,6 @@ syntax.add {
["continue"] = "keyword",
["return"] = "keyword",
["goto"] = "keyword",
["struct"] = "keyword",
["union"] = "keyword",
["typedef"] = "keyword",
["enum"] = "keyword",
["extern"] = "keyword",
@ -42,7 +43,6 @@ syntax.add {
["case"] = "keyword",
["default"] = "keyword",
["auto"] = "keyword",
["const"] = "keyword",
["void"] = "keyword",
["int"] = "keyword2",
["short"] = "keyword2",