support for multiple groups in one pattern (#196)
This commit is contained in:
parent
ba4fbde33d
commit
86a7037ed9
|
@ -15,6 +15,39 @@ local function push_token(t, type, text)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Pushes the token(s) produced by one successful pattern match onto `t`.
--
-- Parameters:
--   t            token list, forwarded unchanged to push_token
--   syn          syntax table; syn.symbols[text] overrides the pattern's type
--   pattern      the matched pattern entry; pattern.type is either a single
--                type or, for patterns with captures, a list with one type
--                per span
--   full_text    the text that the match indices refer to
--   find_results every value returned by string.find for the match:
--                { start, end, i_1, i_2, ..., i_last }
--
-- Without captures the whole match becomes a single token. With captures,
-- each capture i_n marks where the next span begins, so the match is cut
-- into consecutive spans [i_n, i_(n+1) - 1] and span n gets pattern.type[n].
-- NOTE(review): captures are assumed to be position captures "()", i.e.
-- numbers; a substring capture would break the index arithmetic below.
local function push_tokens(t, syn, pattern, full_text, find_results)
  local match_start, match_end = find_results[1], find_results[2]
  local types = pattern.type

  if #find_results <= 2 then
    -- No captures: the entire match is one token.
    local text = full_text:sub(match_start, match_end)
    push_token(t, syn.symbols[text] or types, text)
    return
  end

  -- Collect the span boundaries in a fresh list so the caller's
  -- find_results table is left untouched.
  local bounds = {}
  if find_results[3] ~= match_start then
    -- The first capture is not at the start of the match, so the text in
    -- front of it forms the first span.
    bounds[1] = match_start
  end
  for i = 3, #find_results do
    bounds[#bounds + 1] = find_results[i]
  end
  -- Sentinel one past the match end, so the last span ends at match_end.
  bounds[#bounds + 1] = match_end + 1

  -- A plain (non-table) type is applied to every span instead of being
  -- indexed, which would yield nil.
  local has_type_list = type(types) == "table"

  for n = 1, #bounds - 1 do
    local first, last = bounds[n], bounds[n + 1] - 1
    -- Skip zero-length spans (e.g. a capture placed at the very end of the
    -- match); they carry no text. The type index still advances with n, so
    -- the remaining spans keep their intended types.
    if last >= first then
      local token_type = types
      if has_type_list then
        token_type = types[n]
      end
      local text = full_text:sub(first, last)
      push_token(t, syn.symbols[text] or token_type, text)
    end
  end
end
|
||||||
|
|
||||||
|
|
||||||
local function is_escaped(text, idx, esc)
|
local function is_escaped(text, idx, esc)
|
||||||
local byte = esc:byte()
|
local byte = esc:byte()
|
||||||
local count = 0
|
local count = 0
|
||||||
|
@ -49,7 +82,7 @@ local function retrieve_syntax_state(incoming_syntax, state)
|
||||||
-- If we have higher bits, then decode them one at a time, and find which
|
-- If we have higher bits, then decode them one at a time, and find which
|
||||||
-- syntax we're using. Rather than walking the bytes, and calling into
|
-- syntax we're using. Rather than walking the bytes, and calling into
|
||||||
-- `syntax` each time, we could probably cache this in a single table.
|
-- `syntax` each time, we could probably cache this in a single table.
|
||||||
for i=0,2 do
|
for i = 0, 2 do
|
||||||
local target = bit32.extract(state, i*8, 8)
|
local target = bit32.extract(state, i*8, 8)
|
||||||
if target ~= 0 then
|
if target ~= 0 then
|
||||||
if current_syntax.patterns[target].syntax then
|
if current_syntax.patterns[target].syntax then
|
||||||
|
@ -138,13 +171,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
local matched = false
|
local matched = false
|
||||||
for n, p in ipairs(current_syntax.patterns) do
|
for n, p in ipairs(current_syntax.patterns) do
|
||||||
local pattern = (type(p.pattern) == "table") and p.pattern[1] or p.pattern
|
local pattern = (type(p.pattern) == "table") and p.pattern[1] or p.pattern
|
||||||
local s, e = text:find("^" .. pattern, i)
|
local find_results = { text:find("^" .. pattern, i) }
|
||||||
|
local start, fin = find_results[1], find_results[2]
|
||||||
|
|
||||||
if s then
|
if start then
|
||||||
-- matched pattern; make and add token
|
-- matched pattern; make and add tokens
|
||||||
local t = text:sub(s, e)
|
push_tokens(res, current_syntax, p, text, find_results)
|
||||||
|
|
||||||
push_token(res, current_syntax.symbols[t] or p.type, t)
|
|
||||||
-- update state if this was a start|end pattern pair
|
-- update state if this was a start|end pattern pair
|
||||||
if type(p.pattern) == "table" then
|
if type(p.pattern) == "table" then
|
||||||
state = bit32.replace(state, n, current_level*8, 8)
|
state = bit32.replace(state, n, current_level*8, 8)
|
||||||
|
@ -162,7 +195,7 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
end
|
end
|
||||||
|
|
||||||
-- move cursor past this token
|
-- move cursor past this token
|
||||||
i = e + 1
|
i = fin + 1
|
||||||
matched = true
|
matched = true
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
|
|
|
@ -5,17 +5,20 @@ syntax.add {
|
||||||
files = { "%.c$", "%.h$", "%.inl$", "%.cpp$", "%.hpp$" },
|
files = { "%.c$", "%.h$", "%.inl$", "%.cpp$", "%.hpp$" },
|
||||||
comment = "//",
|
comment = "//",
|
||||||
patterns = {
|
patterns = {
|
||||||
{ pattern = "//.-\n", type = "comment" },
|
{ pattern = "//.-\n", type = "comment" },
|
||||||
{ pattern = { "/%*", "%*/" }, type = "comment" },
|
{ pattern = { "/%*", "%*/" }, type = "comment" },
|
||||||
{ pattern = { "#", "[^\\]\n" }, type = "comment" },
|
{ pattern = { '"', '"', '\\' }, type = "string" },
|
||||||
{ pattern = { '"', '"', '\\' }, type = "string" },
|
{ pattern = { "'", "'", '\\' }, type = "string" },
|
||||||
{ pattern = { "'", "'", '\\' }, type = "string" },
|
{ pattern = "0x%x+", type = "number" },
|
||||||
{ pattern = "-?0x%x+", type = "number" },
|
{ pattern = "%d+[%d%.eE]*f?", type = "number" },
|
||||||
{ pattern = "-?%d+[%d%.eE]*f?", type = "number" },
|
{ pattern = "%.?%d+f?", type = "number" },
|
||||||
{ pattern = "-?%.?%d+f?", type = "number" },
|
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
|
||||||
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
|
{ pattern = "struct%s()[%a_][%w_]*", type = {"keyword", "keyword2"} },
|
||||||
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
|
{ pattern = "union%s()[%a_][%w_]*", type = {"keyword", "keyword2"} },
|
||||||
{ pattern = "[%a_][%w_]*", type = "symbol" },
|
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
|
||||||
|
{ pattern = "[%a_][%w_]*", type = "symbol" },
|
||||||
|
{ pattern = "#include%s()<.->", type = {"keyword", "string"} },
|
||||||
|
{ pattern = "#[%a_][%w_]*", type = "keyword" },
|
||||||
},
|
},
|
||||||
symbols = {
|
symbols = {
|
||||||
["if"] = "keyword",
|
["if"] = "keyword",
|
||||||
|
@ -29,8 +32,6 @@ syntax.add {
|
||||||
["continue"] = "keyword",
|
["continue"] = "keyword",
|
||||||
["return"] = "keyword",
|
["return"] = "keyword",
|
||||||
["goto"] = "keyword",
|
["goto"] = "keyword",
|
||||||
["struct"] = "keyword",
|
|
||||||
["union"] = "keyword",
|
|
||||||
["typedef"] = "keyword",
|
["typedef"] = "keyword",
|
||||||
["enum"] = "keyword",
|
["enum"] = "keyword",
|
||||||
["extern"] = "keyword",
|
["extern"] = "keyword",
|
||||||
|
@ -42,7 +43,6 @@ syntax.add {
|
||||||
["case"] = "keyword",
|
["case"] = "keyword",
|
||||||
["default"] = "keyword",
|
["default"] = "keyword",
|
||||||
["auto"] = "keyword",
|
["auto"] = "keyword",
|
||||||
["const"] = "keyword",
|
|
||||||
["void"] = "keyword",
|
["void"] = "keyword",
|
||||||
["int"] = "keyword2",
|
["int"] = "keyword2",
|
||||||
["short"] = "keyword2",
|
["short"] = "keyword2",
|
||||||
|
|
Loading…
Reference in New Issue