2020-05-07 11:27:37 +02:00
|
|
|
local common = require "core.common"
|
2019-12-28 12:16:32 +01:00
|
|
|
|
2020-05-07 11:27:37 +02:00
|
|
|
-- Module table; it is populated below and returned at the end of the file.
local syntax = {}
|
2019-12-28 12:16:32 +01:00
|
|
|
-- Registered syntax definitions, in the order syntax.add() appended them.
syntax.items = {}
|
|
|
|
|
2024-02-11 18:51:12 +01:00
|
|
|
-- Fallback definition handed out by syntax.get() when neither the filename nor
-- the header selects a registered syntax; it carries no patterns and no symbols.
local plain_text_syntax = { name = "Plain Text", patterns = {}, symbols = {} }
|
2019-12-28 12:16:32 +01:00
|
|
|
|
|
|
|
|
|
|
|
-- Registers the syntax definition `t`.
--
-- `t.space_handling` is normalised first: any non-boolean value (including
-- nil) is replaced by `true`. When `t.patterns` exists, up to two catch-all
-- rules are appended to it (so `t` is modified in place), and finally `t` is
-- appended to `syntax.items`.
function syntax.add(t)
  local handle_spaces = t.space_handling
  if type(handle_spaces) ~= "boolean" then
    handle_spaces = true
    t.space_handling = handle_spaces
  end

  local rules = t.patterns
  if rules then
    -- Matching a whole run of whitespace as one token speeds up the tokenizer
    -- on lines with long stretches of consecutive spaces. Plugins for which
    -- this rule causes conflicts can opt out by declaring
    -- `space_handling = false` in their syntax table.
    if handle_spaces then
      table.insert(rules, { pattern = "%s+", type = "normal" })
    end

    -- Matching every word the syntax's own patterns did not claim as a single
    -- token gives a further gain: otherwise the tokenizer walks such words one
    -- character at a time, which is much slower because that iteration runs in
    -- Lua rather than C, and every pattern is additionally tried against each
    -- single character (the same problem the whitespace rule addresses).
    table.insert(rules, { pattern = "%w+%f[%s]", type = "normal" })
  end

  table.insert(syntax.items, t)
end
|
|
|
|
|
|
|
|
|
2020-06-08 10:44:51 +02:00
|
|
|
-- Picks the registered syntax whose `field` entry matches `text` best.
--
-- text:  the string handed to common.match_pattern
-- field: key looked up on each syntax table (an empty table is passed to
--        common.match_pattern when the syntax has no such key)
--
-- Each candidate is scored as `last - first` of the two values returned by
-- common.match_pattern, and the highest score wins. Candidates are visited
-- from the most recently added to the oldest and only a strictly higher score
-- replaces the current winner, so on equal scores the later registration is
-- kept. Returns nil when nothing scores above zero.
--
-- NOTE(review): a match whose two positions are equal scores 0 and therefore
-- can never be selected -- confirm this is intended.
local function find(text, field)
  local winner
  local winner_span = 0

  for index = #syntax.items, 1, -1 do
    local candidate = syntax.items[index]
    local first, last = common.match_pattern(text, candidate[field] or {})
    if first then
      local span = last - first
      if span > winner_span then
        winner_span, winner = span, candidate
      end
    end
  end

  return winner
end
|
|
|
|
|
|
|
|
-- Resolves the syntax definition to use for a document.
--
-- filename: optional; when truthy it is matched against each syntax's "files"
-- header:   optional; when truthy it is matched against each syntax's
--           "headers", but only if the filename lookup produced nothing
--
-- Returns the matching syntax table, or the plain-text fallback when neither
-- lookup yields a result.
function syntax.get(filename, header)
  if filename then
    local by_name = find(filename, "files")
    if by_name then
      return by_name
    end
  end

  if header then
    local by_header = find(header, "headers")
    if by_header then
      return by_header
    end
  end

  return plain_text_syntax
end
|
|
|
|
|
|
|
|
|
|
|
|
return syntax
|