syntax: remove pattern re-ordering on optimization
* Introduces a flag named space_handling that syntax writers can turn off; turning it off means that your syntax will take care of handling the long runs of spaces that can slow down the tokenizer. * Adds another pattern at the end of every single patterns table that also improves tokenizer performance by matching words that weren't matched by any of the syntax patterns. * Modifies language_md to turn off the provided space_handling and do its own, since it has rules that require a space at the beginning; it also handles the long runs of consecutive dashes used in tables, which degrade performance. * These changes were discussed in collaboration with @Guldoman and @takase1121, thanks to all!
This commit is contained in:
parent
7372d2f82d
commit
b0c005a5ac
|
@ -7,47 +7,22 @@ local plain_text_syntax = { name = "Plain Text", patterns = {}, symbols = {} }
|
||||||
|
|
||||||
|
|
||||||
function syntax.add(t)
|
function syntax.add(t)
|
||||||
-- the rule %s+ gives us a performance gain for the tokenizer in lines with
|
if type(t.space_handling) ~= "boolean" then t.space_handling = true end
|
||||||
-- long amounts of consecutive spaces, to not affect other patterns we
|
|
||||||
-- insert it after any rule that starts with spaces to prevent conflicts
|
|
||||||
if t.patterns then
|
if t.patterns then
|
||||||
local temp_patterns = {}
|
-- the rule %s+ gives us a performance gain for the tokenizer in lines with
|
||||||
::pattern_remove_loop::
|
-- long amounts of consecutive spaces, can be disabled by plugins where it
|
||||||
for pos, pattern in ipairs(t.patterns) do
|
-- causes conflicts by declaring the table property: space_handling = false
|
||||||
local pattern_str = ""
|
if t.space_handling then
|
||||||
local ptype = pattern.pattern
|
table.insert(t.patterns, { pattern = "%s+", type = "normal" })
|
||||||
and "pattern" or (pattern.regex and "regex" or nil)
|
|
||||||
if ptype then
|
|
||||||
if type(pattern[ptype]) == "table" then
|
|
||||||
pattern_str = pattern[ptype][1]
|
|
||||||
else
|
|
||||||
pattern_str = pattern[ptype]
|
|
||||||
end
|
|
||||||
if (ptype == "pattern" and(
|
|
||||||
pattern_str:find("^%^?%%s")
|
|
||||||
or
|
|
||||||
pattern_str:find("^%^?%s")
|
|
||||||
))
|
|
||||||
or
|
|
||||||
(ptype == "regex" and (
|
|
||||||
pattern_str:find("^%^?\\s")
|
|
||||||
or
|
|
||||||
pattern_str:find("^%^?%s")
|
|
||||||
))
|
|
||||||
then
|
|
||||||
table.insert(temp_patterns, table.remove(t.patterns, pos))
|
|
||||||
-- since we are removing from iterated table we need to start
|
|
||||||
-- from the beginning again to prevent any issues
|
|
||||||
goto pattern_remove_loop
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
for pos, pattern in ipairs(temp_patterns) do
|
|
||||||
table.insert(t.patterns, pos, pattern)
|
-- this rule gives us additional performance gain by matching every word
|
||||||
end
|
-- that was not matched by the syntax patterns as a single token, preventing
|
||||||
local pos = 1
|
-- the tokenizer from iterating over each character individually which is a
|
||||||
if #temp_patterns > 0 then pos = #temp_patterns+1 end
|
-- lot slower since iteration occurs in lua instead of C and adding to that
|
||||||
table.insert(t.patterns, pos, { pattern = "%s+", type = "normal" })
|
-- it will also try to match every pattern to a single char (same as spaces)
|
||||||
|
table.insert(t.patterns, { pattern = "%w+%f[%s]", type = "normal" })
|
||||||
end
|
end
|
||||||
|
|
||||||
table.insert(syntax.items, t)
|
table.insert(syntax.items, t)
|
||||||
|
|
|
@ -31,7 +31,35 @@ syntax.add {
|
||||||
name = "Markdown",
|
name = "Markdown",
|
||||||
files = { "%.md$", "%.markdown$" },
|
files = { "%.md$", "%.markdown$" },
|
||||||
block_comment = { "<!--", "-->" },
|
block_comment = { "<!--", "-->" },
|
||||||
|
space_handling = false, -- turn off this feature to handle it ourselves
|
||||||
patterns = {
|
patterns = {
|
||||||
|
---- Place patterns that require spaces at start to optimize matching speed
|
||||||
|
---- and apply the %s+ optimization immediately afterwards
|
||||||
|
-- bullets
|
||||||
|
{ pattern = "^%s*%*%s", type = "number" },
|
||||||
|
{ pattern = "^%s*%-%s", type = "number" },
|
||||||
|
{ pattern = "^%s*%+%s", type = "number" },
|
||||||
|
-- numbered bullet
|
||||||
|
{ pattern = "^%s*[0-9]+[%.%)]%s", type = "number" },
|
||||||
|
-- blockquote
|
||||||
|
{ pattern = "^%s*>+%s", type = "string" },
|
||||||
|
-- alternative bold italic formats
|
||||||
|
{ pattern = { "%s___", "___%f[%s]" }, type = "markdown_bold_italic" },
|
||||||
|
{ pattern = { "%s__", "__%f[%s]" }, type = "markdown_bold" },
|
||||||
|
{ pattern = { "%s_[%S]", "_%f[%s]" }, type = "markdown_italic" },
|
||||||
|
-- reference links
|
||||||
|
{
|
||||||
|
pattern = "^%s*%[%^()["..in_squares_match.."]+()%]: ",
|
||||||
|
type = { "function", "number", "function" }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern = "^%s*%[%^?()["..in_squares_match.."]+()%]:%s+.+\n",
|
||||||
|
type = { "function", "number", "function" }
|
||||||
|
},
|
||||||
|
-- optimization
|
||||||
|
{ pattern = "%s+", type = "normal" },
|
||||||
|
|
||||||
|
|
||||||
---- HTML rules imported and adapted from language_html
|
---- HTML rules imported and adapted from language_html
|
||||||
---- to not conflict with markdown rules
|
---- to not conflict with markdown rules
|
||||||
-- Inline JS and CSS
|
-- Inline JS and CSS
|
||||||
|
@ -129,14 +157,6 @@ syntax.add {
|
||||||
{ pattern = "^%-%-%-+\n", type = "comment" },
|
{ pattern = "^%-%-%-+\n", type = "comment" },
|
||||||
{ pattern = "^%*%*%*+\n", type = "comment" },
|
{ pattern = "^%*%*%*+\n", type = "comment" },
|
||||||
{ pattern = "^___+\n", type = "comment" },
|
{ pattern = "^___+\n", type = "comment" },
|
||||||
-- bullets
|
|
||||||
{ pattern = "^%s*%*%s", type = "number" },
|
|
||||||
{ pattern = "^%s*%-%s", type = "number" },
|
|
||||||
{ pattern = "^%s*%+%s", type = "number" },
|
|
||||||
-- numbered bullet
|
|
||||||
{ pattern = "^%s*[0-9]+[%.%)]%s", type = "number" },
|
|
||||||
-- blockquote
|
|
||||||
{ pattern = "^%s*>+%s", type = "string" },
|
|
||||||
-- bold and italic
|
-- bold and italic
|
||||||
{ pattern = { "%*%*%*%S", "%*%*%*" }, type = "markdown_bold_italic" },
|
{ pattern = { "%*%*%*%S", "%*%*%*" }, type = "markdown_bold_italic" },
|
||||||
{ pattern = { "%*%*%S", "%*%*" }, type = "markdown_bold" },
|
{ pattern = { "%*%*%S", "%*%*" }, type = "markdown_bold" },
|
||||||
|
@ -149,9 +169,6 @@ syntax.add {
|
||||||
{ pattern = "^___[%s%p%w]+___%s" , type = "markdown_bold_italic" },
|
{ pattern = "^___[%s%p%w]+___%s" , type = "markdown_bold_italic" },
|
||||||
{ pattern = "^__[%s%p%w]+__%s" , type = "markdown_bold" },
|
{ pattern = "^__[%s%p%w]+__%s" , type = "markdown_bold" },
|
||||||
{ pattern = "^_[%s%p%w]+_%s" , type = "markdown_italic" },
|
{ pattern = "^_[%s%p%w]+_%s" , type = "markdown_italic" },
|
||||||
{ pattern = { "%s___", "___%f[%s]" }, type = "markdown_bold_italic" },
|
|
||||||
{ pattern = { "%s__", "__%f[%s]" }, type = "markdown_bold" },
|
|
||||||
{ pattern = { "%s_[%S]", "_%f[%s]" }, type = "markdown_italic" },
|
|
||||||
-- heading with custom id
|
-- heading with custom id
|
||||||
{
|
{
|
||||||
pattern = "^#+%s[%w%s%p]+(){()#[%w%-]+()}",
|
pattern = "^#+%s[%w%s%p]+(){()#[%w%-]+()}",
|
||||||
|
@ -186,14 +203,6 @@ syntax.add {
|
||||||
pattern = "%[()["..in_squares_match.."]+()%] *()%[()["..in_squares_match.."]+()%]",
|
pattern = "%[()["..in_squares_match.."]+()%] *()%[()["..in_squares_match.."]+()%]",
|
||||||
type = { "function", "string", "function", "function", "number", "function" }
|
type = { "function", "string", "function", "function", "number", "function" }
|
||||||
},
|
},
|
||||||
{
|
|
||||||
pattern = "^%s*%[%^()["..in_squares_match.."]+()%]: ",
|
|
||||||
type = { "function", "number", "function" }
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern = "^%s*%[%^?()["..in_squares_match.."]+()%]:%s+.+\n",
|
|
||||||
type = { "function", "number", "function" }
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
pattern = "!?%[%^?()["..in_squares_match.."]+()%]",
|
pattern = "!?%[%^?()["..in_squares_match.."]+()%]",
|
||||||
type = { "function", "number", "function" }
|
type = { "function", "number", "function" }
|
||||||
|
@ -204,7 +213,9 @@ syntax.add {
|
||||||
type = "function"
|
type = "function"
|
||||||
},
|
},
|
||||||
{ pattern = "<https?://%S+>", type = "function" },
|
{ pattern = "<https?://%S+>", type = "function" },
|
||||||
{ pattern = "https?://%S+", type = "function" }
|
{ pattern = "https?://%S+", type = "function" },
|
||||||
|
-- optimize consecutive dashes used in tables
|
||||||
|
{ pattern = "%-+", type = "normal" },
|
||||||
},
|
},
|
||||||
symbols = { },
|
symbols = { },
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue