diff --git a/data/core/syntax.lua b/data/core/syntax.lua index 6d59d2ea..89208bce 100644 --- a/data/core/syntax.lua +++ b/data/core/syntax.lua @@ -7,47 +7,22 @@ local plain_text_syntax = { name = "Plain Text", patterns = {}, symbols = {} } function syntax.add(t) - -- the rule %s+ gives us a performance gain for the tokenizer in lines with - -- long amounts of consecutive spaces, to not affect other patterns we - -- insert it after any rule that starts with spaces to prevent conflicts + if type(t.space_handling) ~= "boolean" then t.space_handling = true end + if t.patterns then - local temp_patterns = {} - ::pattern_remove_loop:: - for pos, pattern in ipairs(t.patterns) do - local pattern_str = "" - local ptype = pattern.pattern - and "pattern" or (pattern.regex and "regex" or nil) - if ptype then - if type(pattern[ptype]) == "table" then - pattern_str = pattern[ptype][1] - else - pattern_str = pattern[ptype] - end - if (ptype == "pattern" and( - pattern_str:find("^%^?%%s") - or - pattern_str:find("^%^?%s") - )) - or - (ptype == "regex" and ( - pattern_str:find("^%^?\\s") - or - pattern_str:find("^%^?%s") - )) - then - table.insert(temp_patterns, table.remove(t.patterns, pos)) - -- since we are removing from iterated table we need to start - -- from the beginning again to prevent any issues - goto pattern_remove_loop - end - end + -- the rule %s+ gives us a performance gain for the tokenizer in lines with + -- long amounts of consecutive spaces, can be disabled by plugins where it + -- causes conflicts by declaring the table property: space_handling = false + if t.space_handling then + table.insert(t.patterns, { pattern = "%s+", type = "normal" }) end - for pos, pattern in ipairs(temp_patterns) do - table.insert(t.patterns, pos, pattern) - end - local pos = 1 - if #temp_patterns > 0 then pos = #temp_patterns+1 end - table.insert(t.patterns, pos, { pattern = "%s+", type = "normal" }) + + -- this rule gives us additional performance gain by matching every word + -- 
that was not matched by the syntax patterns as a single token, preventing + -- the tokenizer from iterating over each character individually which is a + -- lot slower since iteration occurs in lua instead of C and adding to that + -- it will also try to match every pattern to a single char (same as spaces) + table.insert(t.patterns, { pattern = "%w+%f[%s]", type = "normal" }) end table.insert(syntax.items, t) diff --git a/data/plugins/language_md.lua b/data/plugins/language_md.lua index 2a622d8c..e7fa9f49 100644 --- a/data/plugins/language_md.lua +++ b/data/plugins/language_md.lua @@ -31,7 +31,35 @@ syntax.add { name = "Markdown", files = { "%.md$", "%.markdown$" }, block_comment = { "" }, + space_handling = false, -- turn off this feature to handle it ourselves patterns = { + ---- Place patterns that require spaces at start to optimize matching speed + ---- and apply the %s+ optimization immediately afterwards + -- bullets + { pattern = "^%s*%*%s", type = "number" }, + { pattern = "^%s*%-%s", type = "number" }, + { pattern = "^%s*%+%s", type = "number" }, + -- numbered bullet + { pattern = "^%s*[0-9]+[%.%)]%s", type = "number" }, + -- blockquote + { pattern = "^%s*>+%s", type = "string" }, + -- alternative bold italic formats + { pattern = { "%s___", "___%f[%s]" }, type = "markdown_bold_italic" }, + { pattern = { "%s__", "__%f[%s]" }, type = "markdown_bold" }, + { pattern = { "%s_[%S]", "_%f[%s]" }, type = "markdown_italic" }, + -- reference links + { + pattern = "^%s*%[%^()["..in_squares_match.."]+()%]: ", + type = { "function", "number", "function" } + }, + { + pattern = "^%s*%[%^?()["..in_squares_match.."]+()%]:%s+.+\n", + type = { "function", "number", "function" } + }, + -- optimization + { pattern = "%s+", type = "normal" }, + + ---- HTML rules imported and adapted from language_html ---- to not conflict with markdown rules -- Inline JS and CSS @@ -129,14 +157,6 @@ syntax.add { { pattern = "^%-%-%-+\n", type = "comment" }, { pattern = "^%*%*%*+\n", type = 
"comment" }, { pattern = "^___+\n", type = "comment" }, - -- bullets - { pattern = "^%s*%*%s", type = "number" }, - { pattern = "^%s*%-%s", type = "number" }, - { pattern = "^%s*%+%s", type = "number" }, - -- numbered bullet - { pattern = "^%s*[0-9]+[%.%)]%s", type = "number" }, - -- blockquote - { pattern = "^%s*>+%s", type = "string" }, -- bold and italic { pattern = { "%*%*%*%S", "%*%*%*" }, type = "markdown_bold_italic" }, { pattern = { "%*%*%S", "%*%*" }, type = "markdown_bold" }, @@ -149,9 +169,6 @@ syntax.add { { pattern = "^___[%s%p%w]+___%s" , type = "markdown_bold_italic" }, { pattern = "^__[%s%p%w]+__%s" , type = "markdown_bold" }, { pattern = "^_[%s%p%w]+_%s" , type = "markdown_italic" }, - { pattern = { "%s___", "___%f[%s]" }, type = "markdown_bold_italic" }, - { pattern = { "%s__", "__%f[%s]" }, type = "markdown_bold" }, - { pattern = { "%s_[%S]", "_%f[%s]" }, type = "markdown_italic" }, -- heading with custom id { pattern = "^#+%s[%w%s%p]+(){()#[%w%-]+()}", @@ -186,14 +203,6 @@ syntax.add { pattern = "%[()["..in_squares_match.."]+()%] *()%[()["..in_squares_match.."]+()%]", type = { "function", "string", "function", "function", "number", "function" } }, - { - pattern = "^%s*%[%^()["..in_squares_match.."]+()%]: ", - type = { "function", "number", "function" } - }, - { - pattern = "^%s*%[%^?()["..in_squares_match.."]+()%]:%s+.+\n", - type = { "function", "number", "function" } - }, { pattern = "!?%[%^?()["..in_squares_match.."]+()%]", type = { "function", "number", "function" } @@ -204,7 +213,9 @@ syntax.add { type = "function" }, { pattern = "", type = "function" }, - { pattern = "https?://%S+", type = "function" } + { pattern = "https?://%S+", type = "function" }, + -- optimize consecutive dashes used in tables + { pattern = "%-+", type = "normal" }, }, symbols = { }, }