Fix `language_js` regex constant detection (#1581)
* Fix `language_js` regex constant detection * Simplify regex constant detection in `language_js` * Add more possessive quantifiers in `language_js` regex constant detection This avoids more catastrophic backtracking cases. * Allow `.` after regex constant in `language_js`
This commit is contained in:
parent
de043f2e13
commit
ee02d0e0b6
|
@ -1,24 +1,74 @@
|
|||
-- mod-version:3
|
||||
local syntax = require "core.syntax"
|
||||
|
||||
-- Regex pattern explanation:
|
||||
-- This will match / and will look ahead for something that looks like a regex.
|
||||
--
|
||||
-- (?!/) Don't match empty regexes.
|
||||
--
|
||||
-- (?>...) this is using an atomic group to minimize backtracking, as that'd
|
||||
-- cause "Catastrophic Backtracking" in some cases.
|
||||
--
|
||||
-- [^\\[\/]++ will match anything that's isn't an escape, a start of character
|
||||
-- class or an end of pattern, without backtracking (the second +).
|
||||
--
|
||||
-- \\. will match anything that's escaped.
|
||||
--
|
||||
-- \[(?:[^\\\]++]|\\.)*+\] will match character classes.
|
||||
--
|
||||
-- /[gmiyuvsd]*\s*[\n,;\)\]\}\.]) will match the end of pattern delimiter, optionally
|
||||
-- followed by pattern options, and anything that can
|
||||
-- be after a pattern.
|
||||
--
|
||||
-- Demo with some unit tests (click on the Unit Tests entry): https://regex101.com/r/R0w8Qw/1
|
||||
-- Note that it has a couple of changes to make it work on that platform.
|
||||
local regex_pattern = {
|
||||
[=[/(?=(?!/)(?:(?>[^\\[\/]++|\\.|\[(?:[^\\\]]++|\\.)*+\])*+)++/[gmiyuvsd]*\s*[\n,;\)\]\}\.])()]=],
|
||||
"/()[gmiyuvsd]*", "\\"
|
||||
}
|
||||
|
||||
-- For the moment let's not actually differentiate the insides of the regex,
|
||||
-- as this will need new token types...
|
||||
local inner_regex_syntax = {
|
||||
patterns = {
|
||||
{ pattern = "%(()%?[:!=><]", type = { "string", "string" } },
|
||||
{ pattern = "[.?+*%(%)|]", type = "string" },
|
||||
{ pattern = "{%d*,?%d*}", type = "string" },
|
||||
{ regex = { [=[\[()\^?]=], [=[(?:\]|(?=\n))()]=], "\\" },
|
||||
type = { "string", "string" },
|
||||
syntax = { -- Inside character class
|
||||
patterns = {
|
||||
{ pattern = "\\\\", type = "string" },
|
||||
{ pattern = "\\%]", type = "string" },
|
||||
{ pattern = "[^%]\n]", type = "string" }
|
||||
},
|
||||
symbols = {}
|
||||
}
|
||||
},
|
||||
{ regex = "\\/", type = "string" },
|
||||
{ regex = "[^/\n]", type = "string" },
|
||||
},
|
||||
symbols = {}
|
||||
}
|
||||
|
||||
syntax.add {
|
||||
name = "JavaScript",
|
||||
files = { "%.js$", "%.json$", "%.cson$", "%.mjs$", "%.cjs$" },
|
||||
comment = "//",
|
||||
block_comment = { "/*", "*/" },
|
||||
patterns = {
|
||||
{ pattern = "//.*", type = "comment" },
|
||||
{ pattern = { "/%*", "%*/" }, type = "comment" },
|
||||
{ pattern = { '/[^= ]', '/', '\\' },type = "string" },
|
||||
{ pattern = { '"', '"', '\\' }, type = "string" },
|
||||
{ pattern = { "'", "'", '\\' }, type = "string" },
|
||||
{ pattern = { "`", "`", '\\' }, type = "string" },
|
||||
{ pattern = "0x[%da-fA-F_]+n?", type = "number" },
|
||||
{ pattern = "-?%d+[%d%.eE_n]*", type = "number" },
|
||||
{ pattern = "-?%.?%d+", type = "number" },
|
||||
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
|
||||
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
|
||||
{ pattern = "[%a_][%w_]*", type = "symbol" },
|
||||
{ pattern = "//.*", type = "comment" },
|
||||
{ pattern = { "/%*", "%*/" }, type = "comment" },
|
||||
{ regex = regex_pattern, syntax = inner_regex_syntax, type = {"string", "string"} },
|
||||
{ pattern = { '"', '"', '\\' }, type = "string" },
|
||||
{ pattern = { "'", "'", '\\' }, type = "string" },
|
||||
{ pattern = { "`", "`", '\\' }, type = "string" },
|
||||
{ pattern = "0x[%da-fA-F_]+n?()%s*()/?", type = {"number", "normal", "operator"} },
|
||||
{ pattern = "-?%d+[%d%.eE_n]*()%s*()/?", type = {"number", "normal", "operator"} },
|
||||
{ pattern = "-?%.?%d+()%s*()/?", type = {"number", "normal", "operator"} },
|
||||
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
|
||||
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
|
||||
{ pattern = "[%a_][%w_]*()%s*()/?", type = {"symbol", "normal", "operator"} },
|
||||
},
|
||||
symbols = {
|
||||
["async"] = "keyword",
|
||||
|
|
Loading…
Reference in New Issue