Fix `language_js` regex constant detection (#1581)

* Fix `language_js` regex constant detection

* Simplify regex constant detection in `language_js`

* Add more possessive quantifiers in `language_js` regex constant detection

This avoids more catastrophic backtracking cases.

* Allow `.` after regex constant in `language_js`
This commit is contained in:
Guldoman 2023-11-29 17:00:09 +01:00 committed by George Sokianos
parent de043f2e13
commit ee02d0e0b6
1 changed files with 62 additions and 12 deletions

View File

@ -1,24 +1,74 @@
-- mod-version:3
local syntax = require "core.syntax"
-- Regex pattern explanation:
-- This will match / and will look ahead for something that looks like a regex.
--
-- (?!/) Don't match empty regexes.
--
-- (?>...) this is using an atomic group to minimize backtracking, as that'd
-- cause "Catastrophic Backtracking" in some cases.
--
-- [^\\[\/]++ will match anything that isn't an escape, a start of character
-- class or an end of pattern, without backtracking (the second +).
--
-- \\. will match anything that's escaped.
--
-- \[(?:[^\\\]]++|\\.)*+\] will match character classes.
--
-- /[gmiyuvsd]*\s*[\n,;\)\]\}\.]) will match the end of pattern delimiter, optionally
-- followed by pattern options, and anything that can
-- be after a pattern.
--
-- Demo with some unit tests (click on the Unit Tests entry): https://regex101.com/r/R0w8Qw/1
-- Note that it has a couple of changes to make it work on that platform.
local regex_pattern = {
[=[/(?=(?!/)(?:(?>[^\\[\/]++|\\.|\[(?:[^\\\]]++|\\.)*+\])*+)++/[gmiyuvsd]*\s*[\n,;\)\]\}\.])()]=],
"/()[gmiyuvsd]*", "\\"
}
-- For the moment let's not actually differentiate the insides of the regex,
-- as this will need new token types...
local inner_regex_syntax = {
patterns = {
{ pattern = "%(()%?[:!=><]", type = { "string", "string" } },
{ pattern = "[.?+*%(%)|]", type = "string" },
{ pattern = "{%d*,?%d*}", type = "string" },
{ regex = { [=[\[()\^?]=], [=[(?:\]|(?=\n))()]=], "\\" },
type = { "string", "string" },
syntax = { -- Inside character class
patterns = {
{ pattern = "\\\\", type = "string" },
{ pattern = "\\%]", type = "string" },
{ pattern = "[^%]\n]", type = "string" }
},
symbols = {}
}
},
{ regex = "\\/", type = "string" },
{ regex = "[^/\n]", type = "string" },
},
symbols = {}
}
syntax.add {
name = "JavaScript",
files = { "%.js$", "%.json$", "%.cson$", "%.mjs$", "%.cjs$" },
comment = "//",
block_comment = { "/*", "*/" },
patterns = {
{ pattern = "//.*", type = "comment" },
{ pattern = { "/%*", "%*/" }, type = "comment" },
{ pattern = { '/[^= ]', '/', '\\' },type = "string" },
{ pattern = { '"', '"', '\\' }, type = "string" },
{ pattern = { "'", "'", '\\' }, type = "string" },
{ pattern = { "`", "`", '\\' }, type = "string" },
{ pattern = "0x[%da-fA-F_]+n?", type = "number" },
{ pattern = "-?%d+[%d%.eE_n]*", type = "number" },
{ pattern = "-?%.?%d+", type = "number" },
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
{ pattern = "[%a_][%w_]*", type = "symbol" },
{ pattern = "//.*", type = "comment" },
{ pattern = { "/%*", "%*/" }, type = "comment" },
{ regex = regex_pattern, syntax = inner_regex_syntax, type = {"string", "string"} },
{ pattern = { '"', '"', '\\' }, type = "string" },
{ pattern = { "'", "'", '\\' }, type = "string" },
{ pattern = { "`", "`", '\\' }, type = "string" },
{ pattern = "0x[%da-fA-F_]+n?()%s*()/?", type = {"number", "normal", "operator"} },
{ pattern = "-?%d+[%d%.eE_n]*()%s*()/?", type = {"number", "normal", "operator"} },
{ pattern = "-?%.?%d+()%s*()/?", type = {"number", "normal", "operator"} },
{ pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" },
{ pattern = "[%a_][%w_]*%f[(]", type = "function" },
{ pattern = "[%a_][%w_]*()%s*()/?", type = {"symbol", "normal", "operator"} },
},
symbols = {
["async"] = "keyword",