diff --git a/data/plugins/language_js.lua b/data/plugins/language_js.lua index f79fece6..307aeecf 100644 --- a/data/plugins/language_js.lua +++ b/data/plugins/language_js.lua @@ -1,24 +1,74 @@ -- mod-version:3 local syntax = require "core.syntax" +-- Regex pattern explanation: +-- This will match / and will look ahead for something that looks like a regex. +-- +-- (?!/) Don't match empty regexes. +-- +-- (?>...) this is using an atomic group to minimize backtracking, as that'd +-- cause "Catastrophic Backtracking" in some cases. +-- +-- [^\\[\/]++ will match anything that isn't an escape, a start of character +-- class or an end of pattern, without backtracking (the second +). +-- +-- \\. will match anything that's escaped. +-- +-- \[(?:[^\\\]]++|\\.)*+\] will match character classes. +-- +-- /[gmiyuvsd]*\s*[\n,;\)\]\}\.] will match the end of pattern delimiter, optionally +-- followed by pattern options, and anything that can +-- be after a pattern. +-- +-- Demo with some unit tests (click on the Unit Tests entry): https://regex101.com/r/R0w8Qw/1 +-- Note that it has a couple of changes to make it work on that platform. +local regex_pattern = { + [=[/(?=(?!/)(?:(?>[^\\[\/]++|\\.|\[(?:[^\\\]]++|\\.)*+\])*+)++/[gmiyuvsd]*\s*[\n,;\)\]\}\.])()]=], + "/()[gmiyuvsd]*", "\\" +} + +-- For the moment let's not actually differentiate the insides of the regex, +-- as this will need new token types...
+local inner_regex_syntax = { + patterns = { + { pattern = "%(()%?[:!=><]", type = { "string", "string" } }, + { pattern = "[.?+*%(%)|]", type = "string" }, + { pattern = "{%d*,?%d*}", type = "string" }, + { regex = { [=[\[()\^?]=], [=[(?:\]|(?=\n))()]=], "\\" }, + type = { "string", "string" }, + syntax = { -- Inside character class: escaped backslash, escaped "]", or any other char except "]" and newline + patterns = { + { pattern = "\\\\", type = "string" }, + { pattern = "\\%]", type = "string" }, + { pattern = "[^%]\n]", type = "string" } + }, + symbols = {} + } + }, + { regex = "\\/", type = "string" }, + { regex = "[^/\n]", type = "string" }, + }, + symbols = {} +} + syntax.add { name = "JavaScript", files = { "%.js$", "%.json$", "%.cson$", "%.mjs$", "%.cjs$" }, comment = "//", block_comment = { "/*", "*/" }, patterns = { - { pattern = "//.*", type = "comment" }, - { pattern = { "/%*", "%*/" }, type = "comment" }, - { pattern = { '/[^= ]', '/', '\\' },type = "string" }, - { pattern = { '"', '"', '\\' }, type = "string" }, - { pattern = { "'", "'", '\\' }, type = "string" }, - { pattern = { "`", "`", '\\' }, type = "string" }, - { pattern = "0x[%da-fA-F_]+n?", type = "number" }, - { pattern = "-?%d+[%d%.eE_n]*", type = "number" }, - { pattern = "-?%.?%d+", type = "number" }, - { pattern = "[%+%-=/%*%^%%<>!~|&]", type = "operator" }, - { pattern = "[%a_][%w_]*%f[(]", type = "function" }, - { pattern = "[%a_][%w_]*", type = "symbol" }, + { pattern = "//.*", type = "comment" }, + { pattern = { "/%*", "%*/" }, type = "comment" }, + { regex = regex_pattern, syntax = inner_regex_syntax, type = {"string", "string"} }, + { pattern = { '"', '"', '\\' }, type = "string" }, + { pattern = { "'", "'", '\\' }, type = "string" }, + { pattern = { "`", "`", '\\' }, type = "string" }, + { pattern = "0x[%da-fA-F_]+n?()%s*()/?", type = {"number", "normal", "operator"} }, + { pattern = "-?%d+[%d%.eE_n]*()%s*()/?", type = {"number", "normal", "operator"} }, + { pattern = "-?%.?%d+()%s*()/?", type = {"number", "normal", "operator"} }, + { pattern = 
"[%+%-=/%*%^%%<>!~|&]", type = "operator" }, + { pattern = "[%a_][%w_]*%f[(]", type = "function" }, + { pattern = "[%a_][%w_]*()%s*()/?", type = {"symbol", "normal", "operator"} }, }, symbols = { ["async"] = "keyword",