From 5027a0f12bae97c494ff3700d4b5486b86fddfc9 Mon Sep 17 00:00:00 2001
From: Guldoman
Date: Wed, 15 Jun 2022 19:33:58 +0200
Subject: [PATCH 1/3] Fix malformed pattern check for group patterns in tokenizer

If the token type was a simple string (and not a table), the size of the
string was used instead of `1`.
---
 data/core/tokenizer.lua | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua
index 555d60b5..b785c4ea 100644
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@@ -259,16 +259,16 @@ function tokenizer.tokenize(incoming_syntax, text, state)
     local matched = false
     for n, p in ipairs(current_syntax.patterns) do
       local find_results = { find_text(text, p, i, true, false) }
-      if #find_results - 1 > #p.type then
-        if not bad_patterns[current_syntax] then
-          bad_patterns[current_syntax] = { }
-        end
-        if not bad_patterns[current_syntax][n] then
-          bad_patterns[current_syntax][n] = true
-          core.error("Malformed pattern #%d in %s language plugin", n, current_syntax.name or "unnamed")
-        end
-      end
       if find_results[1] then
+        if #find_results - 1 > (type(p.type) == "table" and #p.type or 1) then
+          if not bad_patterns[current_syntax] then
+            bad_patterns[current_syntax] = { }
+          end
+          if not bad_patterns[current_syntax][n] then
+            bad_patterns[current_syntax][n] = true
+            core.error("Malformed pattern #%d in %s language plugin", n, current_syntax.name or "unnamed")
+          end
+        end
         -- matched pattern; make and add tokens
         push_tokens(res, current_syntax, p, text, find_results)
         -- update state if this was a start|end pattern pair

From 2e37e85a48c453abe6b76196145f8890567c9d18 Mon Sep 17 00:00:00 2001
From: Guldoman
Date: Wed, 15 Jun 2022 21:28:46 +0200
Subject: [PATCH 2/3] Add helper function to report bad patterns in tokenizer

---
 data/core/tokenizer.lua | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua
index b785c4ea..b00d30e5 100644
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@@ -95,6 +95,16 @@ local function retrieve_syntax_state(incoming_syntax, state)
   return current_syntax, subsyntax_info, current_pattern_idx, current_level
 end
 
+local function report_bad_pattern(log_fn, syntax, pattern_idx, msg, ...)
+  if not bad_patterns[syntax] then
+    bad_patterns[syntax] = { }
+  end
+  if bad_patterns[syntax][pattern_idx] then return end
+  bad_patterns[syntax][pattern_idx] = true
+  log_fn("Malformed pattern #%d in %s language plugin. " .. msg,
+    pattern_idx, syntax.name or "unnamed", ...)
+end
+
 ---@param incoming_syntax table
 ---@param text string
 ---@param state integer
@@ -260,14 +270,14 @@ function tokenizer.tokenize(incoming_syntax, text, state)
     for n, p in ipairs(current_syntax.patterns) do
       local find_results = { find_text(text, p, i, true, false) }
       if find_results[1] then
-        if #find_results - 1 > (type(p.type) == "table" and #p.type or 1) then
-          if not bad_patterns[current_syntax] then
-            bad_patterns[current_syntax] = { }
-          end
-          if not bad_patterns[current_syntax][n] then
-            bad_patterns[current_syntax][n] = true
-            core.error("Malformed pattern #%d in %s language plugin", n, current_syntax.name or "unnamed")
-          end
+        local type_is_table = type(p.type) == "table"
+        local n_types = type_is_table and #p.type or 1
+        if #find_results - 1 > n_types then
+          report_bad_pattern(core.error, current_syntax, n,
+            "Not enough token types: got %d needed %d.", n_types, #find_results - 1)
+        elseif #find_results - 1 < n_types then
+          report_bad_pattern(core.warn, current_syntax, n,
+            "Too many token types: got %d needed %d.", n_types, #find_results - 1)
         end
         -- matched pattern; make and add tokens
         push_tokens(res, current_syntax, p, text, find_results)

From d169619f69189a4cbda8eb806abdc865906e8e58 Mon Sep 17 00:00:00 2001
From: Guldoman
Date: Wed, 15 Jun 2022 21:31:16 +0200
Subject: [PATCH 3/3] Warn if token type is a table when not needed

---
 data/core/tokenizer.lua | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua
index b00d30e5..fe826aae 100644
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@@ -272,7 +272,11 @@ function tokenizer.tokenize(incoming_syntax, text, state)
       if find_results[1] then
         local type_is_table = type(p.type) == "table"
         local n_types = type_is_table and #p.type or 1
-        if #find_results - 1 > n_types then
+        if #find_results == 2 and type_is_table then
+          report_bad_pattern(core.warn, current_syntax, n,
+            "Token type is a table, but a string was expected.")
+          p.type = p.type[1]
+        elseif #find_results - 1 > n_types then
           report_bad_pattern(core.error, current_syntax, n,
             "Not enough token types: got %d needed %d.", n_types, #find_results - 1)
         elseif #find_results - 1 < n_types then
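
Note (not part of the patches above): for context, here is a rough sketch of the kind of
syntax-plugin pattern entries these checks validate, assuming the usual lite-xl
`syntax.add` plugin format. The field names follow the bundled language plugins, but the
language name, file extension, and patterns below are invented for illustration.

-- Illustrative sketch only: a pattern's `type` must match the number of
-- spans produced by the match, which is what the checks above enforce.
local syntax = require "core.syntax"

syntax.add {
  name = "Example",
  files = "%.example$",
  patterns = {
    -- Simple pattern, no captures: `type` should be a plain string.
    -- With PATCH 3 applied, writing `type = { "comment" }` here would log
    -- the "Token type is a table, but a string was expected." warning.
    { pattern = "#.*", type = "comment" },

    -- Group pattern: the position capture `()` splits the match into spans,
    -- and `type` must be a table with one entry per span; a mismatch is
    -- reported through report_bad_pattern() as an error or a warning.
    { pattern = "local%s+()[%w_]+", type = { "keyword", "symbol" } },
  },
  symbols = {},
}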