plugin detectident: fixes and improvements
* Improved performance 67x by not using the tokenizer, this means that now opening files or saving them where indentation is re-detected is much more faster. * Improved the algorithm to detect the space size.
This commit is contained in:
parent
17017b63ae
commit
dcbebef2ab
|
@ -3,93 +3,253 @@ local core = require "core"
|
|||
local command = require "core.command"
|
||||
local common = require "core.common"
|
||||
local config = require "core.config"
|
||||
local core_syntax = require "core.syntax"
|
||||
local DocView = require "core.docview"
|
||||
local Doc = require "core.doc"
|
||||
local tokenizer = require "core.tokenizer"
|
||||
|
||||
local cache = setmetatable({}, { __mode = "k" })
|
||||
local comments_cache = {}
|
||||
local auto_detect_max_lines = 150
|
||||
|
||||
|
||||
local function add_to_stat(stat, val)
|
||||
for i = 1, #stat do
|
||||
if val == stat[i][1] then
|
||||
stat[i][2] = stat[i][2] + 1
|
||||
return
|
||||
local function indent_occurrences_more_than_once(stat, idx)
|
||||
if stat[idx-1] and stat[idx-1] == stat[idx] then
|
||||
return true
|
||||
elseif stat[idx+1] and stat[idx+1] == stat[idx] then
|
||||
return true
|
||||
end
|
||||
end
|
||||
stat[#stat + 1] = {val, 1}
|
||||
return false
|
||||
end
|
||||
|
||||
|
||||
local function optimal_indent_from_stat(stat)
|
||||
if #stat == 0 then return nil, 0 end
|
||||
local bins = {}
|
||||
for k = 1, #stat do
|
||||
local indent = stat[k][1]
|
||||
table.sort(stat, function(a, b) return a > b end)
|
||||
local best_indent = 0
|
||||
local best_score = 0
|
||||
local count = #stat
|
||||
for x=1, count do
|
||||
local indent = stat[x]
|
||||
local score = 0
|
||||
local mult_prev, lines_prev
|
||||
for i = k, #stat do
|
||||
if stat[i][1] % indent == 0 then
|
||||
local mult = stat[i][1] / indent
|
||||
if not mult_prev or (mult_prev + 1 == mult and lines_prev / stat[i][2] > 0.1) then
|
||||
-- we add the number of lines to the score only if the previous
|
||||
-- multiple of "indent" was populated with enough lines.
|
||||
score = score + stat[i][2]
|
||||
end
|
||||
mult_prev, lines_prev = mult, stat[i][2]
|
||||
for y=1, count do
|
||||
if y ~= x and stat[y] % indent == 0 then
|
||||
score = score + 1
|
||||
elseif
|
||||
indent > stat[y]
|
||||
and
|
||||
indent_occurrences_more_than_once(stat, y)
|
||||
then
|
||||
score = 0
|
||||
break
|
||||
end
|
||||
end
|
||||
bins[#bins + 1] = {indent, score}
|
||||
if score > best_score then
|
||||
best_indent = indent
|
||||
best_score = score
|
||||
end
|
||||
table.sort(bins, function(a, b) return a[2] > b[2] end)
|
||||
return bins[1][1], bins[1][2]
|
||||
if score > 0 then
|
||||
break
|
||||
end
|
||||
end
|
||||
return best_score > 0 and best_indent or nil, best_score
|
||||
end
|
||||
|
||||
|
||||
-- return nil if it is a comment or blank line or the initial part of the
|
||||
-- line otherwise.
|
||||
-- we don't need to have the whole line to detect indentation.
|
||||
local function get_first_line_part(tokens)
|
||||
local i, n = 1, #tokens
|
||||
while i + 1 <= n do
|
||||
local ttype, ttext = tokens[i], tokens[i + 1]
|
||||
if ttype ~= "comment" and ttext:gsub("%s+", "") ~= "" then
|
||||
return ttext
|
||||
end
|
||||
i = i + 2
|
||||
local function escape_comment_tokens(token)
|
||||
local special_chars = "*-%[].()+?^$"
|
||||
local escaped = ""
|
||||
for x=1, token:len() do
|
||||
local found = false
|
||||
for y=1, special_chars:len() do
|
||||
if token:sub(x, x) == special_chars:sub(y, y) then
|
||||
escaped = escaped .. "%" .. token:sub(x, x)
|
||||
found = true
|
||||
break
|
||||
end
|
||||
end
|
||||
if not found then
|
||||
escaped = escaped .. token:sub(x, x)
|
||||
end
|
||||
end
|
||||
return escaped
|
||||
end
|
||||
|
||||
|
||||
local function get_comment_patterns(syntax)
|
||||
if comments_cache[syntax.name] then
|
||||
if #comments_cache[syntax.name] > 0 then
|
||||
return comments_cache[syntax.name]
|
||||
else
|
||||
return nil
|
||||
end
|
||||
end
|
||||
local comments = {}
|
||||
for idx=1, #syntax.patterns do
|
||||
local pattern = syntax.patterns[idx]
|
||||
local startp = ""
|
||||
if
|
||||
type(pattern.type) == "string"
|
||||
and
|
||||
(pattern.type == "comment" or pattern.type == "string")
|
||||
then
|
||||
local not_is_string = pattern.type ~= "string"
|
||||
if pattern.pattern then
|
||||
startp = type(pattern.pattern) == "table"
|
||||
and pattern.pattern[1] or pattern.pattern
|
||||
if not_is_string and startp:sub(1, 1) ~= "^" then
|
||||
startp = "^%s*" .. startp
|
||||
elseif not_is_string then
|
||||
startp = "^%s*" .. startp:sub(2, startp:len())
|
||||
end
|
||||
if type(pattern.pattern) == "table" then
|
||||
table.insert(comments, {"p", startp, pattern.pattern[2]})
|
||||
elseif not_is_string then
|
||||
table.insert(comments, {"p", startp})
|
||||
end
|
||||
elseif pattern.regex then
|
||||
startp = type(pattern.regex) == "table"
|
||||
and pattern.regex[1] or pattern.regex
|
||||
if not_is_string and startp:sub(1, 1) ~= "^" then
|
||||
startp = "^\\s*" .. startp
|
||||
elseif not_is_string then
|
||||
startp = "^\\s*" .. startp:sub(2, startp:len())
|
||||
end
|
||||
if type(pattern.regex) == "table" then
|
||||
table.insert(comments, {
|
||||
"r", startp, pattern.regex[2]
|
||||
})
|
||||
elseif not_is_string then
|
||||
table.insert(comments, {"r", startp})
|
||||
end
|
||||
end
|
||||
elseif pattern.syntax then
|
||||
local subsyntax = core_syntax.get("file"..pattern.syntax, "")
|
||||
local sub_comments = get_comment_patterns(subsyntax)
|
||||
if sub_comments then
|
||||
for s=1, #sub_comments do
|
||||
table.insert(comments, sub_comments[s])
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
if #comments == 0 then
|
||||
local single_line_comment = syntax.comment
|
||||
and escape_comment_tokens(syntax.comment) or nil
|
||||
local block_comment = nil
|
||||
if syntax.block_comment then
|
||||
block_comment = {
|
||||
escape_comment_tokens(syntax.block_comment[1]),
|
||||
escape_comment_tokens(syntax.block_comment[2])
|
||||
}
|
||||
end
|
||||
if single_line_comment then
|
||||
table.insert(comments, {"p", "^%s*" .. single_line_comment})
|
||||
end
|
||||
if block_comment then
|
||||
table.insert(comments, {"p", "^%s*" .. block_comment[1], block_comment[2]})
|
||||
end
|
||||
end
|
||||
comments_cache[syntax.name] = comments
|
||||
if #comments > 0 then
|
||||
return comments
|
||||
end
|
||||
return nil
|
||||
end
|
||||
|
||||
|
||||
local function get_non_empty_lines(syntax, lines)
|
||||
return coroutine.wrap(function()
|
||||
local tokens, state
|
||||
local comments = get_comment_patterns(syntax)
|
||||
|
||||
local i = 0
|
||||
local end_regex = nil
|
||||
local end_pattern = nil
|
||||
local inside_comment = false
|
||||
for _, line in ipairs(lines) do
|
||||
tokens, state = tokenizer.tokenize(syntax, line, state)
|
||||
local line_start = get_first_line_part(tokens)
|
||||
if line_start then
|
||||
if line:gsub("^%s+", "") ~= "" then
|
||||
local is_comment = false
|
||||
if comments then
|
||||
if not inside_comment then
|
||||
for c=1, #comments do
|
||||
local comment = comments[c]
|
||||
if comment[1] == "p" then
|
||||
if comment[3] then
|
||||
local start, ending = line:find(comment[2])
|
||||
if start then
|
||||
if not line:find(comment[3], ending+1) then
|
||||
is_comment = true
|
||||
inside_comment = true
|
||||
end_pattern = comment[3]
|
||||
end
|
||||
break
|
||||
end
|
||||
elseif line:find(comment[2]) then
|
||||
is_comment = true
|
||||
break
|
||||
end
|
||||
else
|
||||
if comment[3] then
|
||||
local start, ending = regex.match(
|
||||
comment[2], line, 1, regex.ANCHORED
|
||||
)
|
||||
if start then
|
||||
if not regex.match(
|
||||
comment[3], line, ending+1, regex.ANCHORED
|
||||
)
|
||||
then
|
||||
is_comment = true
|
||||
inside_comment = true
|
||||
end_regex = comment[3]
|
||||
end
|
||||
break
|
||||
end
|
||||
elseif regex.match(comment[2], line, 1, regex.ANCHORED) then
|
||||
is_comment = true
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
elseif end_pattern and line:find(end_pattern) then
|
||||
is_comment = true
|
||||
inside_comment = false
|
||||
end_pattern = nil
|
||||
elseif end_regex and regex.match(end_regex, line) then
|
||||
is_comment = true
|
||||
inside_comment = false
|
||||
end_regex = nil
|
||||
end
|
||||
end
|
||||
if
|
||||
not is_comment
|
||||
and
|
||||
not inside_comment
|
||||
then
|
||||
i = i + 1
|
||||
coroutine.yield(i, line_start)
|
||||
coroutine.yield(i, line)
|
||||
end
|
||||
end
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
|
||||
local auto_detect_max_lines = 100
|
||||
|
||||
local function detect_indent_stat(doc)
|
||||
local stat = {}
|
||||
local tab_count = 0
|
||||
local runs = 1
|
||||
local max_lines = auto_detect_max_lines
|
||||
for i, text in get_non_empty_lines(doc.syntax, doc.lines) do
|
||||
local str = text:match("^ %s+%S")
|
||||
if str then add_to_stat(stat, #str - 1) end
|
||||
local str = text:match("^\t+")
|
||||
if str then tab_count = tab_count + 1 end
|
||||
local spaces = text:match("^ +")
|
||||
if spaces then table.insert(stat, spaces:len()) end
|
||||
local tabs = text:match("^\t+")
|
||||
if tabs then tab_count = tab_count + 1 end
|
||||
-- if nothing found for first lines try at least 4 more times
|
||||
if i == max_lines and runs < 5 and #stat == 0 and tab_count == 0 then
|
||||
max_lines = max_lines + auto_detect_max_lines
|
||||
runs = runs + 1
|
||||
-- Stop parsing when files is very long. Not needed for euristic determination.
|
||||
if i > auto_detect_max_lines then break end
|
||||
elseif i > max_lines then break end
|
||||
end
|
||||
table.sort(stat, function(a, b) return a[1] < b[1] end)
|
||||
local indent, score = optimal_indent_from_stat(stat)
|
||||
if tab_count > score then
|
||||
return "hard", config.indent_size, tab_count
|
||||
|
@ -101,7 +261,7 @@ end
|
|||
|
||||
local function update_cache(doc)
|
||||
local type, size, score = detect_indent_stat(doc)
|
||||
local score_threshold = 4
|
||||
local score_threshold = 2
|
||||
if score < score_threshold then
|
||||
-- use default values
|
||||
type = config.tab_type
|
||||
|
@ -130,9 +290,11 @@ end
|
|||
|
||||
local function set_indent_type(doc, type)
|
||||
local _, indent_size = doc:get_indent_info()
|
||||
cache[doc] = {type = type,
|
||||
cache[doc] = {
|
||||
type = type,
|
||||
size = indent_size,
|
||||
confirmed = true}
|
||||
confirmed = true
|
||||
}
|
||||
doc.indent_info = cache[doc]
|
||||
end
|
||||
|
||||
|
@ -158,9 +320,11 @@ end
|
|||
|
||||
local function set_indent_size(doc, size)
|
||||
local indent_type = doc:get_indent_info()
|
||||
cache[doc] = {type = indent_type,
|
||||
cache[doc] = {
|
||||
type = indent_type,
|
||||
size = size,
|
||||
confirmed = true}
|
||||
confirmed = true
|
||||
}
|
||||
doc.indent_info = cache[doc]
|
||||
end
|
||||
|
||||
|
@ -168,14 +332,14 @@ local function set_indent_size_command()
|
|||
core.command_view:enter(
|
||||
"Specify indent size for current file",
|
||||
function(value) -- submit
|
||||
local value = math.floor(tonumber(value))
|
||||
value = math.floor(tonumber(value))
|
||||
local doc = core.active_view.doc
|
||||
set_indent_size(doc, value)
|
||||
end,
|
||||
nil, -- suggest
|
||||
nil, -- cancel
|
||||
function(value) -- validate
|
||||
local value = tonumber(value)
|
||||
value = tonumber(value)
|
||||
return value ~= nil and value >= 1
|
||||
end
|
||||
)
|
||||
|
@ -187,20 +351,24 @@ command.add("core.docview", {
|
|||
["indent:set-file-indent-size"] = set_indent_size_command
|
||||
})
|
||||
|
||||
|
||||
command.add(function()
|
||||
command.add(
|
||||
function()
|
||||
return core.active_view:is(DocView)
|
||||
and cache[core.active_view.doc]
|
||||
and cache[core.active_view.doc].type == "soft"
|
||||
end, {
|
||||
["indent:switch-file-to-tabs-indentation"] = function() set_indent_type(core.active_view.doc, "hard") end
|
||||
["indent:switch-file-to-tabs-indentation"] = function()
|
||||
set_indent_type(core.active_view.doc, "hard")
|
||||
end
|
||||
})
|
||||
|
||||
|
||||
command.add(function()
|
||||
command.add(
|
||||
function()
|
||||
return core.active_view:is(DocView)
|
||||
and cache[core.active_view.doc]
|
||||
and cache[core.active_view.doc].type == "hard"
|
||||
end, {
|
||||
["indent:switch-file-to-spaces-indentation"] = function() set_indent_type(core.active_view.doc, "soft") end
|
||||
["indent:switch-file-to-spaces-indentation"] = function()
|
||||
set_indent_type(core.active_view.doc, "soft")
|
||||
end
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue