Merge pull request #883 from jgmdev/detectindent-improvements
plugin detectindent: fixes and improvements
This commit is contained in:
commit
1f468fca24
|
@ -3,93 +3,253 @@ local core = require "core"
|
||||||
local command = require "core.command"
|
local command = require "core.command"
|
||||||
local common = require "core.common"
|
local common = require "core.common"
|
||||||
local config = require "core.config"
|
local config = require "core.config"
|
||||||
|
local core_syntax = require "core.syntax"
|
||||||
local DocView = require "core.docview"
|
local DocView = require "core.docview"
|
||||||
local Doc = require "core.doc"
|
local Doc = require "core.doc"
|
||||||
local tokenizer = require "core.tokenizer"
|
|
||||||
|
|
||||||
local cache = setmetatable({}, { __mode = "k" })
|
local cache = setmetatable({}, { __mode = "k" })
|
||||||
|
local comments_cache = {}
|
||||||
|
local auto_detect_max_lines = 150
|
||||||
|
|
||||||
|
|
||||||
local function add_to_stat(stat, val)
|
local function indent_occurrences_more_than_once(stat, idx)
|
||||||
for i = 1, #stat do
|
if stat[idx-1] and stat[idx-1] == stat[idx] then
|
||||||
if val == stat[i][1] then
|
return true
|
||||||
stat[i][2] = stat[i][2] + 1
|
elseif stat[idx+1] and stat[idx+1] == stat[idx] then
|
||||||
return
|
return true
|
||||||
end
|
|
||||||
end
|
end
|
||||||
stat[#stat + 1] = {val, 1}
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
local function optimal_indent_from_stat(stat)
|
local function optimal_indent_from_stat(stat)
|
||||||
if #stat == 0 then return nil, 0 end
|
if #stat == 0 then return nil, 0 end
|
||||||
local bins = {}
|
table.sort(stat, function(a, b) return a > b end)
|
||||||
for k = 1, #stat do
|
local best_indent = 0
|
||||||
local indent = stat[k][1]
|
local best_score = 0
|
||||||
|
local count = #stat
|
||||||
|
for x=1, count do
|
||||||
|
local indent = stat[x]
|
||||||
local score = 0
|
local score = 0
|
||||||
local mult_prev, lines_prev
|
for y=1, count do
|
||||||
for i = k, #stat do
|
if y ~= x and stat[y] % indent == 0 then
|
||||||
if stat[i][1] % indent == 0 then
|
score = score + 1
|
||||||
local mult = stat[i][1] / indent
|
elseif
|
||||||
if not mult_prev or (mult_prev + 1 == mult and lines_prev / stat[i][2] > 0.1) then
|
indent > stat[y]
|
||||||
-- we add the number of lines to the score only if the previous
|
and
|
||||||
-- multiple of "indent" was populated with enough lines.
|
indent_occurrences_more_than_once(stat, y)
|
||||||
score = score + stat[i][2]
|
then
|
||||||
end
|
score = 0
|
||||||
mult_prev, lines_prev = mult, stat[i][2]
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
bins[#bins + 1] = {indent, score}
|
if score > best_score then
|
||||||
end
|
best_indent = indent
|
||||||
table.sort(bins, function(a, b) return a[2] > b[2] end)
|
best_score = score
|
||||||
return bins[1][1], bins[1][2]
|
end
|
||||||
end
|
if score > 0 then
|
||||||
|
break
|
||||||
|
|
||||||
-- return nil if it is a comment or blank line or the initial part of the
|
|
||||||
-- line otherwise.
|
|
||||||
-- we don't need to have the whole line to detect indentation.
|
|
||||||
local function get_first_line_part(tokens)
|
|
||||||
local i, n = 1, #tokens
|
|
||||||
while i + 1 <= n do
|
|
||||||
local ttype, ttext = tokens[i], tokens[i + 1]
|
|
||||||
if ttype ~= "comment" and ttext:gsub("%s+", "") ~= "" then
|
|
||||||
return ttext
|
|
||||||
end
|
end
|
||||||
i = i + 2
|
|
||||||
end
|
end
|
||||||
|
return best_score > 0 and best_indent or nil, best_score
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function escape_comment_tokens(token)
|
||||||
|
local special_chars = "*-%[].()+?^$"
|
||||||
|
local escaped = ""
|
||||||
|
for x=1, token:len() do
|
||||||
|
local found = false
|
||||||
|
for y=1, special_chars:len() do
|
||||||
|
if token:sub(x, x) == special_chars:sub(y, y) then
|
||||||
|
escaped = escaped .. "%" .. token:sub(x, x)
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if not found then
|
||||||
|
escaped = escaped .. token:sub(x, x)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
return escaped
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function get_comment_patterns(syntax)
|
||||||
|
if comments_cache[syntax.name] then
|
||||||
|
if #comments_cache[syntax.name] > 0 then
|
||||||
|
return comments_cache[syntax.name]
|
||||||
|
else
|
||||||
|
return nil
|
||||||
|
end
|
||||||
|
end
|
||||||
|
local comments = {}
|
||||||
|
for idx=1, #syntax.patterns do
|
||||||
|
local pattern = syntax.patterns[idx]
|
||||||
|
local startp = ""
|
||||||
|
if
|
||||||
|
type(pattern.type) == "string"
|
||||||
|
and
|
||||||
|
(pattern.type == "comment" or pattern.type == "string")
|
||||||
|
then
|
||||||
|
local not_is_string = pattern.type ~= "string"
|
||||||
|
if pattern.pattern then
|
||||||
|
startp = type(pattern.pattern) == "table"
|
||||||
|
and pattern.pattern[1] or pattern.pattern
|
||||||
|
if not_is_string and startp:sub(1, 1) ~= "^" then
|
||||||
|
startp = "^%s*" .. startp
|
||||||
|
elseif not_is_string then
|
||||||
|
startp = "^%s*" .. startp:sub(2, startp:len())
|
||||||
|
end
|
||||||
|
if type(pattern.pattern) == "table" then
|
||||||
|
table.insert(comments, {"p", startp, pattern.pattern[2]})
|
||||||
|
elseif not_is_string then
|
||||||
|
table.insert(comments, {"p", startp})
|
||||||
|
end
|
||||||
|
elseif pattern.regex then
|
||||||
|
startp = type(pattern.regex) == "table"
|
||||||
|
and pattern.regex[1] or pattern.regex
|
||||||
|
if not_is_string and startp:sub(1, 1) ~= "^" then
|
||||||
|
startp = "^\\s*" .. startp
|
||||||
|
elseif not_is_string then
|
||||||
|
startp = "^\\s*" .. startp:sub(2, startp:len())
|
||||||
|
end
|
||||||
|
if type(pattern.regex) == "table" then
|
||||||
|
table.insert(comments, {
|
||||||
|
"r", regex.compile(startp), regex.compile(pattern.regex[2])
|
||||||
|
})
|
||||||
|
elseif not_is_string then
|
||||||
|
table.insert(comments, {"r", regex.compile(startp)})
|
||||||
|
end
|
||||||
|
end
|
||||||
|
elseif pattern.syntax then
|
||||||
|
local subsyntax = core_syntax.get("file"..pattern.syntax, "")
|
||||||
|
local sub_comments = get_comment_patterns(subsyntax)
|
||||||
|
if sub_comments then
|
||||||
|
for s=1, #sub_comments do
|
||||||
|
table.insert(comments, sub_comments[s])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if #comments == 0 then
|
||||||
|
local single_line_comment = syntax.comment
|
||||||
|
and escape_comment_tokens(syntax.comment) or nil
|
||||||
|
local block_comment = nil
|
||||||
|
if syntax.block_comment then
|
||||||
|
block_comment = {
|
||||||
|
escape_comment_tokens(syntax.block_comment[1]),
|
||||||
|
escape_comment_tokens(syntax.block_comment[2])
|
||||||
|
}
|
||||||
|
end
|
||||||
|
if single_line_comment then
|
||||||
|
table.insert(comments, {"p", "^%s*" .. single_line_comment})
|
||||||
|
end
|
||||||
|
if block_comment then
|
||||||
|
table.insert(comments, {"p", "^%s*" .. block_comment[1], block_comment[2]})
|
||||||
|
end
|
||||||
|
end
|
||||||
|
comments_cache[syntax.name] = comments
|
||||||
|
if #comments > 0 then
|
||||||
|
return comments
|
||||||
|
end
|
||||||
|
return nil
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
local function get_non_empty_lines(syntax, lines)
|
local function get_non_empty_lines(syntax, lines)
|
||||||
return coroutine.wrap(function()
|
return coroutine.wrap(function()
|
||||||
local tokens, state
|
local comments = get_comment_patterns(syntax)
|
||||||
|
|
||||||
local i = 0
|
local i = 0
|
||||||
|
local end_regex = nil
|
||||||
|
local end_pattern = nil
|
||||||
|
local inside_comment = false
|
||||||
for _, line in ipairs(lines) do
|
for _, line in ipairs(lines) do
|
||||||
tokens, state = tokenizer.tokenize(syntax, line, state)
|
if line:gsub("^%s+", "") ~= "" then
|
||||||
local line_start = get_first_line_part(tokens)
|
local is_comment = false
|
||||||
if line_start then
|
if comments then
|
||||||
i = i + 1
|
if not inside_comment then
|
||||||
coroutine.yield(i, line_start)
|
for c=1, #comments do
|
||||||
|
local comment = comments[c]
|
||||||
|
if comment[1] == "p" then
|
||||||
|
if comment[3] then
|
||||||
|
local start, ending = line:find(comment[2])
|
||||||
|
if start then
|
||||||
|
if not line:find(comment[3], ending+1) then
|
||||||
|
is_comment = true
|
||||||
|
inside_comment = true
|
||||||
|
end_pattern = comment[3]
|
||||||
|
end
|
||||||
|
break
|
||||||
|
end
|
||||||
|
elseif line:find(comment[2]) then
|
||||||
|
is_comment = true
|
||||||
|
break
|
||||||
|
end
|
||||||
|
else
|
||||||
|
if comment[3] then
|
||||||
|
local start, ending = regex.match(
|
||||||
|
comment[2], line, 1, regex.ANCHORED
|
||||||
|
)
|
||||||
|
if start then
|
||||||
|
if not regex.match(
|
||||||
|
comment[3], line, ending+1, regex.ANCHORED
|
||||||
|
)
|
||||||
|
then
|
||||||
|
is_comment = true
|
||||||
|
inside_comment = true
|
||||||
|
end_regex = comment[3]
|
||||||
|
end
|
||||||
|
break
|
||||||
|
end
|
||||||
|
elseif regex.match(comment[2], line, 1, regex.ANCHORED) then
|
||||||
|
is_comment = true
|
||||||
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
elseif end_pattern and line:find(end_pattern) then
|
||||||
|
is_comment = true
|
||||||
|
inside_comment = false
|
||||||
|
end_pattern = nil
|
||||||
|
elseif end_regex and regex.match(end_regex, line) then
|
||||||
|
is_comment = true
|
||||||
|
inside_comment = false
|
||||||
|
end_regex = nil
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if
|
||||||
|
not is_comment
|
||||||
|
and
|
||||||
|
not inside_comment
|
||||||
|
then
|
||||||
|
i = i + 1
|
||||||
|
coroutine.yield(i, line)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
local auto_detect_max_lines = 100
|
|
||||||
|
|
||||||
local function detect_indent_stat(doc)
|
local function detect_indent_stat(doc)
|
||||||
local stat = {}
|
local stat = {}
|
||||||
local tab_count = 0
|
local tab_count = 0
|
||||||
|
local runs = 1
|
||||||
|
local max_lines = auto_detect_max_lines
|
||||||
for i, text in get_non_empty_lines(doc.syntax, doc.lines) do
|
for i, text in get_non_empty_lines(doc.syntax, doc.lines) do
|
||||||
local str = text:match("^ %s+%S")
|
local spaces = text:match("^ +")
|
||||||
if str then add_to_stat(stat, #str - 1) end
|
if spaces then table.insert(stat, spaces:len()) end
|
||||||
local str = text:match("^\t+")
|
local tabs = text:match("^\t+")
|
||||||
if str then tab_count = tab_count + 1 end
|
if tabs then tab_count = tab_count + 1 end
|
||||||
|
-- if nothing was found in the first lines, extend the scan up to 4 more times
|
||||||
|
if i == max_lines and runs < 5 and #stat == 0 and tab_count == 0 then
|
||||||
|
max_lines = max_lines + auto_detect_max_lines
|
||||||
|
runs = runs + 1
|
||||||
-- Stop parsing when the file is very long. Not needed for heuristic determination.
|
-- Stop parsing when the file is very long. Not needed for heuristic determination.
|
||||||
if i > auto_detect_max_lines then break end
|
elseif i > max_lines then break end
|
||||||
end
|
end
|
||||||
table.sort(stat, function(a, b) return a[1] < b[1] end)
|
|
||||||
local indent, score = optimal_indent_from_stat(stat)
|
local indent, score = optimal_indent_from_stat(stat)
|
||||||
if tab_count > score then
|
if tab_count > score then
|
||||||
return "hard", config.indent_size, tab_count
|
return "hard", config.indent_size, tab_count
|
||||||
|
@ -101,7 +261,7 @@ end
|
||||||
|
|
||||||
local function update_cache(doc)
|
local function update_cache(doc)
|
||||||
local type, size, score = detect_indent_stat(doc)
|
local type, size, score = detect_indent_stat(doc)
|
||||||
local score_threshold = 4
|
local score_threshold = 2
|
||||||
if score < score_threshold then
|
if score < score_threshold then
|
||||||
-- use default values
|
-- use default values
|
||||||
type = config.tab_type
|
type = config.tab_type
|
||||||
|
@ -130,9 +290,11 @@ end
|
||||||
|
|
||||||
local function set_indent_type(doc, type)
|
local function set_indent_type(doc, type)
|
||||||
local _, indent_size = doc:get_indent_info()
|
local _, indent_size = doc:get_indent_info()
|
||||||
cache[doc] = {type = type,
|
cache[doc] = {
|
||||||
size = indent_size,
|
type = type,
|
||||||
confirmed = true}
|
size = indent_size,
|
||||||
|
confirmed = true
|
||||||
|
}
|
||||||
doc.indent_info = cache[doc]
|
doc.indent_info = cache[doc]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -158,9 +320,11 @@ end
|
||||||
|
|
||||||
local function set_indent_size(doc, size)
|
local function set_indent_size(doc, size)
|
||||||
local indent_type = doc:get_indent_info()
|
local indent_type = doc:get_indent_info()
|
||||||
cache[doc] = {type = indent_type,
|
cache[doc] = {
|
||||||
size = size,
|
type = indent_type,
|
||||||
confirmed = true}
|
size = size,
|
||||||
|
confirmed = true
|
||||||
|
}
|
||||||
doc.indent_info = cache[doc]
|
doc.indent_info = cache[doc]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -168,14 +332,14 @@ local function set_indent_size_command()
|
||||||
core.command_view:enter(
|
core.command_view:enter(
|
||||||
"Specify indent size for current file",
|
"Specify indent size for current file",
|
||||||
function(value) -- submit
|
function(value) -- submit
|
||||||
local value = math.floor(tonumber(value))
|
value = math.floor(tonumber(value))
|
||||||
local doc = core.active_view.doc
|
local doc = core.active_view.doc
|
||||||
set_indent_size(doc, value)
|
set_indent_size(doc, value)
|
||||||
end,
|
end,
|
||||||
nil, -- suggest
|
nil, -- suggest
|
||||||
nil, -- cancel
|
nil, -- cancel
|
||||||
function(value) -- validate
|
function(value) -- validate
|
||||||
local value = tonumber(value)
|
value = tonumber(value)
|
||||||
return value ~= nil and value >= 1
|
return value ~= nil and value >= 1
|
||||||
end
|
end
|
||||||
)
|
)
|
||||||
|
@ -187,20 +351,24 @@ command.add("core.docview", {
|
||||||
["indent:set-file-indent-size"] = set_indent_size_command
|
["indent:set-file-indent-size"] = set_indent_size_command
|
||||||
})
|
})
|
||||||
|
|
||||||
|
command.add(
|
||||||
command.add(function()
|
function()
|
||||||
return core.active_view:is(DocView)
|
return core.active_view:is(DocView)
|
||||||
and cache[core.active_view.doc]
|
and cache[core.active_view.doc]
|
||||||
and cache[core.active_view.doc].type == "soft"
|
and cache[core.active_view.doc].type == "soft"
|
||||||
end, {
|
end, {
|
||||||
["indent:switch-file-to-tabs-indentation"] = function() set_indent_type(core.active_view.doc, "hard") end
|
["indent:switch-file-to-tabs-indentation"] = function()
|
||||||
|
set_indent_type(core.active_view.doc, "hard")
|
||||||
|
end
|
||||||
})
|
})
|
||||||
|
|
||||||
|
command.add(
|
||||||
command.add(function()
|
function()
|
||||||
return core.active_view:is(DocView)
|
return core.active_view:is(DocView)
|
||||||
and cache[core.active_view.doc]
|
and cache[core.active_view.doc]
|
||||||
and cache[core.active_view.doc].type == "hard"
|
and cache[core.active_view.doc].type == "hard"
|
||||||
end, {
|
end, {
|
||||||
["indent:switch-file-to-spaces-indentation"] = function() set_indent_type(core.active_view.doc, "soft") end
|
["indent:switch-file-to-spaces-indentation"] = function()
|
||||||
|
set_indent_type(core.active_view.doc, "soft")
|
||||||
|
end
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in New Issue