371 lines
12 KiB
Lua
371 lines
12 KiB
Lua
|
--mod-version:3 --priority:5
|
||
|
|
||
|
--[[
|
||
|
This code is responsible for the encoding change
|
||
|
using codesets library. It requires LiteXL 2.1.1r3
|
||
|
and above to work.
|
||
|
|
||
|
Heavily inspired from the encoding plugin
|
||
|
https://github.com/jgmdev/lite-xl-encoding
|
||
|
|
||
|
Configuration:
|
||
|
useSystemEncoding
|
||
|
By default the system encoding is used to open
|
||
|
a file. If you want to disable that you may add
|
||
|
the following line in your config file
|
||
|
config.plugins.codesets.useSystemEncoding = false
|
||
|
]]
|
||
|
|
||
|
local core = require "core"
|
||
|
local common = require "core.common"
|
||
|
local command = require "core.command"
|
||
|
local config = require "core.config"
|
||
|
local style = require "core.style"
|
||
|
local Doc = require "core.doc"
|
||
|
local DocView = require "core.docview"
|
||
|
local CommandView = require "core.commandview"
|
||
|
local StatusView = require "core.statusview"
|
||
|
|
||
|
---@type encoding
|
||
|
local encoding = require "codesetsextra"
|
||
|
-- Plugin defaults, merged with whatever is already present in
-- config.plugins.codesets (see the header comment for useSystemEncoding).
local default_settings = {
  useSystemEncoding = true
}
config.plugins.codesets = common.merge(default_settings, config.plugins.codesets)

-- Shorthand for the merged plugin configuration.
local conf = config.plugins.codesets

-- Table that collects the public helpers defined by this plugin.
local encodings = {}
|
||
|
|
||
|
---@class encodings.encoding
|
||
|
---@field charset string
|
||
|
---@field name string
|
||
|
|
||
|
---Display names of the encoding regions.
---The position of a name in this sequence is the index used to look up the
---matching entry of `encodings.list` (see `encodings.get_group`).
---@type table<integer,string>
encodings.groups = {
  "West European",   -- 1
  "East European",   -- 2
  "East Asian",      -- 3
  "SE & SW Asian",   -- 4
  "Middle Eastern",  -- 5
  "Unicode"          -- 6
}
|
||
|
|
||
|
---Supported iconv encodings grouped by region.
---Each inner list sits at the same index as its region name in
---`encodings.groups`.
---@type table<integer,encodings.encoding[]>
do
  -- Build one encodings.encoding record; keeps the table below compact.
  local function enc(charset, name)
    return { charset = charset, name = name }
  end

  encodings.list = {
    -- West European
    {
      enc("ISO-8859-14", "Celtic"),
      enc("ISO-8859-7", "Greek"),
      enc("WINDOWS-1253", "Greek"),
      enc("ISO-8859-10", "Nordic"),
      enc("ISO-8859-3", "South European"),
      enc("IBM850", "Western"),
      enc("ISO-8859-1", "Western"),
      enc("ISO-8859-15", "Western"),
      enc("WINDOWS-1252", "Western")
    },
    -- East European
    {
      enc("ISO-8859-4", "Baltic"),
      enc("ISO-8859-13", "Baltic"),
      enc("WINDOWS-1257", "Baltic"),
      enc("IBM852", "Central European"),
      enc("ISO-8859-2", "Central European"),
      enc("WINDOWS-1250", "Central European"),
      enc("IBM855", "Cyrillic"),
      enc("ISO-8859-5", "Cyrillic"),
      enc("ISO-IR-111", "Cyrillic"),
      enc("KOI8-R", "Cyrillic"),
      enc("WINDOWS-1251", "Cyrillic"),
      enc("CP866", "Cyrillic/Russian"),
      enc("KOI8-U", "Cyrillic/Ukrainian"),
      enc("ISO-8859-16", "Romanian")
    },
    -- East Asian
    {
      enc("GB18030", "Chinese Simplified"),
      enc("GB2312", "Chinese Simplified"),
      enc("GBK", "Chinese Simplified"),
      enc("HZ", "Chinese Simplified"),
      enc("BIG5", "Chinese Traditional"),
      enc("BIG5-HKSCS", "Chinese Traditional"),
      enc("EUC-TW", "Chinese Traditional"),
      enc("EUC-JP", "Japanese"),
      enc("ISO-2022-JP", "Japanese"),
      enc("SHIFT_JIS", "Japanese"),
      enc("CP932", "Japanese"),
      enc("EUC-KR", "Korean"),
      enc("ISO-2022-KR", "Korean"),
      enc("JOHAB", "Korean"),
      enc("UHC", "Korean")
    },
    -- SE & SW Asian
    {
      enc("ARMSCII-8", "Armenian"),
      enc("GEORGIAN-ACADEMY", "Georgian"),
      enc("TIS-620", "Thai"),
      enc("IBM857", "Turkish"),
      enc("WINDOWS-1254", "Turkish"),
      enc("ISO-8859-9", "Turkish"),
      enc("TCVN", "Vietnamese"),
      enc("VISCII", "Vietnamese"),
      enc("WINDOWS-1258", "Vietnamese")
    },
    -- Middle Eastern
    {
      enc("IBM864", "Arabic"),
      enc("ISO-8859-6", "Arabic"),
      enc("WINDOWS-1256", "Arabic"),
      enc("IBM862", "Hebrew"),
      enc("ISO-8859-8-I", "Hebrew"),
      enc("WINDOWS-1255", "Hebrew"),
      enc("ISO-8859-8", "Hebrew Visual")
    },
    -- Unicode
    {
      enc("UTF-7", "Unicode"),
      enc("UTF-8", "Unicode"),
      enc("UTF-16LE", "Unicode"),
      enc("UTF-16BE", "Unicode"),
      enc("UCS-2LE", "Unicode"),
      enc("UCS-2BE", "Unicode"),
      enc("UTF-32LE", "Unicode"),
      enc("UTF-32BE", "Unicode")
    }
  }
end
|
||
|
|
||
|
---Look up the encodings that belong to a region.
---The region is found by its display name in `encodings.groups`; the entry
---of `encodings.list` at the same position is returned. Nothing is returned
---when no region carries that name.
---@param label string Region name, e.g. "Unicode"
---@return encodings.encoding[] | nil
function encodings.get_group(label)
  local position = 1
  local group_name = encodings.groups[position]
  -- Walk the region names in order until the sequence ends.
  while group_name ~= nil do
    if group_name == label then
      return encodings.list[position]
    end
    position = position + 1
    group_name = encodings.groups[position]
  end
end
|
||
|
|
||
|
---Collect every supported encoding, from all regions, into one flat list.
---Regions are visited in the order of `encodings.groups`, and entries keep
---their order within each region. A new table is built on every call.
---@return encodings.encoding[]
function encodings.get_all()
  local flattened = {}
  for region_index in ipairs(encodings.groups) do
    local region = encodings.list[region_index]
    for _, entry in ipairs(region) do
      flattened[#flattened + 1] = entry
    end
  end
  return flattened
end
|
||
|
|
||
|
---Open a command view that lets the user pick a charset, then run `callback`
---with the charset of the chosen suggestion.
---
---Suggestions are labelled "<name> (<charset>)" and filtered with
---`common.fuzzy_match` against the typed text.
---@param title_label string Title displayed on the commandview
---@param callback fun(charset: string)
function encodings.select_encoding(title_label, callback)
  core.command_view:enter(title_label, {
    submit = function(_, item)
      -- NOTE(review): the command view appears to submit without a suggestion
      -- when the typed text matches nothing; confirm against
      -- CommandView:submit. Guard so that case reports an error instead of
      -- raising on a nil index.
      if not (item and item.charset) then
        core.error("No matching encoding selected")
        return
      end
      callback(item.charset)
    end,
    suggest = function(text)
      -- Map each display label back to its charset so the fuzzy-matched
      -- labels can be turned into suggestion items below.
      local charset_by_label = {}
      local labels = {}
      for _, element in ipairs(encodings.get_all()) do
        local label = element.name .. " (" .. element.charset .. ")"
        labels[#labels + 1] = label
        charset_by_label[label] = element.charset
      end
      local res = common.fuzzy_match(labels, text)
      for i, name in ipairs(res) do
        res[i] = {
          text = name,
          charset = charset_by_label[name]
        }
      end
      return res
    end
  })
end
|
||
|
|
||
|
--------------------------------------------------------------------------------
|
||
|
-- Overwrite Doc methods to properly add encoding detection and conversion.
|
||
|
--------------------------------------------------------------------------------
|
||
|
---Replacement constructor for Doc that also initialises the encoding state.
---`encoding` starts unset and `convert` starts false; both are filled in by
---`Doc:load` when an existing file is loaded.
---@param filename? string
---@param abs_filename? string
---@param new_file? boolean When truthy the file is not read from disk
function Doc:new(filename, abs_filename, new_file)
  self.new_file, self.encoding, self.convert = new_file, nil, false
  self:reset()

  -- Unnamed document: nothing to name or load.
  if not filename then return end

  self:set_filename(filename, abs_filename)

  -- A new file has no content on disk yet.
  if new_file then return end

  self:load(filename)
end
|
||
|
|
||
|
---Load `filename` into the document, converting its content to UTF-8 when
---the document encoding requires it.
---
---When `self.encoding` is not set yet, it is obtained from
---`encoding.systemCodeset()` if `useSystemEncoding` is enabled, otherwise
---from `encoding.detect(filename)`. An error is raised when no encoding could
---be determined, when the file cannot be opened, or when conversion fails.
---
---The file is read and converted before the document state is reset, so a
---failed conversion leaves the current document content untouched and does
---not leak the file handle.
---@param filename string
function Doc:load(filename)
  if not self.encoding then
    local errmsg
    if conf.useSystemEncoding then
      self.encoding, errmsg = encoding.systemCodeset()
    else
      self.encoding, errmsg = encoding.detect(filename)
    end
    if not self.encoding then
      core.error("%s", errmsg)
      error(errmsg)
    end
  end

  -- These charsets are read as-is; everything else goes through the converter.
  -- NOTE(review): ISO-8859-1 bytes above 0x7F are not valid UTF-8; confirm
  -- that skipping conversion for it is intended.
  self.convert = self.encoding ~= "UTF-8" and self.encoding ~= "ASCII"
    and self.encoding ~= "US-ASCII" and self.encoding ~= "ISO-8859-1"

  local fp = assert( io.open(filename, "rb") )

  -- Convert first: if this raises, the handle is already closed and the
  -- document has not been reset yet.
  local converted
  if self.convert then
    local raw = fp:read("*a")
    fp:close()
    converted = assert(encoding.convert("UTF-8", self.encoding, raw, {
      strict = false,
      handle_from_bom = true
    }))
  end

  self:reset()
  self.lines = {}
  local i = 1

  -- Store one line: drop a trailing CR (remembering the file uses CRLF),
  -- terminate with "\n" and mark the line for the highlighter.
  local function append_line(line)
    if line:byte(-1) == 13 then
      line = line:sub(1, -2)
      self.crlf = true
    end
    table.insert(self.lines, line .. "\n")
    self.highlighter.lines[i] = false
    i = i + 1
  end

  if converted then
    for line in converted:gmatch("([^\n]*)\n?") do
      append_line(line)
    end
  else
    for line in fp:lines() do
      -- Only the first line can carry a byte order mark.
      if i == 1 then line = encoding.strip_bom(line, "UTF-8") end
      append_line(line)
    end
    fp:close()
  end

  -- Keep at least one (empty) line in the document.
  if #self.lines == 0 then
    table.insert(self.lines, "\n")
  end
  self:reset_syntax()
end
|
||
|
|
||
|
---Write the document to disk, converting from UTF-8 to `self.encoding` when
---`self.convert` is set.
---
---Without arguments the document's own filename is used. When conversion
---fails, nothing is written, the error is reported through `core.error`, and
---`self.new_file` is set to true; otherwise `self.new_file` is set to false.
---@param filename? string
---@param abs_filename? string
function Doc:save(filename, abs_filename)
  if filename then
    assert(self.filename or abs_filename, "calling save on unnamed doc without absolute path")
  else
    assert(self.filename, "no filename set to default to")
    filename = self.filename
    abs_filename = self.abs_filename
  end

  local conversion_error = false

  if self.convert then
    -- Build the whole text first; the file is only opened once the strict
    -- conversion has succeeded.
    local text = table.concat(self.lines)
    if self.crlf then text = text:gsub("\n", "\r\n") end
    local converted, errmsg = encoding.convert(self.encoding, "UTF-8", text, {
      strict = true,
      handle_to_bom = true
    })
    if converted then
      local out = assert( io.open(filename, "wb") )
      out:write(encoding.get_charset_bom(self.encoding) .. converted)
      out:close()
    else
      conversion_error = true
      core.error("%s", errmsg)
    end
  else
    -- No conversion needed: stream the lines straight to the file.
    local out = assert( io.open(filename, "wb") )
    for _, line in ipairs(self.lines) do
      if self.crlf then line = line:gsub("\n", "\r\n") end
      out:write(line)
    end
    out:close()
  end

  self:set_filename(filename, abs_filename)
  self.new_file = conversion_error
  self:clean()
end
|
||
|
|
||
|
--------------------------------------------------------------------------------
|
||
|
-- Register command to change current document encoding.
|
||
|
--------------------------------------------------------------------------------
|
||
|
-- Charsets for which a document is flagged as not needing conversion.
local passthrough_charsets = {
  ["UTF-8"] = true,
  ["ASCII"] = true,
  ["US-ASCII"] = true,
  ["ISO-8859-1"] = true
}

-- Record the chosen charset on the document and update its convert flag.
local function assign_charset(doc, charset)
  doc.encoding = charset
  doc.convert = not passthrough_charsets[charset]
end

command.add("core.docview", {
  -- Pick an encoding, then save the document with it.
  ["doc:change-encoding"] = function(dv)
    encodings.select_encoding("Select Output Encoding", function(charset)
      assign_charset(dv.doc, charset)
      dv.doc:save()
    end)
  end,

  -- Pick an encoding, then reload the document with it.
  ["doc:reload-with-encoding"] = function(dv)
    encodings.select_encoding("Reload With Encoding", function(charset)
      assign_charset(dv.doc, charset)
      dv.doc:reload()
    end)
  end
})
|
||
|
|
||
|
--------------------------------------------------------------------------------
|
||
|
-- Register a statusbar item to view or change the current doc encoding.
|
||
|
--------------------------------------------------------------------------------
|
||
|
core.status_view:add_item({
  -- Show the item only while a DocView that is not a CommandView is active.
  predicate = function()
    local view = core.active_view
    local is_doc_view = view:is(DocView)
    return is_doc_view and not view:is(CommandView)
  end,
  name = "doc:encoding",
  alignment = StatusView.Item.RIGHT,
  -- Display the encoding of the active document, or "none" when unset.
  get_item = function()
    local label = core.active_view.doc.encoding or "none"
    return { style.text, label }
  end,
  -- Left click changes the encoding, right click reloads with an encoding.
  command = function(button)
    local command_by_button = {
      left = "doc:change-encoding",
      right = "doc:reload-with-encoding"
    }
    local command_name = command_by_button[button]
    if command_name then
      command.perform(command_name)
    end
  end,
  tooltip = "encoding"
})
|
||
|
|
||
|
|
||
|
return encodings;
|