Updated PCRE to use utf8, and created sample plugin.
This commit is contained in:
parent
a9dd790321
commit
272067124d
|
@ -4,26 +4,58 @@
|
||||||
-- Metatable lookup hook: any key requested through this metamethod is
-- resolved by reading the same key from the `regex` table. The first
-- argument (the object being indexed) is not used.
regex.__index = function(_, key)
  return regex[key]
end
|
||||||
|
|
||||||
-- Matches `string` against a pattern.
-- `pattern_string` may be an already compiled pattern (a table) or anything
-- else, in which case it is handed to regex.compile first.
-- Returns whatever regex.cmatch returns for the compiled pattern and subject.
regex.match = function(pattern_string, string)
  local compiled = pattern_string
  if type(compiled) ~= "table" then
    compiled = regex.compile(pattern_string)
  end
  return regex.cmatch(compiled, string)
end
|
||||||
|
|
||||||
-- Build off matching. For now, only support basic replacements, but capture
|
-- Steps backwards from `index` over UTF-8 continuation bytes (0x80-0xBF) so
-- that a cut point never lands in the middle of a multi-byte character.
-- @param str   the string to scan
-- @param index byte position to step back from; scanning starts at index - 1
-- @return the position of the first byte before `index` that is not a
--         continuation byte, or 0 when the start of the string is reached
--         without finding one
local function previous_character(str, index)
  local byte
  repeat
    index = index - 1
    byte = string.byte(str, index)
    -- string.byte yields nil once we step off the front of the string;
    -- stop there instead of comparing nil with a number.
  until not byte or byte < 128 or byte >= 192
  return index
end
|
||||||
end
|
|
||||||
until #indices == 0 or indices[1] == indices[2]
|
-- Moves to the end of the identified character.
|
||||||
return result .. str:sub(offset - 1)
|
-- Advances `index` forward over any UTF-8 continuation bytes (0x80-0xBF)
-- that follow it, so the returned position is the last byte of the
-- character that `index` points into.
-- @param str   the string to scan
-- @param index byte position inside (or at the start of) a character
-- @return the position of the final byte of that character; `index` itself
--         when the next byte is not a continuation byte or does not exist
local function end_character(str, index)
  local byte = string.byte(str, index + 1)
  -- `byte` is nil when index is already the last byte of the string; the
  -- nil check keeps the comparison from raising in that case.
  while byte and byte >= 128 and byte < 192 do
    index = index + 1
    byte = string.byte(str, index + 1)
  end
  return index
end
|
||||||
|
|
||||||
|
-- Replaces matches of a pattern in a string, built on top of regex.cmatch.
-- Group references \1 - \9 in `replacement` are expanded to the text of the
-- corresponding capture group; references to groups that were not returned
-- (including \0) are removed. Intended to work on UTF-8 text.
--
-- pattern_string: a compiled pattern (table) or a pattern source that is
--                 passed to regex.compile.
-- string:         the subject text.
-- replacement:    the replacement template.
-- Returns the resulting text.
--
-- The values returned by regex.cmatch are consumed as index pairs: the first
-- pair delimits the whole match and every following pair delimits one group.
-- NOTE(review): the exact meaning of those indices is defined by cmatch,
-- which is not visible here; the index arithmetic below is kept exactly as
-- it was.
regex.gsub = function(pattern_string, string, replacement)
  local pattern = type(pattern_string) == "table" and
    pattern_string or regex.compile(pattern_string)
  -- The `string` parameter shadows the string library inside this function,
  -- so string functions are reached through method calls on the values.
  local str, pieces, indices = string, {}
  repeat
    indices = { regex.cmatch(pattern, str) }
    if #indices > 0 then
      -- Collect the text of every capture group up front.
      local groups = {}
      for i = 1, #indices / 2 - 1 do
        groups[i] = str:sub(indices[i * 2 + 1],
                            end_character(str, indices[i * 2 + 2] - 1))
      end
      -- Expand all \N references in a single pass. Using a function as the
      -- replacement inserts the captured text verbatim: a "%" in a capture
      -- is not treated as an escape, and a capture that itself contains
      -- "\N" is not expanded a second time.
      local current_replacement = replacement:gsub("\\(%d)", function(digit)
        return groups[tonumber(digit)] or ""
      end)
      if indices[1] > 1 then
        -- Keep the text in front of the match.
        pieces[#pieces + 1] = str:sub(1, previous_character(str, indices[1]))
      end
      pieces[#pieces + 1] = current_replacement
      -- Continue searching in the remainder of the subject.
      str = str:sub(indices[2])
    end
    -- Stop when nothing matched or when the match was empty.
  until #indices == 0 or indices[1] == indices[2]
  return table.concat(pieces) .. str
end
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
-- lite-xl 1.16
|
||||||
|
|
||||||
|
local core = require "core"
|
||||||
|
local command = require "core.command"
|
||||||
|
|
||||||
|
-- Finds the next "/" at or after byte position `init` that is not escaped.
-- A slash counts as escaped when it is preceded by an odd number of
-- backslashes. Returns the position of the slash, or nil if there is none.
local function find_unescaped_slash(text, init)
  local pos = text:find("/", init, true)
  while pos do
    local backslashes, probe = 0, pos - 1
    while probe >= init and text:sub(probe, probe) == "\\" do
      backslashes = backslashes + 1
      probe = probe - 1
    end
    if backslashes % 2 == 0 then
      return pos
    end
    pos = text:find("/", pos + 1, true)
  end
  return nil
end

-- Runs a regex replacement over every line of the active document.
-- Takes the following pattern: /pattern/replace/
-- A "/" inside either part can be written as "\/".
-- Capture groupings can be replaced using \1 through \9.
-- Logs a message and returns without changing anything when the input does
-- not have the expected shape.
local function regex_replace_file(pattern)
  local doc = core.active_view.doc

  if pattern:sub(1, 1) ~= "/" then
    core.log("Pattern must start with '/'.")
    return
  end

  -- The pattern part runs from just after the leading slash to just before
  -- the next unescaped slash.
  local start_pattern = 2
  local pattern_delimiter = find_unescaped_slash(pattern, start_pattern)
  if pattern_delimiter == nil then
    core.log("Can't find end to pattern.")
    return
  end
  local end_pattern = pattern_delimiter - 1

  -- The replacement part follows the same rule, starting after that slash.
  local start_replacement = pattern_delimiter + 1
  local replacement_delimiter = find_unescaped_slash(pattern, start_replacement)
  if replacement_delimiter == nil then
    core.log("Can't find end to replacement.")
    return
  end
  local end_replacement = replacement_delimiter - 1

  -- The pattern text is passed on with its "\/" escapes intact.
  local re = regex.compile(pattern:sub(start_pattern, end_pattern))
  -- In the replacement an escaped delimiter stands for a literal slash.
  local replacement = pattern:sub(start_replacement, end_replacement):gsub("\\/", "/")

  for i = 1, #doc.lines do
    local old_length = #doc.lines[i]
    -- NOTE(review): presumably the stored line ends with a newline and
    -- get_text excludes the end column, so this is the line text without
    -- the newline; confirm against the Doc implementation.
    local old_text = doc:get_text(i, 1, i, old_length)
    local new_text = regex.gsub(re, old_text, replacement)
    if old_text ~= new_text then
      -- Insert the new text behind the old text first, then delete the old
      -- text in front of it.
      doc:insert(i, old_length, new_text)
      doc:remove(i, 1, i, old_length)
    end
  end
end
|
||||||
|
|
||||||
|
-- Text used to pre-fill the prompt: a bare "/" at first, afterwards the
-- input that was submitted last.
local initial_regex_replace = "/"

-- Prompt callback: runs the replacement, then stores the submitted input so
-- it is offered again the next time the command is invoked.
local function submit_regex_replace(pattern)
  regex_replace_file(pattern)
  initial_regex_replace = pattern
end

-- Command handler: pre-fills the command view and opens the prompt.
local function prompt_regex_replace()
  core.command_view:set_text(initial_regex_replace)
  core.command_view:enter("Regex Replace (enter pattern as /old/new/)", submit_regex_replace)
end

command.add("core.docview", {
  ["regex:find-replace"] = prompt_regex_replace,
})
|
|
@ -17,11 +17,21 @@ static int f_pcre_compile(lua_State *L) {
|
||||||
size_t len;
|
size_t len;
|
||||||
PCRE2_SIZE errorOffset;
|
PCRE2_SIZE errorOffset;
|
||||||
int errorNumber;
|
int errorNumber;
|
||||||
const char* str = luaL_checklstring(L, -1, &len);
|
int pattern = PCRE2_UTF;
|
||||||
|
const char* str = luaL_checklstring(L, 1, &len);
|
||||||
|
if (lua_gettop(L) > 1) {
|
||||||
|
const char* options = luaL_checkstring(L, 2);
|
||||||
|
if (strstr(options,"i"))
|
||||||
|
pattern |= PCRE2_CASELESS;
|
||||||
|
if (strstr(options,"m"))
|
||||||
|
pattern |= PCRE2_MULTILINE;
|
||||||
|
if (strstr(options,"s"))
|
||||||
|
pattern |= PCRE2_DOTALL;
|
||||||
|
}
|
||||||
pcre2_code* re = pcre2_compile(
|
pcre2_code* re = pcre2_compile(
|
||||||
(PCRE2_SPTR)str,
|
(PCRE2_SPTR)str,
|
||||||
len,
|
len,
|
||||||
0,
|
PCRE2_UTF,
|
||||||
&errorNumber,
|
&errorNumber,
|
||||||
&errorOffset,
|
&errorOffset,
|
||||||
NULL
|
NULL
|
||||||
|
|
Loading…
Reference in New Issue