Updated PCRE to use utf8, and created sample plugin.
This commit is contained in:
parent
a9dd790321
commit
272067124d
|
@ -4,26 +4,58 @@
|
|||
-- Metatable lookup hook: any key requested through this __index is resolved on the
-- shared `regex` table. NOTE(review): presumably this lets compiled pattern objects
-- expose the regex.* functions as methods — confirm where the metatable is attached.
regex.__index = function(table, key) return regex[key]; end
|
||||
|
||||
regex.match = function(pattern_string, string)
|
||||
local pattern = type(pattern_string) == "userdata" and
|
||||
local pattern = type(pattern_string) == "table" and
|
||||
pattern_string or regex.compile(pattern_string)
|
||||
return regex.cmatch(pattern, string)
|
||||
end
|
||||
|
||||
-- Build off matching. For now, only support basic replacements, but capture
|
||||
-- groupings should be doable. We can even have custom group replacements and
|
||||
-- transformations and stuff in lua.
|
||||
regex.gsub = function(pattern_string, string, replacement)
|
||||
local pattern = type(pattern_string) == "userdata" and
|
||||
pattern_string or regex.compile(pattern_string)
|
||||
local offset, result, str, indices = 0, "", string
|
||||
-- Will iterate back through any UTF-8 bytes so that we don't replace bits mid character.
|
||||
local function previous_character(str, index)
|
||||
local byte
|
||||
repeat
|
||||
str = str:sub(offset)
|
||||
indices = { regex.cmatch(pattern, str) }
|
||||
if #indices > 0 then
|
||||
result = result .. str:sub(offset, indices[1] - 1) .. replacement
|
||||
offset = indices[2]
|
||||
end
|
||||
until #indices == 0 or indices[1] == indices[2]
|
||||
return result .. str:sub(offset - 1)
|
||||
index = index - 1
|
||||
byte = string.byte(str, index)
|
||||
until byte < 128 or byte >= 192
|
||||
return index
|
||||
end
|
||||
|
||||
-- Moves to the end of the identified character.
-- Starting from byte position `index`, steps forward over any bytes in the range
-- 128..191 (UTF-8 continuation bytes) that immediately follow it, and returns the
-- position of the last such byte (or `index` unchanged when none follow).
--   str   : the string being scanned
--   index : byte position inside `str` to start from
local function end_character(str, index)
  local byte = string.byte(str, index + 1)
  -- string.byte returns nil once index + 1 is past the end of the string, so guard
  -- against nil before comparing; otherwise a character at the very end raises
  -- "attempt to compare nil with number".
  while byte and byte >= 128 and byte < 192 do
    index = index + 1
    byte = string.byte(str, index + 1)
  end
  return index
end
|
||||
|
||||
-- Build off matching. For now, only support basic replacements, but capture
-- groupings should be doable. We can even have custom group replacements and
-- transformations and stuff in lua. Currently, this takes group replacements
-- as \1 - \9.
-- Should work on UTF-8 text.
--
--   pattern_string : a compiled pattern (a table) or a pattern source string, which
--                    is compiled with regex.compile
--   string         : the subject text
--   replacement    : replacement text; "\1".."\9" are substituted with the matching
--                    capture group, and any other "\<digit>" is removed
-- Returns the subject with every match replaced.
--
-- The offsets returned by regex.cmatch are used here as (start, continue-from) pairs:
-- the whole match first, then one pair per capture group.
-- NOTE(review): previous_character returns the position of the lead byte of the
-- preceding character; confirm against regex.cmatch's offset convention that the
-- prefix slice keeps multi-byte characters intact.
regex.gsub = function(pattern_string, string, replacement)
  local pattern = type(pattern_string) == "table" and
    pattern_string or regex.compile(pattern_string)
  -- Output is collected in a table and joined once, instead of growing a string
  -- with repeated concatenation.
  local pieces, str, indices = {}, string
  repeat
    indices = { regex.cmatch(pattern, str) }
    if #indices > 0 then
      local group_count = #indices / 2 - 1
      -- Expand every "\<digit>" in a single pass using a function replacement.
      -- Captured text is therefore inserted verbatim: a "%" in the subject is not
      -- interpreted as a gsub replacement escape, and a "\2" that happens to occur
      -- inside capture 1's text is not expanded a second time.
      local expanded = tostring(replacement):gsub("\\(%d)", function(digit)
        local group = tonumber(digit)
        if group >= 1 and group <= group_count then
          return str:sub(indices[group * 2 + 1],
            end_character(str, indices[group * 2 + 2] - 1))
        end
        -- References to groups that do not exist are dropped.
        return ""
      end)
      -- Keep the text that precedes the match, if any.
      if indices[1] > 1 then
        pieces[#pieces + 1] = str:sub(1, previous_character(str, indices[1]))
      end
      pieces[#pieces + 1] = expanded
      -- Continue searching in the remainder of the subject.
      str = str:sub(indices[2])
    end
    -- Stop when nothing matched, or on an empty match (which would never advance).
  until #indices == 0 or indices[1] == indices[2]
  pieces[#pieces + 1] = str
  return table.concat(pieces)
end
|
||||
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
-- lite-xl 1.16
|
||||
|
||||
local core = require "core"
|
||||
local command = require "core.command"
|
||||
|
||||
-- Returns the position of the first "/" in `text` at or after `init` that is not
-- immediately preceded by a backslash, or nil when there is none.
local function find_unescaped_slash(text, init)
  local pos = init
  while true do
    pos = string.find(text, "/", pos, true)
    if pos == nil then
      return nil
    end
    -- Indexing a string with a number (text[pos]) always yields nil in Lua, so the
    -- preceding byte has to be read with sub().
    if text:sub(pos - 1, pos - 1) ~= "\\" then
      return pos
    end
    -- Escaped delimiter: resume the search after it, otherwise find() would keep
    -- returning the same position.
    pos = pos + 1
  end
end

-- Takes the following pattern: /pattern/replace/
-- Capture groupings can be replaced using \1 through \9
-- A "/" inside either part can be written as "\/".
-- Applies the replacement to every line of the active document, logging a message
-- and doing nothing if either closing "/" is missing.
local function regex_replace_file(pattern)
  local doc = core.active_view.doc

  -- The first character is taken to be the opening "/", so the pattern starts at 2.
  local start_pattern = 2
  local pattern_delimiter = find_unescaped_slash(pattern, start_pattern)
  if pattern_delimiter == nil then
    core.log("Can't find end to pattern.")
    return
  end
  local end_pattern = pattern_delimiter - 1

  local start_replacement = pattern_delimiter + 1
  local replacement_delimiter = find_unescaped_slash(pattern, start_replacement)
  if replacement_delimiter == nil then
    core.log("Can't find end to replacement.")
    return
  end
  local end_replacement = replacement_delimiter - 1

  -- "\/" is left as-is in the pattern source handed to regex.compile.
  local re = regex.compile(pattern:sub(start_pattern, end_pattern))
  -- In the replacement text the escape is resolved here, so "\/" inserts a plain "/".
  local replacement = pattern:sub(start_replacement, end_replacement):gsub("\\/", "/")

  for i = 1, #doc.lines do
    local old_length = #doc.lines[i]
    local old_text = doc:get_text(i, 1, i, old_length)
    local new_text = regex.gsub(re, old_text, replacement)
    if old_text ~= new_text then
      -- Insert the new text at the old end column first, then remove the original
      -- range in front of it.
      doc:insert(i, old_length, new_text)
      doc:remove(i, 1, i, old_length)
    end
  end
end
|
||||
|
||||
-- Text pre-filled into the command view; updated to the last submitted pattern so
-- the next invocation starts from it.
local initial_regex_replace = "/"

-- Prompts for a /old/new/ expression and applies it to the active document.
local function prompt_regex_replace()
  core.command_view:set_text(initial_regex_replace)
  core.command_view:enter("Regex Replace (enter pattern as /old/new/)", function(pattern)
    regex_replace_file(pattern)
    initial_regex_replace = pattern
  end)
end

command.add("core.docview", {
  ["regex:find-replace"] = prompt_regex_replace,
})
|
|
@ -17,11 +17,21 @@ static int f_pcre_compile(lua_State *L) {
|
|||
size_t len;
|
||||
PCRE2_SIZE errorOffset;
|
||||
int errorNumber;
|
||||
const char* str = luaL_checklstring(L, -1, &len);
|
||||
int pattern = PCRE2_UTF;
|
||||
const char* str = luaL_checklstring(L, 1, &len);
|
||||
if (lua_gettop(L) > 1) {
|
||||
const char* options = luaL_checkstring(L, 2);
|
||||
if (strstr(options,"i"))
|
||||
pattern |= PCRE2_CASELESS;
|
||||
if (strstr(options,"m"))
|
||||
pattern |= PCRE2_MULTILINE;
|
||||
if (strstr(options,"s"))
|
||||
pattern |= PCRE2_DOTALL;
|
||||
}
|
||||
pcre2_code* re = pcre2_compile(
|
||||
(PCRE2_SPTR)str,
|
||||
len,
|
||||
0,
|
||||
PCRE2_UTF,
|
||||
&errorNumber,
|
||||
&errorOffset,
|
||||
NULL
|
||||
|
|
Loading…
Reference in New Issue