Updated PCRE to use utf8, and created sample plugin.

This commit is contained in:
Adam Harrison 2021-05-01 01:31:49 -04:00
parent a9dd790321
commit 272067124d
3 changed files with 113 additions and 18 deletions

View File

@ -4,26 +4,58 @@
-- Resolves any key looked up through this metamethod against the `regex` table.
regex.__index = function(table, key)
  return regex[key]
end

-- Matches `string` against `pattern_string`.
-- `pattern_string` is used as-is when it is a table; any other value is handed
-- to regex.compile first. Returns whatever regex.cmatch returns.
regex.match = function(pattern_string, string)
  local pattern = pattern_string
  if type(pattern_string) ~= "table" then
    pattern = regex.compile(pattern_string)
  end
  return regex.cmatch(pattern, string)
end
-- Will iterate back through any UTF-8 bytes so that we don't replace bits mid character.
-- Starting from the byte just before `index`, walks backwards over UTF-8
-- continuation bytes (values 128..191) and returns the index of the first byte
-- that is not one (an ASCII byte or a lead byte).
-- Returns 0 when no such byte exists before `index`, instead of raising on the
-- nil that string.byte yields outside the string.
local function previous_character(str, index)
  local byte
  repeat
    index = index - 1
    -- Stop before index 0: string.byte returns nothing there, and negative
    -- indices would wrap around to the end of the string.
    if index < 1 then
      return 0
    end
    byte = string.byte(str, index)
  until byte and (byte < 128 or byte >= 192)
  return index
end
-- Moves to the end of the identified character.
-- Starting at `index`, advances while the following byte is a UTF-8
-- continuation byte (values 128..191) and returns the index of the last byte
-- reached.
local function end_character(str, index)
  local byte = string.byte(str, index + 1)
  -- string.byte yields nil past the end of the string; treat that as the end
  -- of the character rather than comparing nil with a number.
  while byte and byte >= 128 and byte < 192 do
    index = index + 1
    byte = string.byte(str, index + 1)
  end
  return index
end
-- Build off matching. For now, only support basic replacements, but capture
-- groupings should be doable. We can even have custom group replacements and
-- transformations and stuff in lua. Currently, this takes group replacements
-- as \1 - \9.
-- Should work on UTF-8 text.
--
-- pattern_string: a compiled pattern (a table) or a value passed to regex.compile.
-- string:         the subject text.
-- replacement:    replacement template; \1 - \9 are replaced by the text of the
--                 corresponding capture group, and references to groups that
--                 were not captured are removed.
-- Returns the text with replacements applied.
regex.gsub = function(pattern_string, string, replacement)
  local pattern = type(pattern_string) == "table" and
    pattern_string or regex.compile(pattern_string)
  local result, str, indices = "", string
  repeat
    indices = { regex.cmatch(pattern, str) }
    if #indices > 0 then
      -- indices[1], indices[2] describe the whole match; every following pair
      -- describes one capture group.
      local captures = {}
      for i = 1, (#indices / 2 - 1) do
        captures[i] = str:sub(indices[i * 2 + 1], end_character(str, indices[i * 2 + 2] - 1))
      end
      -- Expand all \N references in a single pass through a function
      -- replacement. Captured text is inserted verbatim, so a "%" in the
      -- matched text is not interpreted as a string.gsub escape, and a "\N"
      -- sequence inside captured text is not expanded a second time.
      local current_replacement = replacement:gsub("\\(%d)", function(digit)
        return captures[tonumber(digit)] or ""
      end)
      if indices[1] > 1 then
        -- NOTE(review): previous_character returns the index of the first byte
        -- of the character before the match; if that character is multi-byte
        -- this prefix may end on its lead byte. Depends on what regex.cmatch
        -- returns, which is not visible here -- TODO confirm.
        result = result .. str:sub(1, previous_character(str, indices[1])) .. current_replacement
      else
        result = result .. current_replacement
      end
      -- Continue searching in the remainder of the subject.
      str = str:sub(indices[2])
    end
  -- Stop when nothing matched or when the match was empty.
  until #indices == 0 or indices[1] == indices[2]
  return result .. str
end

View File

@ -0,0 +1,53 @@
-- lite-xl 1.16
local core = require "core"
local command = require "core.command"
-- Takes the following pattern: /pattern/replace/
-- Capture groupings can be replaced using \1 through \9
-- Returns the index of the first "/" at or after `from` in `text` that is not
-- escaped with a backslash, or nil when there is none.
local function find_unescaped_slash(text, from)
  local i = from
  while i <= #text do
    local ch = text:sub(i, i)
    if ch == "\\" then
      -- A backslash escapes the next character, whatever it is; skip both.
      i = i + 2
    elseif ch == "/" then
      return i
    else
      i = i + 1
    end
  end
  return nil
end

-- Applies a regex replacement to every line of the active view's document.
-- `pattern` takes the form /pattern/replace/; a "/" inside either part can be
-- written as "\/". Capture groupings can be referenced in the replacement
-- using \1 through \9.
-- Logs a message and returns without touching the document when the input is
-- malformed.
local function regex_replace_file(pattern)
  local doc = core.active_view.doc
  if pattern:sub(1, 1) ~= "/" then
    core.log("Pattern must start with /.")
    return
  end
  -- The regex runs from just after the leading "/" up to the next unescaped "/".
  local start_pattern = 2
  local pattern_close = find_unescaped_slash(pattern, start_pattern)
  if pattern_close == nil then
    core.log("Can't find end to pattern.")
    return
  end
  -- The replacement runs from just after that "/" up to the following unescaped "/".
  local start_replacement = pattern_close + 1
  local replacement_close = find_unescaped_slash(pattern, start_replacement)
  if replacement_close == nil then
    core.log("Can't find end to replacement.")
    return
  end
  -- "\/" is left in the regex text, where PCRE reads it as a literal slash.
  local re = regex.compile(pattern:sub(start_pattern, pattern_close - 1))
  -- In the replacement the escape only protected the delimiter, so drop it.
  local replacement = pattern:sub(start_replacement, replacement_close - 1):gsub("\\/", "/")
  for i = 1, #doc.lines do
    local old_length = #doc.lines[i]
    local old_text = doc:get_text(i, 1, i, old_length)
    local new_text = regex.gsub(re, old_text, replacement)
    if old_text ~= new_text then
      -- Insert the new text at column old_length, then remove columns
      -- 1..old_length, which hold the old text in front of it.
      doc:insert(i, old_length, new_text)
      doc:remove(i, 1, i, old_length)
    end
  end
end
-- Text placed in the command view when the command starts; updated to the
-- most recently submitted input.
local initial_regex_replace = "/"

command.add("core.docview", {
  -- Prompts for a /old/new/ expression and applies it to the active document.
  ["regex:find-replace"] = function()
    local function on_submit(pattern)
      regex_replace_file(pattern)
      initial_regex_replace = pattern
    end
    core.command_view:set_text(initial_regex_replace)
    core.command_view:enter("Regex Replace (enter pattern as /old/new/)", on_submit)
  end
})

View File

@ -17,11 +17,21 @@ static int f_pcre_compile(lua_State *L) {
size_t len; size_t len;
PCRE2_SIZE errorOffset; PCRE2_SIZE errorOffset;
int errorNumber; int errorNumber;
const char* str = luaL_checklstring(L, -1, &len); int pattern = PCRE2_UTF;
const char* str = luaL_checklstring(L, 1, &len);
if (lua_gettop(L) > 1) {
const char* options = luaL_checkstring(L, 2);
if (strstr(options,"i"))
pattern |= PCRE2_CASELESS;
if (strstr(options,"m"))
pattern |= PCRE2_MULTILINE;
if (strstr(options,"s"))
pattern |= PCRE2_DOTALL;
}
pcre2_code* re = pcre2_compile( pcre2_code* re = pcre2_compile(
(PCRE2_SPTR)str, (PCRE2_SPTR)str,
len, len,
0, PCRE2_UTF,
&errorNumber, &errorNumber,
&errorOffset, &errorOffset,
NULL NULL