143 lines
5.8 KiB
Lua
143 lines
5.8 KiB
Lua
-- Lookups that go through `__index` are answered from the `regex` table
-- itself, so that besides regex.gsub(pattern, string) the method form
-- pattern:gsub(string) can be used as well.
regex.__index = function(_, name)
  return regex[name]
end
|
|
|
|
---Searches `str` for the first occurrence of `pattern` and reports offsets only.
---On success the start and end indices of the occurrence inside `str` are
---returned; when nothing matches, `nil` is returned.
---For patterns with captures, the start and end index of every capture
---follow the first two values.
---
---@param pattern string|table The regex pattern to use, either as a simple string or precompiled.
---@param str string The string to search for valid matches.
---@param offset? integer The position on the subject to start searching (defaults to 1).
---@param options? integer A bit field of matching options, eg: regex.NOTBOL | regex.NOTEMPTY (defaults to 0).
---
---@return integer? start Offset where the first match was found; `nil` if no match.
---@return integer? end Offset where the first match ends; `nil` if no match.
---@return integer? ... #Captured matches offsets.
regex.find_offsets = function(pattern, str, offset, options)
  -- Anything that is not a table is handed to regex.compile first.
  local compiled = pattern
  if type(compiled) ~= "table" then
    compiled = regex.compile(compiled)
  end

  local start_at = offset or 1
  local flags = options or 0
  local offsets = { regex.cmatch(compiled, str, start_at, flags) }

  -- Every even slot holds an end offset; shift each of them down by 1.
  local count = #offsets
  local slot = 2
  while slot <= count do
    offsets[slot] = offsets[slot] - 1
    slot = slot + 2
  end

  return table.unpack(offsets)
end
|
|
|
|
---Counterpart of `string.match` built on top of `regex.find`.
---Searches `str` for the first occurrence of `pattern`.
---Returns the matched text on success and `nil` when nothing matches.
---When the pattern defines captures, only the captured strings are returned.
---An empty capture is reported as its offset instead of a string.
---
---@param pattern string|table The regex pattern to use, either as a simple string or precompiled.
---@param str string The string to search for valid matches.
---@param offset? integer The position on the subject to start searching.
---@param options? integer A bit field of matching options, eg: regex.NOTBOL | regex.NOTEMPTY
---
---@return (string|integer)? ... #List of captured matches; the entire match if no matches were specified; if the match is empty, its offset is returned instead.
regex.match = function(pattern, str, offset, options)
  local found = { regex.find(pattern, str, offset, options) }
  local count = #found

  -- No values from regex.find means no match.
  if count == 0 then return end

  -- Only start/end came back: there are no captures, return the whole match.
  if count <= 2 then
    return string.sub(str, found[1], found[2])
  end

  -- Captures are present: skip the two leading indices.
  return table.unpack(found, 3)
end
|
|
|
|
---Counterpart of `string.find` built on top of `regex.find_offsets`.
---Searches `str` for the first occurrence of `pattern`.
---On success the start and end indices of the occurrence inside `str` are
---returned; when nothing matches, `nil` is returned.
---For patterns with captures, the captured strings follow the two indices.
---An empty capture is reported as its offset instead of a string.
---
---@param pattern string|table The regex pattern to use, either as a simple string or precompiled.
---@param str string The string to search for valid matches.
---@param offset? integer The position on the subject to start searching.
---@param options? integer A bit field of matching options, eg: regex.NOTBOL | regex.NOTEMPTY
---
---@return integer? start Offset where the first match was found; `nil` if no match.
---@return integer? end Offset where the first match ends; `nil` if no match.
---@return (string|integer)? ... #List of captured matches; if the match is empty, its offset is returned instead.
regex.find = function(pattern, str, offset, options)
  local offsets = { regex.find_offsets(pattern, str, offset, options) }
  local total = #offsets
  if total == 0 then return end

  -- The overall match keeps its numeric start/end.
  local out = { offsets[1], offsets[2] }

  -- The remaining values come in (start, end) pairs, one pair per capture.
  for i = 3, total, 2 do
    local first, last = offsets[i], offsets[i + 1]
    if first > last then
      -- Like in string.find, a group of size 0 yields its index.
      out[#out + 1] = first
    else
      out[#out + 1] = string.sub(str, first, last)
    end
  end

  return table.unpack(out)
end
|
|
|
|
-- Steps backwards from `index` past any UTF-8 continuation bytes
-- (128..191), so that we don't replace bits mid character.
-- Returns the index of the first byte found before `index` that is not a
-- continuation byte, or 0 when the start of the string is reached first
-- (nothing precedes `index`, or only continuation bytes do).
local function previous_character(str, index)
  repeat
    index = index - 1
    -- Stop at the string start: string.byte would return nil for index 0
    -- and would wrap around to the end of the string for negative indices.
    if index < 1 then return 0 end
    local byte = string.byte(str, index)
    -- `not byte` covers positions past the end of the string.
  until not byte or byte < 128 or byte >= 192
  return index
end
|
|
|
|
-- Advances `index` over the UTF-8 continuation bytes (128..191) that
-- directly follow it and returns the index of the last such byte, i.e. the
-- end of the character identified by `index`. Returns `index` unchanged
-- when the next byte is absent or is not a continuation byte.
local function end_character(str, index)
  while true do
    local next_byte = string.byte(str, index + 1)
    if not next_byte or next_byte < 128 or next_byte >= 192 then
      return index
    end
    index = index + 1
  end
end
|
|
|
|
-- Replaces matches of a regex in `str`, building on `regex.cmatch`.
-- Group references in `replacement` are written as \1 - \9 and are replaced
-- with the text of the corresponding capture; references to groups the
-- match does not have (including \0) are removed.
-- Captured text is inserted literally: a `%` or a `\N` sequence inside a
-- capture is neither interpreted nor stripped.
-- Should work on UTF-8 text.
--
-- pattern_string: a pattern string (passed to regex.compile) or a table,
--                 which is used as the pattern as-is.
-- str:            the subject string.
-- replacement:    the replacement template.
--
-- Returns three values:
--   1. the resulting string;
--   2. `matches`: one entry per match, each the list of indices returned by
--      regex.cmatch for that match;
--   3. `replacements`: one entry per match, `{ start, start + length }`
--      where `start` is the match start in `str` and `length` is the byte
--      length of the text substituted for it.
regex.gsub = function(pattern_string, str, replacement)
  local pattern = type(pattern_string) == "table" and
    pattern_string or regex.compile(pattern_string)
  local result, matches, replacements = {}, {}, {}
  -- NOTE(review): regex.find_offsets defaults its start offset to 1 while
  -- this starts at 0; kept as-is because cmatch is not visible here.
  local offset = 0
  local indices
  repeat
    indices = { regex.cmatch(pattern, str, offset) }
    if #indices > 0 then
      matches[#matches + 1] = indices

      -- indices holds (start, end) pairs: one for the whole match followed
      -- by one per capture group.
      local group_count = #indices / 2 - 1

      -- Expand every \N in a single pass. A function replacement is used
      -- because its result is inserted verbatim, whereas a replacement
      -- *string* would treat `%` in the captured text as an escape.
      local current = string.gsub(replacement, "\\(%d)", function(digit)
        local group = tonumber(digit)
        if group < 1 or group > group_count then
          return ""
        end
        local first = indices[group * 2 + 1]
        -- Extend the end to the last byte of the UTF-8 character it falls on.
        local last = end_character(str, indices[group * 2 + 2] - 1)
        return str:sub(first, last)
      end)

      replacements[#replacements + 1] = { indices[1], #current + indices[1] }

      if indices[1] > 1 then
        -- Keep the untouched text between the previous match and this one.
        local gap = str:sub(offset, previous_character(str, indices[1]))
        result[#result + 1] = gap .. current
      else
        result[#result + 1] = current
      end

      -- Continue searching from the end index reported for this match.
      offset = indices[2]
    end
    -- Stop when nothing matched, or after a match whose start equals its
    -- reported end (presumably an empty match, to avoid looping forever).
  until #indices == 0 or indices[1] == indices[2]
  return table.concat(result) .. str:sub(offset), matches, replacements
end
|
|
|