From 0c9e5177914fc8283eba61e5aa4be1d95d4a69aa Mon Sep 17 00:00:00 2001
From: Adam Harrison
Date: Wed, 28 Apr 2021 00:22:26 -0400
Subject: [PATCH] Initial commit of PCRE engine.

---
 Review notes (ignored by git am):
 * regex.lua: compiled patterns are tables, not userdata, so they are now
   recognised by "not a string"; regex.gsub no longer mixes indices of
   different slices (it used to turn "a-b-c" into "a++" for "-" -> "+").
 * regex.c: match data is freed before raising, NULL results are checked,
   and the Lua stack is grown before pushing the capture indices.
 * Context-line whitespace and blank context lines were reconstructed from
   the hunk line counts; blob hashes below predate these changes.

 build.sh            |  2 +-
 data/core/init.lua  |  1 +
 data/core/regex.lua | 41 +++++++++++++++++++
 src/api/api.c       |  2 ++
 src/api/regex.c     | 93 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 data/core/regex.lua
 create mode 100644 src/api/regex.c

diff --git a/build.sh b/build.sh
index bfb679b0..6f605797 100755
--- a/build.sh
+++ b/build.sh
@@ -3,7 +3,7 @@ cflags+="-Wall -O3 -g -std=gnu11 -fno-strict-aliasing -Isrc -Ilib/font_renderer"
 cflags+=" $(pkg-config --cflags lua5.2) $(sdl2-config --cflags)"
 lflags="-static-libgcc -static-libstdc++"
 
-for package in libagg freetype2 lua5.2 x11; do
+for package in libagg freetype2 lua5.2 x11 libpcre2-8; do
   lflags+=" $(pkg-config --libs $package)"
 done
 lflags+=" $(sdl2-config --libs) -lm"
diff --git a/data/core/init.lua b/data/core/init.lua
index 8d4d84c3..1db71164 100644
--- a/data/core/init.lua
+++ b/data/core/init.lua
@@ -1,4 +1,5 @@
 require "core.strict"
+require "core.regex"
 local common = require "core.common"
 local config = require "core.config"
 local style = require "core.style"
diff --git a/data/core/regex.lua b/data/core/regex.lua
new file mode 100644
index 00000000..a8873a5f
--- /dev/null
+++ b/data/core/regex.lua
@@ -0,0 +1,41 @@
+
+-- So that in addition to regex.gsub(pattern, string), we can also do pattern:gsub(string).
+regex.__index = regex
+
+-- regex.compile returns a table (holding the compiled code, with regex as its
+-- metatable), so anything that is not a string is taken to be compiled already.
+local function to_pattern(pattern_string)
+  if type(pattern_string) == "string" then
+    return regex.compile(pattern_string)
+  end
+  return pattern_string
+end
+
+-- Returns the 1-based start index and the index one past the end of the whole
+-- match, followed by the same pair for each capture group; nothing if no match.
+regex.match = function(pattern_string, string)
+  return regex.cmatch(to_pattern(pattern_string), string)
+end
+
+-- Build off matching. For now, only support basic replacements, but capture groupings should be doable.
+-- We can even have custom group replacements and transformations and stuff in lua.
+-- Replaces every non-empty match of the pattern in `string` with the literal
+-- `replacement`. An empty match is replaced once and then ends the scan, which
+-- guarantees termination. Each search runs on the unprocessed tail of the
+-- subject, so anchors and lookbehinds treat that tail as the whole subject.
+regex.gsub = function(pattern_string, string, replacement)
+  local pattern = to_pattern(pattern_string)
+  local pieces, pos = {}, 1
+  while true do
+    -- Indices are relative to the tail starting at `pos`; `e` is one past the match.
+    local s, e = regex.cmatch(pattern, string:sub(pos))
+    if not s then break end
+    pieces[#pieces + 1] = string:sub(pos, pos + s - 2)
+    pieces[#pieces + 1] = replacement
+    pos = pos + e - 1
+    if s == e then break end
+  end
+  pieces[#pieces + 1] = string:sub(pos)
+  return table.concat(pieces)
+end
+
diff --git a/src/api/api.c b/src/api/api.c
index 34067a9c..5ea2e782 100644
--- a/src/api/api.c
+++ b/src/api/api.c
@@ -3,11 +3,13 @@
 
 int luaopen_system(lua_State *L);
 int luaopen_renderer(lua_State *L);
+int luaopen_regex(lua_State *L);
 
 
 static const luaL_Reg libs[] = {
   { "system", luaopen_system },
   { "renderer", luaopen_renderer },
+  { "regex", luaopen_regex },
   { NULL, NULL }
 };
 
diff --git a/src/api/regex.c b/src/api/regex.c
new file mode 100644
index 00000000..0392d783
--- /dev/null
+++ b/src/api/regex.c
@@ -0,0 +1,93 @@
+#include "api.h"
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+/* NOTE(review): the header names on the next two lines were missing. <pcre2.h> is
+   required by the pcre2_* calls below; <string.h> is an assumption -- confirm. */
+#include <string.h>
+#include <pcre2.h>
+
+/* __gc metamethod: frees the compiled pattern stored at index 1 of the regex table. */
+static int f_pcre_gc(lua_State* L) {
+  lua_rawgeti(L, -1, 1);
+  pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
+  if (re)
+    pcre2_code_free(re);
+  return 0;
+}
+
+/* Compiles the pattern string and returns a table holding the compiled code at index 1,
+   with the registry "regex" table as its metatable. Raises a Lua error if compilation fails. */
+static int f_pcre_compile(lua_State *L) {
+  size_t len;
+  PCRE2_SIZE errorOffset;
+  int errorNumber;
+  const char* str = luaL_checklstring(L, -1, &len);
+  pcre2_code* re = pcre2_compile((PCRE2_SPTR)str, len, 0, &errorNumber, &errorOffset, NULL);
+  if (re) {
+    lua_newtable(L);
+    lua_pushlightuserdata(L, re);
+    lua_rawseti(L, -2, 1);
+    lua_getfield(L, LUA_REGISTRYINDEX, "regex");
+    lua_setmetatable(L, -2);
+    return 1;
+  }
+  PCRE2_UCHAR buffer[256];
+  pcre2_get_error_message(errorNumber, buffer, sizeof(buffer));
+  return luaL_error(L, "regex compilation failed at offset %d: %s", (int)errorOffset, (const char*)buffer);
+}
+
+// Takes compiled regex, string; returns list of 1-based indices of matched groups (including the whole
+// match), if a match was found. Each pair is the start and one past the end of the group.
+static int f_pcre_match(lua_State *L) {
+  size_t len;
+  const char* str = luaL_checklstring(L, -1, &len);
+  luaL_checktype(L, -2, LUA_TTABLE);
+  lua_rawgeti(L, -2, 1);
+  pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
+  if (!re)
+    return luaL_error(L, "regex matching error: argument is not a compiled regex");
+  pcre2_match_data* match_data = pcre2_match_data_create_from_pattern(re, NULL);
+  if (!match_data)
+    return luaL_error(L, "regex matching error: out of memory");
+  int rc = pcre2_match(re, (PCRE2_SPTR)str, len, 0, 0, match_data, NULL);
+  if (rc < 0) {
+    pcre2_match_data_free(match_data);
+    if (rc != PCRE2_ERROR_NOMATCH)
+      luaL_error(L, "regex matching error %d", rc);
+    return 0;
+  }
+  PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(match_data);
+  if (ovector[0] > ovector[1]) {
+    /* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
+       to set the start of a match later than its end. In the editor, we just detect this case and give up.
+       luaL_error does not return, so the match data has to be released before raising. */
+    pcre2_match_data_free(match_data);
+    return luaL_error(L, "regex matching error: \\K was used in an assertion to set the match start after its end");
+  }
+  /* One start/end pair is pushed per group; make sure the Lua stack can hold them all. */
+  if (!lua_checkstack(L, rc*2)) {
+    pcre2_match_data_free(match_data);
+    return luaL_error(L, "regex matching error: too many captures");
+  }
+  for (int i = 0; i < rc*2; i++)
+    lua_pushnumber(L, ovector[i]+1);
+  pcre2_match_data_free(match_data);
+  return rc*2;
+}
+
+static const luaL_Reg lib[] = {
+  { "compile", f_pcre_compile },
+  { "cmatch", f_pcre_match },
+  { "__gc", f_pcre_gc },
+  { NULL, NULL }
+};
+
+int luaopen_regex(lua_State *L) {
+  luaL_newlib(L, lib);
+  lua_pushliteral(L, "regex");
+  lua_setfield(L, -2, "__name");
+  lua_pushvalue(L, -1);
+  lua_setfield(L, LUA_REGISTRYINDEX, "regex");
+  return 1;
+}