From 1de4d0a4a3d25793cc2e12f7ed98688474ea1da0 Mon Sep 17 00:00:00 2001 From: takase1121 <20792268+takase1121@users.noreply.github.com> Date: Sat, 9 Oct 2021 12:32:36 +0800 Subject: [PATCH] refactor regex API again This time, the API is made easier to use from the Lua side. - regex is now a proper userdata instead of a table wrapped in userdata - add regex:get_metadata() - remove regex:nametable() option in favor of regex:get_metadata() --- src/api/regex.c | 112 ++++++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 52 deletions(-) diff --git a/src/api/regex.c b/src/api/regex.c index 4ab24c8b..b5a01283 100644 --- a/src/api/regex.c +++ b/src/api/regex.c @@ -6,28 +6,10 @@ #include #include - -// something similiar to luaL_checkudata() but for regex only -static pcre2_code* check_regex(lua_State* L, int arg) { - luaL_checktype(L, arg, LUA_TTABLE); - int hasmt = 0; - pcre2_code* re = NULL; - - if (lua_getmetatable(L, arg)) { - luaL_getmetatable(L, API_TYPE_REGEX); - hasmt = lua_rawequal(L, -1, -2); - lua_pop(L, 2); - } - - lua_rawgeti(L, arg, 1); - re = lua_touserdata(L, -1); - lua_pop(L, 1); - - if (!hasmt || re == NULL) - luaL_argerror(L, arg, "invalid regex object"); - - return re; -} +typedef struct regex_t { + pcre2_code *re; + int metadata; +} regex_t; static int pcre2_error(lua_State *L, int rc, const char *fmt, ...) { va_list ap; @@ -45,9 +27,10 @@ static int pcre2_error(lua_State *L, int rc, const char *fmt, ...) 
{ } static int f_pcre_gc(lua_State* L) { - pcre2_code* re = check_regex(L, 1); - if (re) - pcre2_code_free(re); + regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX); + luaL_unref(L, LUA_REGISTRYINDEX, re->metadata); + if (re->re) + pcre2_code_free(re->re); return 0; } @@ -76,25 +59,25 @@ static int f_pcre_compile(lua_State *L) { if (re == NULL) return pcre2_error(L, error, "regex compilation failed at %d: ", error_offset); + + regex_t *regex = (regex_t*) lua_newuserdata(L, sizeof(regex_t)); + luaL_setmetatable(L, API_TYPE_REGEX); + regex->re = re; lua_newtable(L); - luaL_setmetatable(L, API_TYPE_REGEX); - - lua_pushlightuserdata(L, (void*) re); - lua_rawseti(L, -2, 1); lua_pushvalue(L, 1); lua_setfield(L, -2, "source"); lua_pushstring(L, optstr); lua_setfield(L, -2, "flags"); + + regex->metadata = luaL_ref(L, LUA_REGISTRYINDEX); return 1; } -// get nametable (useful for named captures) -static int f_pcre_nametable(lua_State* L) { - pcre2_code* re = check_regex(L, 1); +static int get_nametable(lua_State *L, pcre2_code *re) { int ret; uint32_t namecount, entrysize; PCRE2_SPTR nametable; @@ -107,7 +90,7 @@ static int f_pcre_nametable(lua_State* L) { pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &entrysize); lua_createtable(L, namecount, 0); - for (uint32_t i = 0; i < namecount; i++) { + for (uint32_t i = 1; i <= namecount; i++) { uint16_t index = (nametable[1] << 0) | (nametable[0] << 8); nametable += 2; @@ -116,7 +99,7 @@ static int f_pcre_nametable(lua_State* L) { lua_setfield(L, -2, "index"); lua_pushstring(L, (const char *) nametable); lua_setfield(L, -2, "name"); - lua_rawseti(L, -2, i + 1); + lua_rawseti(L, -2, i); nametable += (entrysize - 2); } @@ -124,26 +107,52 @@ static int f_pcre_nametable(lua_State* L) { return 1; } +static int f_pcre_get_metadata(lua_State *L) { + regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX); + + lua_rawgeti(L, LUA_REGISTRYINDEX, re->metadata); + lua_getfield(L, -1, "nametable"); + if (lua_type(L, -1) == LUA_TTABLE) { + 
lua_pop(L, 1); + } else { + lua_pop(L, 1); + get_nametable(L, re->re); + lua_setfield(L, -2, "nametable"); + } + + return 1; +} + +static size_t check_bitfield(lua_State *L, int t) { + size_t size, bit = 0; + luaL_checktype(L, t, LUA_TTABLE); + size = lua_rawlen(L, t); + for (size_t i = 1; i <= size; i++) { + lua_rawgeti(L, t, i); + bit |= luaL_checkinteger(L, -1); + lua_pop(L, 1); + } + return bit; +} + // Takes string, compiled regex, returns list of indices of matched groups // (including the whole match), if a match was found. static int f_pcre_match(lua_State *L) { size_t len, offset, options; - pcre2_code* re = check_regex(L, 1); + regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX); const char* str = luaL_checklstring(L, 2, &len); offset = luaL_optnumber(L, 3, 1); - options = luaL_optnumber(L, 4, 0); + options = lua_gettop(L) == 4 ? check_bitfield(L, 4) : 0; - pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL); - int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, options, md, NULL); + pcre2_match_data* md = pcre2_match_data_create_from_pattern(re->re, NULL); + int rc = pcre2_match(re->re, (PCRE2_SPTR)str, len, offset - 1, options, md, NULL); if (rc < 0) { pcre2_match_data_free(md); - if (rc != PCRE2_ERROR_NOMATCH) { - PCRE2_UCHAR buffer[256]; - pcre2_get_error_message(rc, buffer, sizeof(buffer)); - luaL_error(L, "regex matching error: %s", buffer); - } - return 0; + if (rc != PCRE2_ERROR_NOMATCH) + return pcre2_error(L, rc, "regex matching error: "); + else + return 0; } PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md); @@ -152,9 +161,8 @@ static int f_pcre_match(lua_State *L) { /* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion to set the start of a match later than its end. In the editor, we just detect this case and give up. 
*/ - luaL_error(L, "regex matching error: \\K was used in an assertion to " - " set the match start after its end"); - return 0; + return luaL_error(L, "regex matching error: \\K was used in an assertion to " + " set the match start after its end"); } rc *= 2; @@ -167,11 +175,11 @@ static int f_pcre_match(lua_State *L) { } static const luaL_Reg lib[] = { - { "nametable", f_pcre_nametable }, - { "compile", f_pcre_compile }, - { "cmatch", f_pcre_match }, - { "__gc", f_pcre_gc }, - { NULL, NULL } + { "get_metadata", f_pcre_get_metadata }, + { "compile", f_pcre_compile }, + { "cmatch", f_pcre_match }, + { "__gc", f_pcre_gc }, + { NULL, NULL } }; int luaopen_regex(lua_State *L) {