refactor regex API again
This time, the goal is to make it easier to use from the Lua side: (1) regex is now a proper userdata instead of a table wrapped in userdata; (2) add regex:get_metadata(); (3) remove the regex:nametable() option in favor of regex:get_metadata().
This commit is contained in:
parent
09d6087df8
commit
1de4d0a4a3
112
src/api/regex.c
112
src/api/regex.c
|
@ -6,28 +6,10 @@
|
|||
#include <string.h>
|
||||
#include <pcre2.h>
|
||||
|
||||
|
||||
// Something similar to luaL_checkudata(), but for regex objects only.
// The value at stack index `arg` must be a table; its metatable is compared
// against the one registered under API_TYPE_REGEX, and the userdata pointer
// stored at integer key 1 of the table is read back.
// Raises a Lua argument error ("invalid regex object") when the metatable
// does not match or the stored pointer is NULL; otherwise returns the pointer.
|
||||
static pcre2_code* check_regex(lua_State* L, int arg) {
|
||||
luaL_checktype(L, arg, LUA_TTABLE);
|
||||
int hasmt = 0;
|
||||
pcre2_code* re = NULL;
|
||||
|
||||
// hasmt stays 0 when the table has no metatable at all
if (lua_getmetatable(L, arg)) {
|
||||
luaL_getmetatable(L, API_TYPE_REGEX);
|
||||
hasmt = lua_rawequal(L, -1, -2);
|
||||
// drop both metatables pushed above
lua_pop(L, 2);
|
||||
}
|
||||
|
||||
// raw read of t[1]; the result is taken as the pcre2_code pointer
lua_rawgeti(L, arg, 1);
|
||||
re = lua_touserdata(L, -1);
|
||||
lua_pop(L, 1);
|
||||
|
||||
if (!hasmt || re == NULL)
|
||||
luaL_argerror(L, arg, "invalid regex object");
|
||||
|
||||
return re;
|
||||
}
|
||||
// Payload of a regex userdata: the compiled pattern together with a
// registry reference to its metadata table.
typedef struct regex_t {
|
||||
// compiled PCRE2 pattern; f_pcre_gc releases it with pcre2_code_free()
pcre2_code *re;
|
||||
// reference in LUA_REGISTRYINDEX (obtained with luaL_ref) to the metadata table
int metadata;
|
||||
} regex_t;
|
||||
|
||||
static int pcre2_error(lua_State *L, int rc, const char *fmt, ...) {
|
||||
va_list ap;
|
||||
|
@ -45,9 +27,10 @@ static int pcre2_error(lua_State *L, int rc, const char *fmt, ...) {
|
|||
}
|
||||
|
||||
static int f_pcre_gc(lua_State* L) {
|
||||
pcre2_code* re = check_regex(L, 1);
|
||||
if (re)
|
||||
pcre2_code_free(re);
|
||||
regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX);
|
||||
luaL_unref(L, LUA_REGISTRYINDEX, re->metadata);
|
||||
if (re->re)
|
||||
pcre2_code_free(re->re);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -76,25 +59,25 @@ static int f_pcre_compile(lua_State *L) {
|
|||
|
||||
if (re == NULL)
|
||||
return pcre2_error(L, error, "regex compilation failed at %d: ", error_offset);
|
||||
|
||||
regex_t *regex = (regex_t*) lua_newuserdata(L, sizeof(regex_t));
|
||||
luaL_setmetatable(L, API_TYPE_REGEX);
|
||||
regex->re = re;
|
||||
|
||||
lua_newtable(L);
|
||||
luaL_setmetatable(L, API_TYPE_REGEX);
|
||||
|
||||
lua_pushlightuserdata(L, (void*) re);
|
||||
lua_rawseti(L, -2, 1);
|
||||
|
||||
lua_pushvalue(L, 1);
|
||||
lua_setfield(L, -2, "source");
|
||||
|
||||
lua_pushstring(L, optstr);
|
||||
lua_setfield(L, -2, "flags");
|
||||
|
||||
regex->metadata = luaL_ref(L, LUA_REGISTRYINDEX);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// get nametable (useful for named captures)
|
||||
static int f_pcre_nametable(lua_State* L) {
|
||||
pcre2_code* re = check_regex(L, 1);
|
||||
static int get_nametable(lua_State *L, pcre2_code *re) {
|
||||
int ret;
|
||||
uint32_t namecount, entrysize;
|
||||
PCRE2_SPTR nametable;
|
||||
|
@ -107,7 +90,7 @@ static int f_pcre_nametable(lua_State* L) {
|
|||
pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &entrysize);
|
||||
|
||||
lua_createtable(L, namecount, 0);
|
||||
for (uint32_t i = 0; i < namecount; i++) {
|
||||
for (uint32_t i = 1; i <= namecount; i++) {
|
||||
uint16_t index = (nametable[1] << 0) | (nametable[0] << 8);
|
||||
nametable += 2;
|
||||
|
||||
|
@ -116,7 +99,7 @@ static int f_pcre_nametable(lua_State* L) {
|
|||
lua_setfield(L, -2, "index");
|
||||
lua_pushstring(L, (const char *) nametable);
|
||||
lua_setfield(L, -2, "name");
|
||||
lua_rawseti(L, -2, i + 1);
|
||||
lua_rawseti(L, -2, i);
|
||||
|
||||
nametable += (entrysize - 2);
|
||||
}
|
||||
|
@ -124,26 +107,52 @@ static int f_pcre_nametable(lua_State* L) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Lua binding registered as "get_metadata".
// Argument 1 must be a userdata of type API_TYPE_REGEX (luaL_checkudata
// raises otherwise). Pushes the metadata table referenced by the regex and
// returns it as the single result. If the table's "nametable" field is not
// already a table, it is filled in from get_nametable() first, so later
// calls find it already present.
static int f_pcre_get_metadata(lua_State *L) {
|
||||
regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX);
|
||||
|
||||
// metadata table is now on top of the stack
lua_rawgeti(L, LUA_REGISTRYINDEX, re->metadata);
|
||||
lua_getfield(L, -1, "nametable");
|
||||
if (lua_type(L, -1) == LUA_TTABLE) {
|
||||
// already present: drop the field value, leaving the metadata table on top
lua_pop(L, 1);
|
||||
} else {
|
||||
// not present yet: drop the non-table value, then store whatever
// get_nametable() leaves on the stack under "nametable"
lua_pop(L, 1);
|
||||
get_nametable(L, re->re);
|
||||
lua_setfield(L, -2, "nametable");
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Folds the sequence stored in the table at stack index `t` into a single
// bitmask. The value at `t` must be a table (luaL_checktype raises
// otherwise). Elements 1..lua_rawlen(t) are read with raw access; each must
// be convertible to an integer (luaL_checkinteger raises otherwise) and all
// of them are OR-ed together. Returns 0 when the table has no elements.
static size_t check_bitfield(lua_State *L, int t) {
|
||||
size_t size, bit = 0;
|
||||
luaL_checktype(L, t, LUA_TTABLE);
|
||||
size = lua_rawlen(L, t);
|
||||
for (size_t i = 1; i <= size; i++) {
|
||||
lua_rawgeti(L, t, i);
|
||||
bit |= luaL_checkinteger(L, -1);
|
||||
// pop the element so the stack is balanced on every iteration
lua_pop(L, 1);
|
||||
}
|
||||
return bit;
|
||||
}
|
||||
|
||||
// Takes string, compiled regex, returns list of indices of matched groups
|
||||
// (including the whole match), if a match was found.
|
||||
static int f_pcre_match(lua_State *L) {
|
||||
size_t len, offset, options;
|
||||
pcre2_code* re = check_regex(L, 1);
|
||||
regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX);
|
||||
const char* str = luaL_checklstring(L, 2, &len);
|
||||
|
||||
offset = luaL_optnumber(L, 3, 1);
|
||||
options = luaL_optnumber(L, 4, 0);
|
||||
options = lua_gettop(L) == 4 ? check_bitfield(L, 4) : 0;
|
||||
|
||||
pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL);
|
||||
int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, options, md, NULL);
|
||||
pcre2_match_data* md = pcre2_match_data_create_from_pattern(re->re, NULL);
|
||||
int rc = pcre2_match(re->re, (PCRE2_SPTR)str, len, offset - 1, options, md, NULL);
|
||||
if (rc < 0) {
|
||||
pcre2_match_data_free(md);
|
||||
if (rc != PCRE2_ERROR_NOMATCH) {
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(rc, buffer, sizeof(buffer));
|
||||
luaL_error(L, "regex matching error: %s", buffer);
|
||||
}
|
||||
return 0;
|
||||
if (rc != PCRE2_ERROR_NOMATCH)
|
||||
return pcre2_error(L, rc, "regex matching error: ");
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md);
|
||||
|
@ -152,9 +161,8 @@ static int f_pcre_match(lua_State *L) {
|
|||
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
||||
assertion to set the start of a match later than its end. In the editor,
|
||||
we just detect this case and give up. */
|
||||
luaL_error(L, "regex matching error: \\K was used in an assertion to "
|
||||
" set the match start after its end");
|
||||
return 0;
|
||||
return luaL_error(L, "regex matching error: \\K was used in an assertion to "
|
||||
" set the match start after its end");
|
||||
}
|
||||
|
||||
rc *= 2;
|
||||
|
@ -167,11 +175,11 @@ static int f_pcre_match(lua_State *L) {
|
|||
}
|
||||
|
||||
static const luaL_Reg lib[] = {
|
||||
{ "nametable", f_pcre_nametable },
|
||||
{ "compile", f_pcre_compile },
|
||||
{ "cmatch", f_pcre_match },
|
||||
{ "__gc", f_pcre_gc },
|
||||
{ NULL, NULL }
|
||||
{ "get_metadata", f_pcre_get_metadata },
|
||||
{ "compile", f_pcre_compile },
|
||||
{ "cmatch", f_pcre_match },
|
||||
{ "__gc", f_pcre_gc },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
int luaopen_regex(lua_State *L) {
|
||||
|
|
Loading…
Reference in New Issue