Compare commits
2 Commits
amiga2.1
...
regex-impr
Author | SHA1 | Date |
---|---|---|
takase1121 | 1de4d0a4a3 | |
takase1121 | 09d6087df8 |
|
@ -8,6 +8,7 @@
|
||||||
#define API_TYPE_FONT "Font"
|
#define API_TYPE_FONT "Font"
|
||||||
#define API_TYPE_REPLACE "Replace"
|
#define API_TYPE_REPLACE "Replace"
|
||||||
#define API_TYPE_PROCESS "Process"
|
#define API_TYPE_PROCESS "Process"
|
||||||
|
#define API_TYPE_REGEX "Regex"
|
||||||
|
|
||||||
void api_load_libs(lua_State *L);
|
void api_load_libs(lua_State *L);
|
||||||
|
|
||||||
|
|
223
src/api/regex.c
223
src/api/regex.c
|
@ -2,116 +2,209 @@
|
||||||
|
|
||||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <pcre2.h>
|
#include <pcre2.h>
|
||||||
|
|
||||||
|
typedef struct regex_t {
|
||||||
|
pcre2_code *re;
|
||||||
|
int metadata;
|
||||||
|
} regex_t;
|
||||||
|
|
||||||
|
/*
 * Raises a Lua error whose message is the formatted prefix `fmt`
 * immediately followed by PCRE2's textual description of error code `rc`.
 *
 *   L    Lua state in which the error is raised.
 *   rc   PCRE2 error code (compile, match, or pattern-info error).
 *   fmt  lua_pushvfstring()-style format for the message prefix. Only the
 *        conversions supported by lua_pushfstring() are available, and `%d`
 *        consumes an `int`: wider arguments (e.g. a PCRE2_SIZE offset) must
 *        be cast by the caller.
 *
 * This function does not return to its caller: lua_error() performs a long
 * jump. The `int` return type only exists so call sites can write
 * `return pcre2_error(...)`.
 */
static int pcre2_error(lua_State *L, int rc, const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  lua_pushvfstring(L, fmt, ap);
  va_end(ap);

  PCRE2_UCHAR err[256];
  /* The buffer length is expressed in code units, not bytes. */
  int msg_rc = pcre2_get_error_message(rc, err, sizeof(err) / sizeof(err[0]));
  if (msg_rc == PCRE2_ERROR_BADDATA) {
    /* Unknown error code: do not rely on `err` having been filled in,
       report the raw number instead. */
    lua_pushfstring(L, "unknown PCRE2 error %d", rc);
  } else {
    /* On PCRE2_ERROR_NOMEMORY the text is truncated but still terminated. */
    lua_pushstring(L, (const char *) err);
  }

  /* prefix .. description */
  lua_concat(L, 2);
  return lua_error(L);
}
|
||||||
|
|
||||||
static int f_pcre_gc(lua_State* L) {
|
static int f_pcre_gc(lua_State* L) {
|
||||||
lua_rawgeti(L, -1, 1);
|
regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX);
|
||||||
pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
|
luaL_unref(L, LUA_REGISTRYINDEX, re->metadata);
|
||||||
if (re)
|
if (re->re)
|
||||||
pcre2_code_free(re);
|
pcre2_code_free(re->re);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int f_pcre_compile(lua_State *L) {
|
static int f_pcre_compile(lua_State *L) {
|
||||||
size_t len;
|
size_t len;
|
||||||
PCRE2_SIZE errorOffset;
|
int options = PCRE2_UTF;
|
||||||
int errorNumber;
|
const char* pattern = luaL_checklstring(L, 1, &len);
|
||||||
int pattern = PCRE2_UTF;
|
const char* optstr = luaL_optstring(L, 2, "");
|
||||||
const char* str = luaL_checklstring(L, 1, &len);
|
if (strstr(optstr,"i"))
|
||||||
if (lua_gettop(L) > 1) {
|
options |= PCRE2_CASELESS;
|
||||||
const char* options = luaL_checkstring(L, 2);
|
if (strstr(optstr,"m"))
|
||||||
if (strstr(options,"i"))
|
options |= PCRE2_MULTILINE;
|
||||||
pattern |= PCRE2_CASELESS;
|
if (strstr(optstr,"s"))
|
||||||
if (strstr(options,"m"))
|
options |= PCRE2_DOTALL;
|
||||||
pattern |= PCRE2_MULTILINE;
|
|
||||||
if (strstr(options,"s"))
|
int error;
|
||||||
pattern |= PCRE2_DOTALL;
|
PCRE2_SIZE error_offset;
|
||||||
}
|
|
||||||
pcre2_code* re = pcre2_compile(
|
pcre2_code* re = pcre2_compile(
|
||||||
(PCRE2_SPTR)str,
|
(PCRE2_SPTR)pattern,
|
||||||
len,
|
len,
|
||||||
pattern,
|
options,
|
||||||
&errorNumber,
|
&error,
|
||||||
&errorOffset,
|
&error_offset,
|
||||||
NULL
|
NULL
|
||||||
);
|
);
|
||||||
if (re) {
|
|
||||||
lua_newtable(L);
|
if (re == NULL)
|
||||||
lua_pushlightuserdata(L, re);
|
return pcre2_error(L, error, "regex compilation failed at %d: ", error_offset);
|
||||||
lua_rawseti(L, -2, 1);
|
|
||||||
luaL_setmetatable(L, "regex");
|
regex_t *regex = (regex_t*) lua_newuserdata(L, sizeof(regex_t));
|
||||||
return 1;
|
luaL_setmetatable(L, API_TYPE_REGEX);
|
||||||
|
regex->re = re;
|
||||||
|
|
||||||
|
lua_newtable(L);
|
||||||
|
|
||||||
|
lua_pushvalue(L, 1);
|
||||||
|
lua_setfield(L, -2, "source");
|
||||||
|
|
||||||
|
lua_pushstring(L, optstr);
|
||||||
|
lua_setfield(L, -2, "flags");
|
||||||
|
|
||||||
|
regex->metadata = luaL_ref(L, LUA_REGISTRYINDEX);
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int get_nametable(lua_State *L, pcre2_code *re) {
|
||||||
|
int ret;
|
||||||
|
uint32_t namecount, entrysize;
|
||||||
|
PCRE2_SPTR nametable;
|
||||||
|
|
||||||
|
ret = pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &nametable);
|
||||||
|
if (ret)
|
||||||
|
return pcre2_error(L, ret, "cannot get PCRE2_INFO_NAMETABLE: ");
|
||||||
|
|
||||||
|
pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &namecount);
|
||||||
|
pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &entrysize);
|
||||||
|
|
||||||
|
lua_createtable(L, namecount, 0);
|
||||||
|
for (uint32_t i = 1; i <= namecount; i++) {
|
||||||
|
uint16_t index = (nametable[1] << 0) | (nametable[0] << 8);
|
||||||
|
nametable += 2;
|
||||||
|
|
||||||
|
lua_createtable(L, 0, 2);
|
||||||
|
lua_pushnumber(L, index);
|
||||||
|
lua_setfield(L, -2, "index");
|
||||||
|
lua_pushstring(L, (const char *) nametable);
|
||||||
|
lua_setfield(L, -2, "name");
|
||||||
|
lua_rawseti(L, -2, i);
|
||||||
|
|
||||||
|
nametable += (entrysize - 2);
|
||||||
}
|
}
|
||||||
PCRE2_UCHAR buffer[256];
|
|
||||||
pcre2_get_error_message(errorNumber, buffer, sizeof(buffer));
|
return 1;
|
||||||
lua_pushnil(L);
|
}
|
||||||
char message[1024];
|
|
||||||
len = snprintf(message, sizeof(message), "regex compilation failed at offset %d: %s", (int)errorOffset, buffer);
|
static int f_pcre_get_metadata(lua_State *L) {
|
||||||
lua_pushlstring(L, message, len);
|
regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX);
|
||||||
return 2;
|
|
||||||
|
lua_rawgeti(L, LUA_REGISTRYINDEX, re->metadata);
|
||||||
|
lua_getfield(L, -1, "nametable");
|
||||||
|
if (lua_type(L, -1) == LUA_TTABLE) {
|
||||||
|
lua_pop(L, 1);
|
||||||
|
} else {
|
||||||
|
lua_pop(L, 1);
|
||||||
|
get_nametable(L, re->re);
|
||||||
|
lua_setfield(L, -2, "nametable");
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t check_bitfield(lua_State *L, int t) {
|
||||||
|
size_t size, bit = 0;
|
||||||
|
luaL_checktype(L, t, LUA_TTABLE);
|
||||||
|
size = lua_rawlen(L, t);
|
||||||
|
for (size_t i = 1; i <= size; i++) {
|
||||||
|
lua_rawgeti(L, t, i);
|
||||||
|
bit |= luaL_checkinteger(L, -1);
|
||||||
|
lua_pop(L, 1);
|
||||||
|
}
|
||||||
|
return bit;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Takes string, compiled regex, returns list of indices of matched groups
|
// Takes string, compiled regex, returns list of indices of matched groups
|
||||||
// (including the whole match), if a match was found.
|
// (including the whole match), if a match was found.
|
||||||
static int f_pcre_match(lua_State *L) {
|
static int f_pcre_match(lua_State *L) {
|
||||||
size_t len, offset = 1, opts = 0;
|
size_t len, offset, options;
|
||||||
luaL_checktype(L, 1, LUA_TTABLE);
|
regex_t *re = luaL_checkudata(L, 1, API_TYPE_REGEX);
|
||||||
const char* str = luaL_checklstring(L, 2, &len);
|
const char* str = luaL_checklstring(L, 2, &len);
|
||||||
if (lua_gettop(L) > 2)
|
|
||||||
offset = luaL_checknumber(L, 3);
|
offset = luaL_optnumber(L, 3, 1);
|
||||||
if (lua_gettop(L) > 3)
|
options = lua_gettop(L) == 4 ? check_bitfield(L, 4) : 0;
|
||||||
opts = luaL_checknumber(L, 4);
|
|
||||||
lua_rawgeti(L, 1, 1);
|
pcre2_match_data* md = pcre2_match_data_create_from_pattern(re->re, NULL);
|
||||||
pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
|
int rc = pcre2_match(re->re, (PCRE2_SPTR)str, len, offset - 1, options, md, NULL);
|
||||||
pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL);
|
|
||||||
int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, opts, md, NULL);
|
|
||||||
if (rc < 0) {
|
if (rc < 0) {
|
||||||
pcre2_match_data_free(md);
|
pcre2_match_data_free(md);
|
||||||
if (rc != PCRE2_ERROR_NOMATCH)
|
if (rc != PCRE2_ERROR_NOMATCH)
|
||||||
luaL_error(L, "regex matching error %d", rc);
|
return pcre2_error(L, rc, "regex matching error: ");
|
||||||
return 0;
|
else
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md);
|
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md);
|
||||||
if (ovector[0] > ovector[1]) {
|
if (ovector[0] > ovector[1]) {
|
||||||
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
pcre2_match_data_free(md);
|
||||||
|
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
||||||
assertion to set the start of a match later than its end. In the editor,
|
assertion to set the start of a match later than its end. In the editor,
|
||||||
we just detect this case and give up. */
|
we just detect this case and give up. */
|
||||||
luaL_error(L, "regex matching error: \\K was used in an assertion to "
|
return luaL_error(L, "regex matching error: \\K was used in an assertion to "
|
||||||
" set the match start after its end");
|
" set the match start after its end");
|
||||||
pcre2_match_data_free(md);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
for (int i = 0; i < rc*2; i++)
|
|
||||||
lua_pushnumber(L, ovector[i]+1);
|
rc *= 2;
|
||||||
|
luaL_checkstack(L, rc, NULL);
|
||||||
|
for (int i = 0; i < rc; i++)
|
||||||
|
// for every even vector (the end of a pair), we do not increment by 1
|
||||||
|
lua_pushnumber(L, ovector[i] + (i + 1) % 2);
|
||||||
pcre2_match_data_free(md);
|
pcre2_match_data_free(md);
|
||||||
return rc*2;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const luaL_Reg lib[] = {
|
static const luaL_Reg lib[] = {
|
||||||
{ "compile", f_pcre_compile },
|
{ "get_metadata", f_pcre_get_metadata },
|
||||||
{ "cmatch", f_pcre_match },
|
{ "compile", f_pcre_compile },
|
||||||
{ "__gc", f_pcre_gc },
|
{ "cmatch", f_pcre_match },
|
||||||
{ NULL, NULL }
|
{ "__gc", f_pcre_gc },
|
||||||
|
{ NULL, NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
int luaopen_regex(lua_State *L) {
|
int luaopen_regex(lua_State *L) {
|
||||||
luaL_newlib(L, lib);
|
luaL_newmetatable(L, API_TYPE_REGEX);
|
||||||
lua_pushliteral(L, "regex");
|
luaL_setfuncs(L, lib, 0);
|
||||||
lua_setfield(L, -2, "__name");
|
|
||||||
lua_pushvalue(L, -1);
|
lua_pushvalue(L, -1);
|
||||||
lua_setfield(L, LUA_REGISTRYINDEX, "regex");
|
lua_setfield(L, -2, "__index");
|
||||||
|
|
||||||
lua_pushnumber(L, PCRE2_ANCHORED);
|
lua_pushnumber(L, PCRE2_ANCHORED);
|
||||||
lua_setfield(L, -2, "ANCHORED");
|
lua_setfield(L, -2, "ANCHORED");
|
||||||
lua_pushnumber(L, PCRE2_ANCHORED) ;
|
|
||||||
lua_setfield(L, -2, "ENDANCHORED");
|
lua_pushnumber(L, PCRE2_ANCHORED) ;
|
||||||
|
lua_setfield(L, -2, "ENDANCHORED");
|
||||||
|
|
||||||
lua_pushnumber(L, PCRE2_NOTBOL);
|
lua_pushnumber(L, PCRE2_NOTBOL);
|
||||||
lua_setfield(L, -2, "NOTBOL");
|
lua_setfield(L, -2, "NOTBOL");
|
||||||
|
|
||||||
lua_pushnumber(L, PCRE2_NOTEOL);
|
lua_pushnumber(L, PCRE2_NOTEOL);
|
||||||
lua_setfield(L, -2, "NOTEOL");
|
lua_setfield(L, -2, "NOTEOL");
|
||||||
|
|
||||||
lua_pushnumber(L, PCRE2_NOTEMPTY);
|
lua_pushnumber(L, PCRE2_NOTEMPTY);
|
||||||
lua_setfield(L, -2, "NOTEMPTY");
|
lua_setfield(L, -2, "NOTEMPTY");
|
||||||
|
|
||||||
lua_pushnumber(L, PCRE2_NOTEMPTY_ATSTART);
|
lua_pushnumber(L, PCRE2_NOTEMPTY_ATSTART);
|
||||||
lua_setfield(L, -2, "NOTEMPTY_ATSTART");
|
lua_setfield(L, -2, "NOTEMPTY_ATSTART");
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue