2021-06-02 21:27:00 +02:00
|
|
|
#include "api.h"
|
|
|
|
|
|
|
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <pcre2.h>
|
|
|
|
|
|
|
|
/* Finalizer for compiled regex objects (registered as "__gc" in lib).
 *
 * A regex object is a Lua table whose slot 1 holds the pcre2_code pointer as
 * a light userdata. The compiled pattern is released and slot 1 is cleared,
 * so that a second invocation (this function is also reachable from Lua
 * through the module table) cannot free the same pointer twice.
 * Returns no values. */
static int f_pcre_gc(lua_State* L) {
  /* Nothing to release if we were not handed a regex table. */
  if (!lua_istable(L, 1))
    return 0;
  lua_rawgeti(L, 1, 1);
  pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
  lua_pop(L, 1);
  if (re) {
    pcre2_code_free(re);
    /* Drop the now-dangling pointer from the table. */
    lua_pushnil(L);
    lua_rawseti(L, 1, 1);
  }
  return 0;
}
|
|
|
|
|
|
|
|
/* Compiles a pattern into a regex object.
 *
 * Lua arguments:
 *   1: pattern string (always compiled with PCRE2_UTF)
 *   2: optional flag string; 'i' adds PCRE2_CASELESS, 'm' adds
 *      PCRE2_MULTILINE, 's' adds PCRE2_DOTALL
 *
 * On success returns one value: a table holding the compiled pattern as a
 * light userdata in slot 1, with the "regex" metatable applied.
 * On failure returns nil followed by an error message. */
static int f_pcre_compile(lua_State *L) {
  size_t length;
  const char *source = luaL_checklstring(L, 1, &length);

  int flags = PCRE2_UTF;
  if (lua_gettop(L) >= 2) {
    const char *letters = luaL_checkstring(L, 2);
    if (strchr(letters, 'i') != NULL) {
      flags |= PCRE2_CASELESS;
    }
    if (strchr(letters, 'm') != NULL) {
      flags |= PCRE2_MULTILINE;
    }
    if (strchr(letters, 's') != NULL) {
      flags |= PCRE2_DOTALL;
    }
  }

  int err_code;
  PCRE2_SIZE err_pos;
  pcre2_code *compiled = pcre2_compile((PCRE2_SPTR)source, length, flags,
                                       &err_code, &err_pos, NULL);

  if (compiled == NULL) {
    /* Compilation failed: report nil plus a readable description. */
    PCRE2_UCHAR reason[256];
    char text[1024];
    pcre2_get_error_message(err_code, reason, sizeof(reason));
    lua_pushnil(L);
    length = snprintf(text, sizeof(text), "regex compilation failed at offset %d: %s", (int)err_pos, reason);
    lua_pushlstring(L, text, length);
    return 2;
  }

  /* Wrap the compiled pattern in a table tagged with the regex metatable. */
  lua_newtable(L);
  lua_pushlightuserdata(L, compiled);
  lua_rawseti(L, -2, 1);
  luaL_setmetatable(L, "regex");
  return 1;
}
|
|
|
|
|
|
|
|
// Takes string, compiled regex, returns list of indices of matched groups
|
|
|
|
// (including the whole match), if a match was found.
|
|
|
|
static int f_pcre_match(lua_State *L) {
|
|
|
|
size_t len, offset = 1, opts = 0;
|
|
|
|
luaL_checktype(L, 1, LUA_TTABLE);
|
|
|
|
const char* str = luaL_checklstring(L, 2, &len);
|
|
|
|
if (lua_gettop(L) > 2)
|
|
|
|
offset = luaL_checknumber(L, 3);
|
|
|
|
if (lua_gettop(L) > 3)
|
|
|
|
opts = luaL_checknumber(L, 4);
|
|
|
|
lua_rawgeti(L, 1, 1);
|
|
|
|
pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
|
|
|
|
pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL);
|
|
|
|
int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, opts, md, NULL);
|
|
|
|
if (rc < 0) {
|
|
|
|
pcre2_match_data_free(md);
|
|
|
|
if (rc != PCRE2_ERROR_NOMATCH)
|
|
|
|
luaL_error(L, "regex matching error %d", rc);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md);
|
|
|
|
if (ovector[0] > ovector[1]) {
|
2021-07-20 20:39:50 +02:00
|
|
|
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
2021-06-02 21:27:00 +02:00
|
|
|
assertion to set the start of a match later than its end. In the editor,
|
|
|
|
we just detect this case and give up. */
|
|
|
|
luaL_error(L, "regex matching error: \\K was used in an assertion to "
|
2021-07-20 20:39:50 +02:00
|
|
|
" set the match start after its end");
|
2021-06-02 21:27:00 +02:00
|
|
|
pcre2_match_data_free(md);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
for (int i = 0; i < rc*2; i++)
|
|
|
|
lua_pushnumber(L, ovector[i]+1);
|
|
|
|
pcre2_match_data_free(md);
|
|
|
|
return rc*2;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const luaL_Reg lib[] = {
|
|
|
|
{ "compile", f_pcre_compile },
|
|
|
|
{ "cmatch", f_pcre_match },
|
|
|
|
{ "__gc", f_pcre_gc },
|
|
|
|
{ NULL, NULL }
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Module entry point: builds the regex library table and returns it.
 *
 * The table receives the functions from lib, a "__name" field of "regex",
 * and the PCRE2 match option constants listed below. It is also stored in
 * the registry under "regex", which is the name f_pcre_compile passes to
 * luaL_setmetatable. */
int luaopen_regex(lua_State *L) {
  /* Option constants exposed to Lua, pushed as numbers. */
  static const struct {
    const char *name;
    lua_Number value;
  } constants[] = {
    { "ANCHORED",         PCRE2_ANCHORED },
    /* Previously set to PCRE2_ANCHORED by mistake. */
    { "ENDANCHORED",      PCRE2_ENDANCHORED },
    { "NOTBOL",           PCRE2_NOTBOL },
    { "NOTEOL",           PCRE2_NOTEOL },
    { "NOTEMPTY",         PCRE2_NOTEMPTY },
    { "NOTEMPTY_ATSTART", PCRE2_NOTEMPTY_ATSTART },
  };

  luaL_newlib(L, lib);
  lua_pushliteral(L, "regex");
  lua_setfield(L, -2, "__name");

  /* Register the library table itself under the "regex" registry key. */
  lua_pushvalue(L, -1);
  lua_setfield(L, LUA_REGISTRYINDEX, "regex");

  for (size_t i = 0; i < sizeof(constants) / sizeof(constants[0]); i++) {
    lua_pushnumber(L, constants[i].value);
    lua_setfield(L, -2, constants[i].name);
  }
  return 1;
}
|