diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua index 5fd8c69f..bb3faa03 100644 --- a/data/core/tokenizer.lua +++ b/data/core/tokenizer.lua @@ -136,20 +136,42 @@ function tokenizer.tokenize(incoming_syntax, text, state) end local function find_text(text, p, offset, at_start, close) - local target, res = p.pattern or p.regex, { 1, offset - 1 }, p.regex - local code = type(target) == "table" and target[close and 2 or 1] or target + local target, res = p.pattern or p.regex, { 1, offset - 1 } + local p_idx = close and 2 or 1 + local code = type(target) == "table" and target[p_idx] or target + + if p.whole_line == nil then p.whole_line = { } end + if p.whole_line[p_idx] == nil then + -- Match patterns that start with '^' + p.whole_line[p_idx] = code:match("^%^") and true or false + if p.whole_line[p_idx] then + -- Remove '^' from the beginning of the pattern + if type(target) == "table" then + target[p_idx] = code:sub(2) + else + p.pattern = p.pattern and code:sub(2) + p.regex = p.regex and code:sub(2) + end + end + end + if p.regex and type(p.regex) ~= "table" then p._regex = p._regex or regex.compile(p.regex) code = p._regex - end + end + repeat local next = res[2] + 1 + -- If the pattern contained '^', allow matching only the whole line + if p.whole_line[p_idx] and next > 1 then + return + end -- go to the start of the next utf-8 character while text:byte(next) and common.is_utf8_cont(text, next) do next = next + 1 end - res = p.pattern and { text:find(at_start and "^" .. code or code, next) } - or { regex.match(code, text, next, at_start and regex.ANCHORED or 0) } + res = p.pattern and { text:find((at_start or p.whole_line[p_idx]) and "^" .. code or code, next) } + or { regex.match(code, text, next, (at_start or p.whole_line[p_idx]) and regex.ANCHORED or 0) } if res[1] and close and target[3] then local count = 0 for i = res[1] - 1, 1, -1 do diff --git a/src/api/regex.c b/src/api/regex.c index 08f0f142..6a0aac7a 100644 --- a/src/api/regex.c +++ b/src/api/regex.c @@ -60,12 +60,14 @@ static int f_pcre_match(lua_State *L) { const char* str = luaL_checklstring(L, 2, &len); if (lua_gettop(L) > 2) offset = luaL_checknumber(L, 3); + offset -= 1; + len -= offset; if (lua_gettop(L) > 3) opts = luaL_checknumber(L, 4); lua_rawgeti(L, 1, 1); pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1); pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL); - int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, opts, md, NULL); + int rc = pcre2_match(re, (PCRE2_SPTR)&str[offset], len, 0, opts, md, NULL); if (rc < 0) { pcre2_match_data_free(md); if (rc != PCRE2_ERROR_NOMATCH) { @@ -86,7 +88,7 @@ static int f_pcre_match(lua_State *L) { return 0; } for (int i = 0; i < rc*2; i++) - lua_pushnumber(L, ovector[i]+1); + lua_pushnumber(L, ovector[i]+offset+1); pcre2_match_data_free(md); return rc*2; }