Merge pull request #860 from Guldoman/tokenizer_start_of_line
Allow syntax patterns to match at the beginning of the line
This commit is contained in:
commit
2f7da44275
|
@ -136,20 +136,42 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
end
|
end
|
||||||
|
|
||||||
local function find_text(text, p, offset, at_start, close)
|
local function find_text(text, p, offset, at_start, close)
|
||||||
local target, res = p.pattern or p.regex, { 1, offset - 1 }, p.regex
|
local target, res = p.pattern or p.regex, { 1, offset - 1 }
|
||||||
local code = type(target) == "table" and target[close and 2 or 1] or target
|
local p_idx = close and 2 or 1
|
||||||
|
local code = type(target) == "table" and target[p_idx] or target
|
||||||
|
|
||||||
|
if p.whole_line == nil then p.whole_line = { } end
|
||||||
|
if p.whole_line[p_idx] == nil then
|
||||||
|
-- Match patterns that start with '^'
|
||||||
|
p.whole_line[p_idx] = code:match("^%^") and true or false
|
||||||
|
if p.whole_line[p_idx] then
|
||||||
|
-- Remove '^' from the beginning of the pattern
|
||||||
|
if type(target) == "table" then
|
||||||
|
target[p_idx] = code:sub(2)
|
||||||
|
else
|
||||||
|
p.pattern = p.pattern and code:sub(2)
|
||||||
|
p.regex = p.regex and code:sub(2)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
if p.regex and type(p.regex) ~= "table" then
|
if p.regex and type(p.regex) ~= "table" then
|
||||||
p._regex = p._regex or regex.compile(p.regex)
|
p._regex = p._regex or regex.compile(p.regex)
|
||||||
code = p._regex
|
code = p._regex
|
||||||
end
|
end
|
||||||
|
|
||||||
repeat
|
repeat
|
||||||
local next = res[2] + 1
|
local next = res[2] + 1
|
||||||
|
-- If the pattern contained '^', allow matching only the whole line
|
||||||
|
if p.whole_line[p_idx] and next > 1 then
|
||||||
|
return
|
||||||
|
end
|
||||||
-- go to the start of the next utf-8 character
|
-- go to the start of the next utf-8 character
|
||||||
while text:byte(next) and common.is_utf8_cont(text, next) do
|
while text:byte(next) and common.is_utf8_cont(text, next) do
|
||||||
next = next + 1
|
next = next + 1
|
||||||
end
|
end
|
||||||
res = p.pattern and { text:find(at_start and "^" .. code or code, next) }
|
res = p.pattern and { text:find((at_start or p.whole_line[p_idx]) and "^" .. code or code, next) }
|
||||||
or { regex.match(code, text, next, at_start and regex.ANCHORED or 0) }
|
or { regex.match(code, text, next, (at_start or p.whole_line[p_idx]) and regex.ANCHORED or 0) }
|
||||||
if res[1] and close and target[3] then
|
if res[1] and close and target[3] then
|
||||||
local count = 0
|
local count = 0
|
||||||
for i = res[1] - 1, 1, -1 do
|
for i = res[1] - 1, 1, -1 do
|
||||||
|
|
|
@ -60,12 +60,14 @@ static int f_pcre_match(lua_State *L) {
|
||||||
const char* str = luaL_checklstring(L, 2, &len);
|
const char* str = luaL_checklstring(L, 2, &len);
|
||||||
if (lua_gettop(L) > 2)
|
if (lua_gettop(L) > 2)
|
||||||
offset = luaL_checknumber(L, 3);
|
offset = luaL_checknumber(L, 3);
|
||||||
|
offset -= 1;
|
||||||
|
len -= offset;
|
||||||
if (lua_gettop(L) > 3)
|
if (lua_gettop(L) > 3)
|
||||||
opts = luaL_checknumber(L, 4);
|
opts = luaL_checknumber(L, 4);
|
||||||
lua_rawgeti(L, 1, 1);
|
lua_rawgeti(L, 1, 1);
|
||||||
pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
|
pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
|
||||||
pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL);
|
pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL);
|
||||||
int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, opts, md, NULL);
|
int rc = pcre2_match(re, (PCRE2_SPTR)&str[offset], len, 0, opts, md, NULL);
|
||||||
if (rc < 0) {
|
if (rc < 0) {
|
||||||
pcre2_match_data_free(md);
|
pcre2_match_data_free(md);
|
||||||
if (rc != PCRE2_ERROR_NOMATCH) {
|
if (rc != PCRE2_ERROR_NOMATCH) {
|
||||||
|
@ -86,7 +88,7 @@ static int f_pcre_match(lua_State *L) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < rc*2; i++)
|
for (int i = 0; i < rc*2; i++)
|
||||||
lua_pushnumber(L, ovector[i]+1);
|
lua_pushnumber(L, ovector[i]+offset+1);
|
||||||
pcre2_match_data_free(md);
|
pcre2_match_data_free(md);
|
||||||
return rc*2;
|
return rc*2;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue