commit
6f732f67f9
|
@ -1,8 +1,8 @@
|
||||||
local common = {}


--- Report whether the byte at `offset` in `s` is a UTF-8 continuation byte.
-- Continuation bytes have the bit pattern 10xxxxxx, i.e. 0x80..0xBF.
-- @tparam string s string to inspect
-- @tparam[opt=1] number offset 1-based byte position within `s`
-- @treturn boolean true for a continuation byte; false for any other byte
--   and also when `offset` lies outside `s` (including an empty `s`)
function common.is_utf8_cont(s, offset)
  -- string.byte returns no value for an out-of-range position; comparing
  -- that nil with a number would raise, so callers that scan forward up to
  -- the end of a string need a plain `false` here instead.
  local byte = s:byte(offset or 1)
  return byte ~= nil and byte >= 0x80 and byte < 0xc0
end
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
-- Method-call sugar: besides regex.gsub(pattern, string), this lets
-- pattern:gsub(string) resolve as well, by looking every missing key up on
-- the regex table itself.
regex.__index = function(_, key)
  return regex[key]
end
|
||||||
|
@ -6,7 +5,8 @@ regex.__index = function(table, key) return regex[key]; end
|
||||||
-- Match a pattern against a string.
--   pattern_string: an already compiled pattern (any table) or a pattern
--                   source that is passed to regex.compile first.
--   string:         the subject to search.
--   offset:         start position handed to regex.cmatch, defaults to 1.
--   options:        match flags handed to regex.cmatch, defaults to 0.
-- Returns the first two results of regex.cmatch, the second one decremented
-- by one when it is present.
-- NOTE(review): the decrement presumably turns an exclusive end position
-- into an inclusive one -- confirm against the cmatch implementation.
regex.match = function(pattern_string, string, offset, options)
  local compiled = pattern_string
  if type(compiled) ~= "table" then
    compiled = regex.compile(pattern_string)
  end
  local first, last = regex.cmatch(compiled, string, offset or 1, options or 0)
  if last then
    last = last - 1
  end
  return first, last
end
|
||||||
|
|
||||||
-- Will iterate back through any UTF-8 bytes so that we don't replace bits
|
-- Will iterate back through any UTF-8 bytes so that we don't replace bits
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
local syntax = require "core.syntax"
|
local syntax = require "core.syntax"
|
||||||
|
local common = require "core.common"
|
||||||
|
|
||||||
local tokenizer = {}
|
local tokenizer = {}
|
||||||
|
|
||||||
|
@ -142,8 +143,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
|
||||||
code = p._regex
|
code = p._regex
|
||||||
end
|
end
|
||||||
repeat
|
repeat
|
||||||
res = p.pattern and { text:find(at_start and "^" .. code or code, res[2]+1) }
|
local next = res[2] + 1
|
||||||
or { regex.match(code, text, res[2]+1, at_start and regex.ANCHORED or 0) }
|
-- go to the start of the next utf-8 character
|
||||||
|
while common.is_utf8_cont(text, next) do
|
||||||
|
next = next + 1
|
||||||
|
end
|
||||||
|
res = p.pattern and { text:find(at_start and "^" .. code or code, next) }
|
||||||
|
or { regex.match(code, text, next, at_start and regex.ANCHORED or 0) }
|
||||||
if res[1] and close and target[3] then
|
if res[1] and close and target[3] then
|
||||||
local count = 0
|
local count = 0
|
||||||
for i = res[1] - 1, 1, -1 do
|
for i = res[1] - 1, 1, -1 do
|
||||||
|
|
|
@ -68,8 +68,11 @@ static int f_pcre_match(lua_State *L) {
|
||||||
int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, opts, md, NULL);
|
int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, opts, md, NULL);
|
||||||
if (rc < 0) {
|
if (rc < 0) {
|
||||||
pcre2_match_data_free(md);
|
pcre2_match_data_free(md);
|
||||||
if (rc != PCRE2_ERROR_NOMATCH)
|
if (rc != PCRE2_ERROR_NOMATCH) {
|
||||||
luaL_error(L, "regex matching error %d", rc);
|
PCRE2_UCHAR buffer[120];
|
||||||
|
pcre2_get_error_message(rc, buffer, sizeof(buffer));
|
||||||
|
luaL_error(L, "regex matching error %d: %s", rc, buffer);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md);
|
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md);
|
||||||
|
|
Loading…
Reference in New Issue