A small fix for the case where the text length returned by `text:ulen()` is nil

2023-01-08 20:42:19 +00:00 · 2023-01-08 20:42:19 +00:00 · 0cb20ab7b2
parent 789004ea2c
commit 0cb20ab7b2
1 changed files with 81 additions and 79 deletions
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@ -225,97 +225,99 @@ function tokenizer.tokenize(incoming_syntax, text, state)
  end
  local text_len = text:ulen()
-  while i <= text_len do
+  if text_len ~= nil then
-    -- continue trying to match the end pattern of a pair if we have a state set
+    while i <= text_len do
-    if current_pattern_idx > 0 then
+      -- continue trying to match the end pattern of a pair if we have a state set
-      local p = current_syntax.patterns[current_pattern_idx]
+      if current_pattern_idx > 0 then
-      local s, e = find_text(text, p, i, false, true)
+        local p = current_syntax.patterns[current_pattern_idx]
        local s, e = find_text(text, p, i, false, true)
-      local cont = true
+        local cont = true
-      -- If we're in subsyntax mode, always check to see if we end our syntax
+        -- If we're in subsyntax mode, always check to see if we end our syntax
-      -- first, before the found delimeter, as ending the subsyntax takes
+        -- first, before the found delimeter, as ending the subsyntax takes
-      -- precedence over ending the delimiter in the subsyntax.
+        -- precedence over ending the delimiter in the subsyntax.
-      if subsyntax_info then
+        if subsyntax_info then
-        local ss, se = find_text(text, subsyntax_info, i, false, true)
+          local ss, se = find_text(text, subsyntax_info, i, false, true)
-        -- If we find that we end the subsyntax before the
+          -- If we find that we end the subsyntax before the
-        -- delimiter, push the token, and signal we shouldn't
+          -- delimiter, push the token, and signal we shouldn't
-        -- treat the bit after as a token to be normally parsed
+          -- treat the bit after as a token to be normally parsed
-        -- (as it's the syntax delimiter).
+          -- (as it's the syntax delimiter).
-        if ss and (s == nil or ss < s) then
+          if ss and (s == nil or ss < s) then
-          push_token(res, p.type, text:usub(i, ss - 1))
+            push_token(res, p.type, text:usub(i, ss - 1))
-          i = ss
+            i = ss
-          cont = false
+            cont = false
          end
        end
        -- If we don't have any concerns about syntax delimiters,
        -- continue on as normal.
        if cont then
          if s then
            push_token(res, p.type, text:usub(i, e))
            set_subsyntax_pattern_idx(0)
            i = e + 1
          else
            push_token(res, p.type, text:usub(i))
            break
          end
        end
      end
-      -- If we don't have any concerns about syntax delimiters,
+      -- General end of syntax check. Applies in the case where
-      -- continue on as normal.
+      -- we're ending early in the middle of a delimiter, or
-      if cont then
+      -- just normally, upon finding a token.
      if subsyntax_info then
        local s, e = find_text(text, subsyntax_info, i, true, true)
        if s then
-          push_token(res, p.type, text:usub(i, e))
+          push_token(res, subsyntax_info.type, text:usub(i, e))
-          set_subsyntax_pattern_idx(0)
+          -- On finding unescaped delimiter, pop it.
          pop_subsyntax()
          i = e + 1
-        else
+        end
-          push_token(res, p.type, text:usub(i))
+      end
      -- find matching pattern
      local matched = false
      for n, p in ipairs(current_syntax.patterns) do
        local find_results = { find_text(text, p, i, true, false) }
        if find_results[1] then
          local type_is_table = type(p.type) == "table"
          local n_types = type_is_table and #p.type or 1
          if #find_results == 2 and type_is_table then
            report_bad_pattern(core.warn, current_syntax, n,
              "Token type is a table, but a string was expected.")
            p.type = p.type[1]
          elseif #find_results - 1 > n_types then
            report_bad_pattern(core.error, current_syntax, n,
              "Not enough token types: got %d needed %d.", n_types, #find_results - 1)
          elseif #find_results - 1 < n_types then
            report_bad_pattern(core.warn, current_syntax, n,
              "Too many token types: got %d needed %d.", n_types, #find_results - 1)
          end
          -- matched pattern; make and add tokens
          push_tokens(res, current_syntax, p, text, find_results)
          -- update state if this was a start|end pattern pair
          if type(p.pattern or p.regex) == "table" then
            -- If we have a subsyntax, push that onto the subsyntax stack.
            if p.syntax then
              push_subsyntax(p, n)
            else
              set_subsyntax_pattern_idx(n)
            end
          end
          -- move cursor past this token
          i = find_results[2] + 1
          matched = true
          break
        end
      end
    end
    -- General end of syntax check. Applies in the case where
    -- we're ending early in the middle of a delimiter, or
    -- just normally, upon finding a token.
    if subsyntax_info then
      local s, e = find_text(text, subsyntax_info, i, true, true)
      if s then
        push_token(res, subsyntax_info.type, text:usub(i, e))
        -- On finding unescaped delimiter, pop it.
        pop_subsyntax()
        i = e + 1
      end
    end
-    -- find matching pattern
+      -- consume character if we didn't match
-    local matched = false
+      if not matched then
-    for n, p in ipairs(current_syntax.patterns) do
+        push_token(res, "normal", text:usub(i, i))
-      local find_results = { find_text(text, p, i, true, false) }
+        i = i + 1
      if find_results[1] then
        local type_is_table = type(p.type) == "table"
        local n_types = type_is_table and #p.type or 1
        if #find_results == 2 and type_is_table then
          report_bad_pattern(core.warn, current_syntax, n,
            "Token type is a table, but a string was expected.")
          p.type = p.type[1]
        elseif #find_results - 1 > n_types then
          report_bad_pattern(core.error, current_syntax, n,
            "Not enough token types: got %d needed %d.", n_types, #find_results - 1)
        elseif #find_results - 1 < n_types then
          report_bad_pattern(core.warn, current_syntax, n,
            "Too many token types: got %d needed %d.", n_types, #find_results - 1)
        end
        -- matched pattern; make and add tokens
        push_tokens(res, current_syntax, p, text, find_results)
        -- update state if this was a start|end pattern pair
        if type(p.pattern or p.regex) == "table" then
          -- If we have a subsyntax, push that onto the subsyntax stack.
          if p.syntax then
            push_subsyntax(p, n)
          else
            set_subsyntax_pattern_idx(n)
          end
        end
        -- move cursor past this token
        i = find_results[2] + 1
        matched = true
        break
      end
    end
    -- consume character if we didn't match
    if not matched then
      push_token(res, "normal", text:usub(i, i))
      i = i + 1
    end
  end
-
+  
  return res, state
 end