A small fix: skip the tokenizing loop when the text length (`text:ulen()`) is nil

2023-01-08 20:42:19 +00:00
1 changed files with 81 additions and 79 deletions
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@ -225,97 +225,99 @@ function tokenizer.tokenize(incoming_syntax, text, state)
  end

  local text_len = text:ulen()
-  while i <= text_len do
-    -- continue trying to match the end pattern of a pair if we have a state set
-    if current_pattern_idx > 0 then
-      local p = current_syntax.patterns[current_pattern_idx]
-      local s, e = find_text(text, p, i, false, true)
+  if text_len ~= nil then
+    while i <= text_len do
+      -- continue trying to match the end pattern of a pair if we have a state set
+      if current_pattern_idx > 0 then
+        local p = current_syntax.patterns[current_pattern_idx]
+        local s, e = find_text(text, p, i, false, true)

-      local cont = true
-      -- If we're in subsyntax mode, always check to see if we end our syntax
-      -- first, before the found delimeter, as ending the subsyntax takes
-      -- precedence over ending the delimiter in the subsyntax.
-      if subsyntax_info then
-        local ss, se = find_text(text, subsyntax_info, i, false, true)
-        -- If we find that we end the subsyntax before the
-        -- delimiter, push the token, and signal we shouldn't
-        -- treat the bit after as a token to be normally parsed
-        -- (as it's the syntax delimiter).
-        if ss and (s == nil or ss < s) then
-          push_token(res, p.type, text:usub(i, ss - 1))
-          i = ss
-          cont = false
+        local cont = true
+        -- If we're in subsyntax mode, always check to see if we end our syntax
+        -- first, before the found delimiter, as ending the subsyntax takes
+        -- precedence over ending the delimiter in the subsyntax.
+        if subsyntax_info then
+          local ss, se = find_text(text, subsyntax_info, i, false, true)
+          -- If we find that we end the subsyntax before the
+          -- delimiter, push the token, and signal we shouldn't
+          -- treat the bit after as a token to be normally parsed
+          -- (as it's the syntax delimiter).
+          if ss and (s == nil or ss < s) then
+            push_token(res, p.type, text:usub(i, ss - 1))
+            i = ss
+            cont = false
+          end
+        end
+        -- If we don't have any concerns about syntax delimiters,
+        -- continue on as normal.
+        if cont then
+          if s then
+            push_token(res, p.type, text:usub(i, e))
+            set_subsyntax_pattern_idx(0)
+            i = e + 1
+          else
+            push_token(res, p.type, text:usub(i))
+            break
+          end
        end
      end
-      -- If we don't have any concerns about syntax delimiters,
-      -- continue on as normal.
-      if cont then
+      -- General end of syntax check. Applies in the case where
+      -- we're ending early in the middle of a delimiter, or
+      -- just normally, upon finding a token.
+      if subsyntax_info then
+        local s, e = find_text(text, subsyntax_info, i, true, true)
        if s then
-          push_token(res, p.type, text:usub(i, e))
-          set_subsyntax_pattern_idx(0)
+          push_token(res, subsyntax_info.type, text:usub(i, e))
+          -- On finding unescaped delimiter, pop it.
+          pop_subsyntax()
          i = e + 1
-        else
-          push_token(res, p.type, text:usub(i))
+        end
+      end
+
+      -- find matching pattern
+      local matched = false
+      for n, p in ipairs(current_syntax.patterns) do
+        local find_results = { find_text(text, p, i, true, false) }
+        if find_results[1] then
+          local type_is_table = type(p.type) == "table"
+          local n_types = type_is_table and #p.type or 1
+          if #find_results == 2 and type_is_table then
+            report_bad_pattern(core.warn, current_syntax, n,
+              "Token type is a table, but a string was expected.")
+            p.type = p.type[1]
+          elseif #find_results - 1 > n_types then
+            report_bad_pattern(core.error, current_syntax, n,
+              "Not enough token types: got %d needed %d.", n_types, #find_results - 1)
+          elseif #find_results - 1 < n_types then
+            report_bad_pattern(core.warn, current_syntax, n,
+              "Too many token types: got %d needed %d.", n_types, #find_results - 1)
+          end
+          -- matched pattern; make and add tokens
+          push_tokens(res, current_syntax, p, text, find_results)
+          -- update state if this was a start|end pattern pair
+          if type(p.pattern or p.regex) == "table" then
+            -- If we have a subsyntax, push that onto the subsyntax stack.
+            if p.syntax then
+              push_subsyntax(p, n)
+            else
+              set_subsyntax_pattern_idx(n)
+            end
+          end
+          -- move cursor past this token
+          i = find_results[2] + 1
+          matched = true
          break
        end
      end
-    end
-    -- General end of syntax check. Applies in the case where
-    -- we're ending early in the middle of a delimiter, or
-    -- just normally, upon finding a token.
-    if subsyntax_info then
-      local s, e = find_text(text, subsyntax_info, i, true, true)
-      if s then
-        push_token(res, subsyntax_info.type, text:usub(i, e))
-        -- On finding unescaped delimiter, pop it.
-        pop_subsyntax()
-        i = e + 1
-      end
-    end

-    -- find matching pattern
-    local matched = false
-    for n, p in ipairs(current_syntax.patterns) do
-      local find_results = { find_text(text, p, i, true, false) }
-      if find_results[1] then
-        local type_is_table = type(p.type) == "table"
-        local n_types = type_is_table and #p.type or 1
-        if #find_results == 2 and type_is_table then
-          report_bad_pattern(core.warn, current_syntax, n,
-            "Token type is a table, but a string was expected.")
-          p.type = p.type[1]
-        elseif #find_results - 1 > n_types then
-          report_bad_pattern(core.error, current_syntax, n,
-            "Not enough token types: got %d needed %d.", n_types, #find_results - 1)
-        elseif #find_results - 1 < n_types then
-          report_bad_pattern(core.warn, current_syntax, n,
-            "Too many token types: got %d needed %d.", n_types, #find_results - 1)
-        end
-        -- matched pattern; make and add tokens
-        push_tokens(res, current_syntax, p, text, find_results)
-        -- update state if this was a start|end pattern pair
-        if type(p.pattern or p.regex) == "table" then
-          -- If we have a subsyntax, push that onto the subsyntax stack.
-          if p.syntax then
-            push_subsyntax(p, n)
-          else
-            set_subsyntax_pattern_idx(n)
-          end
-        end
-        -- move cursor past this token
-        i = find_results[2] + 1
-        matched = true
-        break
+      -- consume character if we didn't match
+      if not matched then
+        push_token(res, "normal", text:usub(i, i))
+        i = i + 1
      end
    end
-
-    -- consume character if we didn't match
-    if not matched then
-      push_token(res, "normal", text:usub(i, i))
-      i = i + 1
-    end
  end
-
+  
  return res, state
 end