lite-xl/data/core/syntax.lua

local common = require "core.common"

-- Module table returned at the end of the file. `items` is the list that
-- syntax.add appends every registered syntax definition to.
local syntax = { items = {} }

-- Fallback definition returned by syntax.get when neither the filename nor
-- the header matches a registered syntax. It carries no patterns or symbols.
local plain_text_syntax = {
  name = "Plain Text",
  patterns = {},
  symbols = {},
}


-- Register the syntax definition `t` by appending it to syntax.items.
--
-- `t` is modified in place:
--   * t.space_handling is forced to `true` unless it already is a boolean.
--   * when t.patterns exists, catch-all "normal" rules are appended to it
--     (see the comments inside for the reasoning).
function syntax.add(t)
  -- any non-boolean value (including nil) means "use the default", which is on
  if type(t.space_handling) ~= "boolean" then
    t.space_handling = true
  end

  local patterns = t.patterns
  if patterns then
    -- appends a rule that tags whatever `pattern` matches as a "normal" token
    local function append_normal(pattern)
      table.insert(patterns, { pattern = pattern, type = "normal" })
    end

    -- consuming a whole run of whitespace as one token speeds up the
    -- tokenizer on lines with long stretches of spaces; syntaxes where this
    -- rule causes conflicts opt out by declaring space_handling = false
    if t.space_handling then
      append_normal("%s+")
    end

    -- likewise, a word that none of the syntax's own patterns matched is
    -- consumed as a single token, so the tokenizer does not have to step
    -- through it one character at a time in Lua, retrying every pattern
    -- on each character
    append_normal("%w+%f[%s]")
  end

  table.insert(syntax.items, t)
end


-- Return the registered syntax whose `field` patterns give the longest match
-- against `text`, or nil when no registered syntax matches.
--
-- text:  the string to test (syntax.get passes a filename or a file header)
-- field: key of the pattern list inside each syntax table
--        (syntax.get passes "files" or "headers")
--
-- Assumes common.match_pattern returns string.find-style inclusive start and
-- end indices on success and a falsy first value otherwise -- confirm
-- against core.common.
local function find(text, field)
  local best_len = 0
  local best_syntax
  -- iterate newest-to-oldest: the comparison below is strict, so on equal
  -- match lengths the most recently added syntax is the one that is kept
  for i = #syntax.items, 1, -1 do
    local t = syntax.items[i]
    -- a syntax without this field contributes an empty pattern list
    local s, e = common.match_pattern(text, t[field] or {})
    if s then
      -- both bounds are inclusive, so the length is e - s + 1; comparing the
      -- bare difference would score a one-character match as 0 and silently
      -- drop it, while empty matches (e < s) still score 0 and stay ignored
      local len = e - s + 1
      if len > best_len then
        best_len = len
        best_syntax = t
      end
    end
  end
  return best_syntax
end

-- Pick the syntax definition for a file.
--
-- filename: matched against each registered syntax's "files" patterns first
-- header:   optional; when truthy and the filename gave no result, it is
--           matched against each syntax's "headers" patterns
--
-- Returns the matching syntax table, or the plain-text fallback when
-- neither lookup finds one.
function syntax.get(filename, header)
  local by_filename = find(filename, "files")
  if by_filename then
    return by_filename
  end

  if header then
    local by_header = find(header, "headers")
    if by_header then
      return by_header
    end
  end

  return plain_text_syntax
end


return syntax
Moved `matches_pattern` from `syntax` to `common` 2020-05-07 11:27:37 +02:00			`local common = require "core.common"`
Initial commit 2019-12-28 12:16:32 +01:00
Moved `matches_pattern` from `syntax` to `common` 2020-05-07 11:27:37 +02:00			`local syntax = {}`
Initial commit 2019-12-28 12:16:32 +01:00			`syntax.items = {}`

Add name to plain text fallback syntax 2021-12-02 22:34:49 +01:00			`local plain_text_syntax = { name = "Plain Text", patterns = {}, symbols = {} }`
Initial commit 2019-12-28 12:16:32 +01:00

			`function syntax.add(t)`
syntax: remove pattern re-ordering on optimization * Introduces a flag named space_handling that syntax writers can turn off; turning it off means that your syntax will take care of handling the excessive amounts of spaces that can slow down the tokenizer. * Adds another pattern at the end of every single table that also improves tokenizer performance by matching words that weren't matched by any of the syntax patterns. * Modifies language_md to turn off the provided space_handling and do its own, since it has rules that require a space at the beginning; it also handles the long runs of consecutive dashes used in tables that degrade performance. * These changes were discussed in collaboration with @Guldoman and @takase1121, thanks to all! 2022-03-30 03:48:34 +02:00			`if type(t.space_handling) ~= "boolean" then t.space_handling = true end`

syntax: add pattern to boost tokenizer performance (#896) 2022-03-25 16:25:32 +01:00			`if t.patterns then`
syntax: remove pattern re-ordering on optimization * Introduces a flag named space_handling that syntax writers can turn off; turning it off means that your syntax will take care of handling the excessive amounts of spaces that can slow down the tokenizer. * Adds another pattern at the end of every single table that also improves tokenizer performance by matching words that weren't matched by any of the syntax patterns. * Modifies language_md to turn off the provided space_handling and do its own, since it has rules that require a space at the beginning; it also handles the long runs of consecutive dashes used in tables that degrade performance. * These changes were discussed in collaboration with @Guldoman and @takase1121, thanks to all! 2022-03-30 03:48:34 +02:00			`-- the rule %s+ gives us a performance gain for the tokenizer in lines with`
			`-- long amounts of consecutive spaces, can be disabled by plugins where it`
			`-- causes conflicts by declaring the table property: space_handling = false`
			`if t.space_handling then`
			`table.insert(t.patterns, { pattern = "%s+", type = "normal" })`
syntax: fix conflicts introduced with #896 * mainly the language_md got affected which has some exotic rules * some other languages are also using spaces at start of pattern and even if not affected this change tackles that 2022-03-29 02:51:09 +02:00			`end`
syntax: remove pattern re-ordering on optimization * Introduces a flag named space_handling that syntax writers can turn off; turning it off means that your syntax will take care of handling the excessive amounts of spaces that can slow down the tokenizer. * Adds another pattern at the end of every single table that also improves tokenizer performance by matching words that weren't matched by any of the syntax patterns. * Modifies language_md to turn off the provided space_handling and do its own, since it has rules that require a space at the beginning; it also handles the long runs of consecutive dashes used in tables that degrade performance. * These changes were discussed in collaboration with @Guldoman and @takase1121, thanks to all! 2022-03-30 03:48:34 +02:00
			`-- this rule gives us additional performance gain by matching every word`
			`-- that was not matched by the syntax patterns as a single token, preventing`
			`-- the tokenizer from iterating over each character individually which is a`
			`-- lot slower since iteration occurs in lua instead of C and adding to that`
			`-- it will also try to match every pattern to a single char (same as spaces)`
			`table.insert(t.patterns, { pattern = "%w+%f[%s]", type = "normal" })`
syntax: add pattern to boost tokenizer performance (#896) 2022-03-25 16:25:32 +01:00			`end`
syntax: fix conflicts introduced with #896 * mainly the language_md got affected which has some exotic rules * some other languages are also using spaces at start of pattern and even if not affected this change tackles that 2022-03-29 02:51:09 +02:00
Initial commit 2019-12-28 12:16:32 +01:00			`table.insert(syntax.items, t)`
			`end`


Added file-header pattern support to `syntax` 2020-06-08 10:44:51 +02:00			`local function find(string, field)`
Use the syntax with the longest match (#919) This way, for example, a syntax that applies to `docker-compose.yml` files will take precedence over one that applies to `*.yml` files. 2022-10-13 00:10:11 +02:00			`local best_match = 0`
			`local best_syntax`
Initial commit 2019-12-28 12:16:32 +01:00			`for i = #syntax.items, 1, -1 do`
			`local t = syntax.items[i]`
Use the syntax with the longest match (#919) This way, for example, a syntax that applies to `docker-compose.yml` files will take precedence over one that applies to `*.yml` files. 2022-10-13 00:10:11 +02:00			`local s, e = common.match_pattern(string, t[field] or {})`
			`if s and e - s > best_match then`
			`best_match = e - s`
			`best_syntax = t`
Initial commit 2019-12-28 12:16:32 +01:00			`end`
			`end`
Use the syntax with the longest match (#919) This way, for example, a syntax that applies to `docker-compose.yml` files will take precedence over one that applies to `*.yml` files. 2022-10-13 00:10:11 +02:00			`return best_syntax`
Added file-header pattern support to `syntax` 2020-06-08 10:44:51 +02:00			`end`

			`function syntax.get(filename, header)`
			`return find(filename, "files")`
Use `header` to get syntax only when provided 2021-10-25 14:06:07 +02:00			`or (header and find(header, "headers"))`
Added file-header pattern support to `syntax` 2020-06-08 10:44:51 +02:00			`or plain_text_syntax`
Initial commit 2019-12-28 12:16:32 +01:00			`end`


			`return syntax`