From e572c58f247a3062e6b93ab35957654b37a3daee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jefferson=20Gonz=C3=A1lez?= Date: Tue, 26 Apr 2022 09:42:02 -0400 Subject: [PATCH] Add utf8 support to tokenizer (#945) * add utf8 support to tokenizer * wrap utf8 functions in string table using a 'u' prefix * document new utf8 functions --- data/core/start.lua | 2 + data/core/tokenizer.lua | 55 +- data/core/utf8string.lua | 30 + docs/api/string.lua | 165 ++ docs/api/utf8.lua | 187 ++ src/api/api.c | 2 + src/api/utf8.c | 1305 ++++++++++++++ src/meson.build | 1 + src/unidata.h | 3710 ++++++++++++++++++++++++++++++++++++++ 9 files changed, 5431 insertions(+), 26 deletions(-) create mode 100644 data/core/utf8string.lua create mode 100644 docs/api/string.lua create mode 100644 docs/api/utf8.lua create mode 100644 src/api/utf8.c create mode 100644 src/unidata.h diff --git a/data/core/start.lua b/data/core/start.lua index b6f1ee6a..a08ddc16 100644 --- a/data/core/start.lua +++ b/data/core/start.lua @@ -35,6 +35,8 @@ table.unpack = table.unpack or unpack bit32 = bit32 or require "core.bit" +require "core.utf8string" + -- Because AppImages change the working directory before running the executable, -- we need to change it back to the original one. -- https://github.com/AppImage/AppImageKit/issues/172 diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua index bb3faa03..e0c630a4 100644 --- a/data/core/tokenizer.lua +++ b/data/core/tokenizer.lua @@ -6,7 +6,7 @@ local tokenizer = {} local function push_token(t, type, text) local prev_type = t[#t-1] local prev_text = t[#t] - if prev_type and (prev_type == type or prev_text:find("^%s*$")) then + if prev_type and (prev_type == type or prev_text:ufind("^%s*$")) then t[#t-1] = type t[#t] = prev_text .. 
text else @@ -38,12 +38,12 @@ local function push_tokens(t, syn, pattern, full_text, find_results) local fin = find_results[i + 1] - 1 local type = pattern.type[i - 2] -- ↑ (i - 2) to convert from [3; n] to [1; n] - local text = full_text:sub(start, fin) + local text = full_text:usub(start, fin) push_token(t, syn.symbols[text] or type, text) end else local start, fin = find_results[1], find_results[2] - local text = full_text:sub(start, fin) + local text = full_text:usub(start, fin) push_token(t, syn.symbols[text] or pattern.type, text) end end @@ -52,12 +52,12 @@ end -- State is a 32-bit number that is four separate bytes, illustrating how many -- differnet delimiters we have open, and which subsyntaxes we have active. -- At most, there are 3 subsyntaxes active at the same time. Beyond that, --- does not support further highlighting. +-- does not support further highlighting. -- You can think of it as a maximum 4 integer (0-255) stack. It always has -- 1 integer in it. Calling `push_subsyntax` increases the stack depth. Calling -- `pop_subsyntax` decreases it. The integers represent the index of a pattern --- that we're following in the syntax. The top of the stack can be any valid +-- that we're following in the syntax. The top of the stack can be any valid -- pattern index, any integer lower in the stack must represent a pattern that -- specifies a subsyntax. @@ -92,6 +92,9 @@ local function retrieve_syntax_state(incoming_syntax, state) return current_syntax, subsyntax_info, current_pattern_idx, current_level end +---@param incoming_syntax table +---@param text string +---@param state integer function tokenizer.tokenize(incoming_syntax, text, state) local res = {} local i = 1 @@ -102,22 +105,22 @@ function tokenizer.tokenize(incoming_syntax, text, state) state = state or 0 -- incoming_syntax : the parent syntax of the file. 
- -- state : a 32-bit number representing syntax state (see above) - + -- state : a 32-bit number representing syntax state (see above) + -- current_syntax : the syntax we're currently in. -- subsyntax_info : info about the delimiters of this subsyntax. -- current_pattern_idx: the index of the pattern we're on for this syntax. -- current_level : how many subsyntaxes deep we are. local current_syntax, subsyntax_info, current_pattern_idx, current_level = retrieve_syntax_state(incoming_syntax, state) - + -- Should be used to set the state variable. Don't modify it directly. local function set_subsyntax_pattern_idx(pattern_idx) current_pattern_idx = pattern_idx state = bit32.replace(state, pattern_idx, current_level*8, 8) end - - + + local function push_subsyntax(entering_syntax, pattern_idx) set_subsyntax_pattern_idx(pattern_idx) current_level = current_level + 1 @@ -126,15 +129,15 @@ function tokenizer.tokenize(incoming_syntax, text, state) entering_syntax.syntax or syntax.get(entering_syntax.syntax) current_pattern_idx = 0 end - + local function pop_subsyntax() set_subsyntax_pattern_idx(0) current_level = current_level - 1 set_subsyntax_pattern_idx(0) - current_syntax, subsyntax_info, current_pattern_idx, current_level = + current_syntax, subsyntax_info, current_pattern_idx, current_level = retrieve_syntax_state(incoming_syntax, state) end - + local function find_text(text, p, offset, at_start, close) local target, res = p.pattern or p.regex, { 1, offset - 1 } local p_idx = close and 2 or 1 @@ -143,14 +146,14 @@ function tokenizer.tokenize(incoming_syntax, text, state) if p.whole_line == nil then p.whole_line = { } end if p.whole_line[p_idx] == nil then -- Match patterns that start with '^' - p.whole_line[p_idx] = code:match("^%^") and true or false + p.whole_line[p_idx] = code:umatch("^%^") and true or false if p.whole_line[p_idx] then -- Remove '^' from the beginning of the pattern if type(target) == "table" then - target[p_idx] = code:sub(2) + target[p_idx] = 
code:usub(2) else - p.pattern = p.pattern and code:sub(2) - p.regex = p.regex and code:sub(2) + p.pattern = p.pattern and code:usub(2) + p.regex = p.regex and code:usub(2) end end end @@ -170,7 +173,7 @@ function tokenizer.tokenize(incoming_syntax, text, state) while text:byte(next) and common.is_utf8_cont(text, next) do next = next + 1 end - res = p.pattern and { text:find((at_start or p.whole_line[p_idx]) and "^" .. code or code, next) } + res = p.pattern and { text:ufind((at_start or p.whole_line[p_idx]) and "^" .. code or code, next) } or { regex.match(code, text, next, (at_start or p.whole_line[p_idx]) and regex.ANCHORED or 0) } if res[1] and close and target[3] then local count = 0 @@ -185,7 +188,7 @@ function tokenizer.tokenize(incoming_syntax, text, state) until not res[1] or not close or not target[3] return table.unpack(res) end - + while i <= #text do -- continue trying to match the end pattern of a pair if we have a state set if current_pattern_idx > 0 then @@ -198,12 +201,12 @@ function tokenizer.tokenize(incoming_syntax, text, state) -- precedence over ending the delimiter in the subsyntax. if subsyntax_info then local ss, se = find_text(text, subsyntax_info, i, false, true) - -- If we find that we end the subsyntax before the + -- If we find that we end the subsyntax before the -- delimiter, push the token, and signal we shouldn't -- treat the bit after as a token to be normally parsed -- (as it's the syntax delimiter). if ss and (s == nil or ss < s) then - push_token(res, p.type, text:sub(i, ss - 1)) + push_token(res, p.type, text:usub(i, ss - 1)) i = ss cont = false end @@ -212,11 +215,11 @@ function tokenizer.tokenize(incoming_syntax, text, state) -- continue on as normal. 
if cont then if s then - push_token(res, p.type, text:sub(i, e)) + push_token(res, p.type, text:usub(i, e)) set_subsyntax_pattern_idx(0) i = e + 1 else - push_token(res, p.type, text:sub(i)) + push_token(res, p.type, text:usub(i)) break end end @@ -227,7 +230,7 @@ function tokenizer.tokenize(incoming_syntax, text, state) if subsyntax_info then local s, e = find_text(text, subsyntax_info, i, true, true) if s then - push_token(res, subsyntax_info.type, text:sub(i, e)) + push_token(res, subsyntax_info.type, text:usub(i, e)) -- On finding unescaped delimiter, pop it. pop_subsyntax() i = e + 1 @@ -246,7 +249,7 @@ function tokenizer.tokenize(incoming_syntax, text, state) -- If we have a subsyntax, push that onto the subsyntax stack. if p.syntax then push_subsyntax(p, n) - else + else set_subsyntax_pattern_idx(n) end end @@ -264,7 +267,7 @@ function tokenizer.tokenize(incoming_syntax, text, state) while text:byte(i + n + 1) and common.is_utf8_cont(text, i + n + 1) do n = n + 1 end - push_token(res, "normal", text:sub(i, i + n)) + push_token(res, "normal", text:usub(i, i + n)) i = i + n + 1 end end diff --git a/data/core/utf8string.lua b/data/core/utf8string.lua new file mode 100644 index 00000000..1a2da19b --- /dev/null +++ b/data/core/utf8string.lua @@ -0,0 +1,30 @@ +-------------------------------------------------------------------------------- +-- inject utf8 functions to strings +-------------------------------------------------------------------------------- + +string.ubyte = utf8.byte +string.uchar = utf8.char +string.ufind = utf8.find +string.ugmatch = utf8.gmatch +string.ugsub = utf8.gsub +string.ulen = utf8.len +string.ulower = utf8.lower +string.umatch = utf8.match +string.ureverse = utf8.reverse +string.usub = utf8.sub +string.uupper = utf8.upper + +string.uescape = utf8.escape +string.ucharpos = utf8.charpos +string.unext = utf8.next +string.uinsert = utf8.insert +string.uremove = utf8.remove +string.uwidth = utf8.width +string.uwidthindex = utf8.widthindex 
+string.utitle = utf8.title +string.ufold = utf8.fold +string.uncasecmp = utf8.ncasecmp + +string.uoffset = utf8.offset +string.ucodepoint = utf8.codepoint +string.ucodes = utf8.codes diff --git a/docs/api/string.lua b/docs/api/string.lua new file mode 100644 index 00000000..0872b462 --- /dev/null +++ b/docs/api/string.lua @@ -0,0 +1,165 @@ +---@meta + +---UTF-8 equivalent of string.byte +---@param s string +---@param i? integer +---@param j? integer +---@return integer +---@return ... +function string.ubyte(s, i, j) end + +---UTF-8 equivalent of string.char +---@param byte integer +---@param ... integer +---@return string +---@return ... +function string.uchar(byte, ...) end + +---UTF-8 equivalent of string.find +---@param s string +---@param pattern string +---@param init? integer +---@param plain? boolean +---@return integer start +---@return integer end +---@return ... captured +function string.ufind(s, pattern, init, plain) end + +---UTF-8 equivalent of string.gmatch +---@param s string +---@param pattern string +---@param init? integer +---@return fun():string, ... +function string.ugmatch(s, pattern, init) end + +---UTF-8 equivalent of string.gsub +---@param s string +---@param pattern string +---@param repl string|table|function +---@param n integer +---@return string +---@return integer count +function string.ugsub(s, pattern, repl, n) end + +---UTF-8 equivalent of string.len +---@param s string +---@return integer +function string.ulen(s) end + +---UTF-8 equivalent of string.lower +---@param s string +---@return string +function string.ulower(s) end + +---UTF-8 equivalent of string.match +---@param s string +---@param pattern string +---@param init? integer +---@return string | number captured +function string.umatch(s, pattern, init) end + +---UTF-8 equivalent of string.reverse +---@param s string +---@return string +function string.ureverse(s) end + +---UTF-8 equivalent of string.sub +---@param s string +---@param i integer +---@param j? 
integer +---@return string +function string.usub(s, i, j) end + +---UTF-8 equivalent of string.upper +---@param s string +---@return string +function string.uupper(s) end + +---Equivalent to utf8.escape() +---@param s string +---@return string utf8_string +function string.uescape(s) end + + +---Equivalent to utf8.charpos() +---@param s string +---@param charpos? integer +---@param index? integer +---@return integer charpos +---@return integer codepoint +function string.ucharpos(s, charpos, index) end + +---Equivalent to utf8.next() +---@param s string +---@param charpos? integer +---@param index? integer +---@return integer charpos +---@return integer codepoint +function string.unext(s, charpos, index) end + +---Equivalent to utf8.insert() +---@param s string +---@param idx? integer +---@param substring string +---@return string new_string +function string.uinsert(s, idx, substring) end + +---Equivalent to utf8.remove() +---@param s string +---@param start? integer +---@param stop? integer +---@return string new_string +function string.uremove(s, start, stop) end + +---Equivalent to utf8.width() +---@param s string +---@param ambi_is_double? boolean +---@param default_width? integer +---@return integer width +function string.uwidth(s, ambi_is_double, default_width) end + +---Equivalent to utf8.widthindex() +---@param s string +---@param location integer +---@param ambi_is_double? boolean +---@param default_width?
integer +---@return integer idx +---@return integer offset +---@return integer width +function string.uwidthindex(s, location, ambi_is_double, default_width) end + +---Equivalent to utf8.title() +---@param s string +---@return string new_string +function string.utitle(s) end + +---Equivalent to utf8.fold() +---@param s string +---@return string new_string +function string.ufold(s) end + +---Equivalent to utf8.ncasecmp() +---@param a string +---@param b string +---@return integer result +function string.uncasecmp(a, b) end + +---Equivalent to utf8.offset() +---@param s string +---@param n integer +---@param i? integer +---@return integer position_in_bytes +function string.uoffset(s, n, i) end + +---Equivalent to utf8.codepoint() +---@param s string +---@param i? integer +---@param j? integer +---@return integer code +---@return ... +function string.ucodepoint(s, i, j) end + +---Equivalent to utf8.codes() +---@param s string +---@return fun():integer, integer +function string.ucodes(s) end diff --git a/docs/api/utf8.lua b/docs/api/utf8.lua new file mode 100644 index 00000000..d4dff5a9 --- /dev/null +++ b/docs/api/utf8.lua @@ -0,0 +1,187 @@ +---@meta + +---UTF-8 equivalent of string.byte +---@param s string +---@param i? integer +---@param j? integer +---@return integer +---@return ... +function utf8.byte(s, i, j) end + +---UTF-8 equivalent of string.char +---@param byte integer +---@param ... integer +---@return string +---@return ... +function utf8.char(byte, ...) end + +---UTF-8 equivalent of string.find +---@param s string +---@param pattern string +---@param init? integer +---@param plain? boolean +---@return integer start +---@return integer end +---@return ... captured +function utf8.find(s, pattern, init, plain) end + +---UTF-8 equivalent of string.gmatch +---@param s string +---@param pattern string +---@param init? integer +---@return fun():string, ...
+function utf8.gmatch(s, pattern, init) end + +---UTF-8 equivalent of string.gsub +---@param s string +---@param pattern string +---@param repl string|table|function +---@param n integer +---@return string +---@return integer count +function utf8.gsub(s, pattern, repl, n) end + +---UTF-8 equivalent of string.len +---@param s string +---@return integer +function utf8.len(s) end + +---UTF-8 equivalent of string.lower +---@param s string +---@return string +function utf8.lower(s) end + +---UTF-8 equivalent of string.match +---@param s string +---@param pattern string +---@param init? integer +---@return string | number captured +function utf8.match(s, pattern, init) end + +---UTF-8 equivalent of string.reverse +---@param s string +---@return string +function utf8.reverse(s) end + +---UTF-8 equivalent of string.sub +---@param s string +---@param i integer +---@param j? integer +---@return string +function utf8.sub(s, i, j) end + +---UTF-8 equivalent of string.upper +---@param s string +---@return string +function utf8.upper(s) end + +---Escape a string into a UTF-8 string. It supports several escape formats: +---* %ddd - where ddd is a decimal number of any length: convert the Unicode code point to UTF-8. +---* %{ddd} - same as %ddd but with braces around the number. +---* %uddd - same as %ddd, u stands for Unicode +---* %u{ddd} - same as %{ddd} +---* %xhhh - hexadecimal version of %ddd +---* %x{hhh} - same as %xhhh. +---* %? - '?' stands for any other character: escape this character. +---Example: +---```lua +---local u = utf8.escape +---print(u"%123%u123%{123}%u{123}%xABC%x{ABC}") +---print(u"%%123%?%d%%u") +---``` +---@param s string +---@return string utf8_string +function utf8.escape(s) end + +---Convert UTF-8 position to byte offset. If only index is given, return the byte +---offset of this UTF-8 char index. If both charpos and index are given, a new +---charpos is calculated by adding/subtracting the UTF-8 char index to/from the current +---charpos.
In all cases, it returns a new char position and the code point +---(a number) at this position. +---@param s string +---@param charpos? integer +---@param index? integer +---@return integer charpos +---@return integer codepoint +function utf8.charpos(s, charpos, index) end + +---Iterate through the UTF-8 string s. If only s is given, it can be used as an iterator: +---```lua +--- for pos, code in utf8.next, "utf8-string" do +--- -- ... +--- end +---``` +---If only charpos is given, return the next byte offset in the string. If +---charpos and index are given, a new charpos is calculated by adding/subtracting the +---UTF-8 char offset to/from the current charpos. In all cases, it returns a new char +---position (in bytes) and the code point (a number) at this position. +---@param s string +---@param charpos? integer +---@param index? integer +---@return integer charpos +---@return integer codepoint +function utf8.next(s, charpos, index) end + +---Insert a substring into s. If idx is given, insert substring before the char at +---this index, otherwise substring is appended to s. idx can be negative. +---@param s string +---@param idx? integer +---@param substring string +---@return string new_string +function utf8.insert(s, idx, substring) end + +---Delete a substring in s. If neither start nor stop is given, delete the last +---UTF-8 char in s, otherwise delete chars from start to the end of s. If stop is +---given, delete chars from start to stop (including start and stop). start and +---stop can be negative. +---@param s string +---@param start? integer +---@param stop? integer +---@return string new_string +function utf8.remove(s, start, stop) end + +---Calculate the width of UTF-8 string s. If ambi_is_double is given, the +---width of ambiguous width characters is 2, otherwise it's 1. The width of fullwidth/doublewidth +---characters is 2, and the width of other characters is 1. If default_width is +---given, it will be the width of unprintable characters, used to display a +---non-character mark for these characters.
If s is a code point, return the +---width of this code point. +---@param s string +---@param ambi_is_double? boolean +---@param default_width? integer +---@return integer width +function utf8.width(s, ambi_is_double, default_width) end + +---Return the character index at the given location in string s. This is the reverse +---operation of utf8.width(). This function returns the index of location, and an +---offset in UTF-8 encoding. E.g. if the cursor is at the second column (middle) +---of a wide char, offset will be 2. The width of the character at idx is +---also returned. +---@param s string +---@param location integer +---@param ambi_is_double? boolean +---@param default_width? integer +---@return integer idx +---@return integer offset +---@return integer width +function utf8.widthindex(s, location, ambi_is_double, default_width) end + +---Convert UTF-8 string s to title-case, used for case-insensitive comparison. If s +---is a number, it's treated as a code point and the converted code point +---(number) is returned. utf8.lower/utf8.upper have the same extension. +---@param s string +---@return string new_string +function utf8.title(s) end + +---Convert UTF-8 string s to folded case, used for case-insensitive comparison. If s +---is a number, it's treated as a code point and the converted code point +---(number) is returned. utf8.lower/utf8.upper have the same extension. +---@param s string +---@return string new_string +function utf8.fold(s) end + +---Compare a and b without case, -1 means a < b, 0 means a == b and 1 means a > b.
+---@param a string +---@param b string +---@return integer result +function utf8.ncasecmp(a, b) end diff --git a/src/api/api.c b/src/api/api.c index 1a6e516d..67a05f19 100644 --- a/src/api/api.c +++ b/src/api/api.c @@ -5,6 +5,7 @@ int luaopen_renderer(lua_State *L); int luaopen_regex(lua_State *L); int luaopen_process(lua_State *L); int luaopen_dirmonitor(lua_State* L); +int luaopen_utf8(lua_State* L); static const luaL_Reg libs[] = { { "system", luaopen_system }, @@ -12,6 +13,7 @@ static const luaL_Reg libs[] = { { "regex", luaopen_regex }, { "process", luaopen_process }, { "dirmonitor", luaopen_dirmonitor }, + { "utf8", luaopen_utf8 }, { NULL, NULL } }; diff --git a/src/api/utf8.c b/src/api/utf8.c new file mode 100644 index 00000000..e1a4ebfe --- /dev/null +++ b/src/api/utf8.c @@ -0,0 +1,1305 @@ +/* + * Integration of https://github.com/starwing/luautf8 + * + * Copyright (c) 2018 Xavier Wang + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include + + +#include +#include + +#include "../unidata.h" + +/* UTF-8 string operations */ + +#define UTF8_BUFFSZ 8 +#define UTF8_MAX 0x7FFFFFFFu +#define UTF8_MAXCP 0x10FFFFu +#define iscont(p) ((*(p) & 0xC0) == 0x80) +#define CAST(tp,expr) ((tp)(expr)) + +#ifndef LUA_QL +# define LUA_QL(x) "'" x "'" +#endif + +static int utf8_invalid (utfint ch) +{ return (ch > UTF8_MAXCP || (0xD800u <= ch && ch <= 0xDFFFu)); } + +static size_t utf8_encode (char *buff, utfint x) { + int n = 1; /* number of bytes put in buffer (backwards) */ + lua_assert(x <= UTF8_MAX); + if (x < 0x80) /* ascii? */ + buff[UTF8_BUFFSZ - 1] = x & 0x7F; + else { /* need continuation bytes */ + utfint mfb = 0x3f; /* maximum that fits in first byte */ + do { /* add continuation bytes */ + buff[UTF8_BUFFSZ - (n++)] = 0x80 | (x & 0x3f); + x >>= 6; /* remove added bits */ + mfb >>= 1; /* now there is one less bit available in first byte */ + } while (x > mfb); /* still needs continuation byte? */ + buff[UTF8_BUFFSZ - n] = ((~mfb << 1) | x) & 0xFF; /* add first byte */ + } + return n; +} + +static const char *utf8_decode (const char *s, utfint *val, int strict) { + static const utfint limits[] = + {~0u, 0x80u, 0x800u, 0x10000u, 0x200000u, 0x4000000u}; + unsigned int c = (unsigned char)s[0]; + utfint res = 0; /* final result */ + if (c < 0x80) /* ascii? */ + res = c; + else { + int count = 0; /* to count number of continuation bytes */ + for (; c & 0x40; c <<= 1) { /* while it needs continuation bytes... */ + unsigned int cc = (unsigned char)s[++count]; /* read next byte */ + if ((cc & 0xC0) != 0x80) /* not a continuation byte? */ + return NULL; /* invalid byte sequence */ + res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. 
byte */ + } + res |= ((utfint)(c & 0x7F) << (count * 5)); /* add first byte */ + if (count > 5 || res > UTF8_MAX || res < limits[count]) + return NULL; /* invalid byte sequence */ + s += count; /* skip continuation bytes read */ + } + if (strict) { + /* check for invalid code points; too large or surrogates */ + if (res > UTF8_MAXCP || (0xD800u <= res && res <= 0xDFFFu)) + return NULL; + } + if (val) *val = res; + return s + 1; /* +1 to include first byte */ +} + +static const char *utf8_prev (const char *s, const char *e) { + while (s < e && iscont(e - 1)) --e; + return s < e ? e - 1 : s; +} + +static const char *utf8_next (const char *s, const char *e) { + while (s < e && iscont(s + 1)) ++s; + return s < e ? s + 1 : e; +} + +static size_t utf8_length (const char *s, const char *e) { + size_t i; + for (i = 0; s < e; ++i) + s = utf8_next(s, e); + return i; +} + +static const char *utf8_offset (const char *s, const char *e, lua_Integer offset, lua_Integer idx) { + const char *p = s + offset - 1; + if (idx >= 0) { + while (p < e && idx > 0) + p = utf8_next(p, e), --idx; + return idx == 0 ? p : NULL; + } else { + while (s < p && idx < 0) + p = utf8_prev(s, p), ++idx; + return idx == 0 ? p : NULL; + } +} + +static const char *utf8_relat (const char *s, const char *e, int idx) { + return idx >= 0 ? + utf8_offset(s, e, 1, idx - 1) : + utf8_offset(s, e, e-s+1, idx); +} + +static int utf8_range(const char *s, const char *e, lua_Integer *i, lua_Integer *j) { + const char *ps = utf8_relat(s, e, CAST(int, *i)); + const char *pe = utf8_relat(s, e, CAST(int, *j)); + *i = (ps ? ps : (*i > 0 ? e : s)) - s; + *j = (pe ? utf8_next(pe, e) : (*j > 0 ? 
e : s)) - s; + return *i < *j; +} + + +/* Unicode character categories */ + +#define table_size(t) (sizeof(t)/sizeof((t)[0])) + +#define utf8_categories(X) \ + X('a', alpha) \ + X('c', cntrl) \ + X('d', digit) \ + X('l', lower) \ + X('p', punct) \ + X('s', space) \ + X('t', compose) \ + X('u', upper) \ + X('x', xdigit) + +#define utf8_converters(X) \ + X(lower) \ + X(upper) \ + X(title) \ + X(fold) + +static int find_in_range (range_table *t, size_t size, utfint ch) { + size_t begin, end; + + begin = 0; + end = size; + + while (begin < end) { + size_t mid = (begin + end) / 2; + if (t[mid].last < ch) + begin = mid + 1; + else if (t[mid].first > ch) + end = mid; + else + return (ch - t[mid].first) % t[mid].step == 0; + } + + return 0; +} + +static int convert_char (conv_table *t, size_t size, utfint ch) { + size_t begin, end; + + begin = 0; + end = size; + + while (begin < end) { + size_t mid = (begin + end) / 2; + if (t[mid].last < ch) + begin = mid + 1; + else if (t[mid].first > ch) + end = mid; + else if ((ch - t[mid].first) % t[mid].step == 0) + return ch + t[mid].offset; + else + return ch; + } + + return ch; +} + +#define define_category(cls, name) static int utf8_is##name (utfint ch)\ +{ return find_in_range(name##_table, table_size(name##_table), ch); } +#define define_converter(name) static utfint utf8_to##name (utfint ch) \ +{ return convert_char(to##name##_table, table_size(to##name##_table), ch); } +utf8_categories(define_category) +utf8_converters(define_converter) +#undef define_category +#undef define_converter + +static int utf8_isgraph (utfint ch) { + if (find_in_range(space_table, table_size(space_table), ch)) + return 0; + if (find_in_range(graph_table, table_size(graph_table), ch)) + return 1; + if (find_in_range(compose_table, table_size(compose_table), ch)) + return 1; + return 0; +} + +static int utf8_isalnum (utfint ch) { + if (find_in_range(alpha_table, table_size(alpha_table), ch)) + return 1; + if (find_in_range(alnum_extend_table, 
table_size(alnum_extend_table), ch)) + return 1; + return 0; +} + +static int utf8_width (utfint ch, int ambi_is_single) { + if (find_in_range(doublewidth_table, table_size(doublewidth_table), ch)) + return 2; + if (find_in_range(ambiwidth_table, table_size(ambiwidth_table), ch)) + return ambi_is_single ? 1 : 2; + if (find_in_range(compose_table, table_size(compose_table), ch)) + return 0; + if (find_in_range(unprintable_table, table_size(unprintable_table), ch)) + return 0; + return 1; +} + + +/* string module compatible interface */ + +static int typeerror (lua_State *L, int idx, const char *tname) +{ return luaL_error(L, "%s expected, got %s", tname, luaL_typename(L, idx)); } + +static const char *check_utf8 (lua_State *L, int idx, const char **end) { + size_t len; + const char *s = luaL_checklstring(L, idx, &len); + if (end) *end = s+len; + return s; +} + +static const char *to_utf8 (lua_State *L, int idx, const char **end) { + size_t len; + const char *s = lua_tolstring(L, idx, &len); + if (end) *end = s+len; + return s; +} + +static const char *utf8_safe_decode (lua_State *L, const char *p, utfint *pval) { + p = utf8_decode(p, pval, 0); + if (p == NULL) luaL_error(L, "invalid UTF-8 code"); + return p; +} + +static void add_utf8char (luaL_Buffer *b, utfint ch) { + char buff[UTF8_BUFFSZ]; + size_t n = utf8_encode(buff, ch); + luaL_addlstring(b, buff+UTF8_BUFFSZ-n, n); +} + +static lua_Integer byte_relat (lua_Integer pos, size_t len) { + if (pos >= 0) return pos; + else if (0u - (size_t)pos > len) return 0; + else return (lua_Integer)len + pos + 1; +} + +static int Lutf8_len (lua_State *L) { + size_t len, n; + const char *s = luaL_checklstring(L, 1, &len), *p, *e; + lua_Integer posi = byte_relat(luaL_optinteger(L, 2, 1), len); + lua_Integer pose = byte_relat(luaL_optinteger(L, 3, -1), len); + int lax = lua_toboolean(L, 4); + luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 2, + "initial position out of string"); + luaL_argcheck(L, --pose < 
(lua_Integer)len, 3, + "final position out of string"); + for (n = 0, p=s+posi, e=s+pose+1; p < e; ++n) { + if (lax) + p = utf8_next(p, e); + else { + utfint ch; + const char *np = utf8_decode(p, &ch, !lax); + if (np == NULL || utf8_invalid(ch)) { + lua_pushnil(L); + lua_pushinteger(L, p - s + 1); + return 2; + } + p = np; + } + } + lua_pushinteger(L, n); + return 1; +} + +static int Lutf8_sub (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + lua_Integer posi = luaL_checkinteger(L, 2); + lua_Integer pose = luaL_optinteger(L, 3, -1); + if (utf8_range(s, e, &posi, &pose)) + lua_pushlstring(L, s+posi, pose-posi); + else + lua_pushliteral(L, ""); + return 1; +} + +static int Lutf8_reverse (lua_State *L) { + luaL_Buffer b; + const char *prev, *pprev, *ends, *e, *s = check_utf8(L, 1, &e); + (void) ends; + int lax = lua_toboolean(L, 2); + luaL_buffinit(L, &b); + if (lax) { + for (prev = e; s < prev; e = prev) { + prev = utf8_prev(s, prev); + luaL_addlstring(&b, prev, e-prev); + } + } else { + for (prev = e; s < prev; prev = pprev) { + utfint code = 0; + ends = utf8_safe_decode(L, pprev = utf8_prev(s, prev), &code); + assert(ends == prev); + if (utf8_invalid(code)) + return luaL_error(L, "invalid UTF-8 code"); + if (!utf8_iscompose(code)) { + luaL_addlstring(&b, pprev, e-pprev); + e = pprev; + } + } + } + luaL_pushresult(&b); + return 1; +} + +static int Lutf8_byte (lua_State *L) { + size_t n = 0; + const char *e, *s = check_utf8(L, 1, &e); + lua_Integer posi = luaL_optinteger(L, 2, 1); + lua_Integer pose = luaL_optinteger(L, 3, posi); + if (utf8_range(s, e, &posi, &pose)) { + for (e = s + pose, s = s + posi; s < e; ++n) { + utfint ch = 0; + s = utf8_safe_decode(L, s, &ch); + lua_pushinteger(L, ch); + } + } + return CAST(int, n); +} + +static int Lutf8_codepoint (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + size_t len = e-s; + lua_Integer posi = byte_relat(luaL_optinteger(L, 2, 1), len); + lua_Integer pose = byte_relat(luaL_optinteger(L, 3, 
posi), len); + int lax = lua_toboolean(L, 4); + int n; + const char *se; + luaL_argcheck(L, posi >= 1, 2, "out of range"); + luaL_argcheck(L, pose <= (lua_Integer)len, 3, "out of range"); + if (posi > pose) return 0; /* empty interval; return no values */ + if (pose - posi >= INT_MAX) /* (lua_Integer -> int) overflow? */ + return luaL_error(L, "string slice too long"); + n = (int)(pose - posi + 1); + luaL_checkstack(L, n, "string slice too long"); + n = 0; /* count the number of returns */ + se = s + pose; /* string end */ + for (n = 0, s += posi - 1; s < se;) { + utfint code = 0; + s = utf8_safe_decode(L, s, &code); + if (!lax && utf8_invalid(code)) + return luaL_error(L, "invalid UTF-8 code"); + lua_pushinteger(L, code); + n++; + } + return n; +} + +static int Lutf8_char (lua_State *L) { + int i, n = lua_gettop(L); /* number of arguments */ + luaL_Buffer b; + luaL_buffinit(L, &b); + for (i = 1; i <= n; ++i) { + lua_Integer code = luaL_checkinteger(L, i); + luaL_argcheck(L, code <= UTF8_MAXCP, i, "value out of range"); + add_utf8char(&b, CAST(utfint, code)); + } + luaL_pushresult(&b); + return 1; +} + +#define bind_converter(name) \ +static int Lutf8_##name (lua_State *L) { \ + int t = lua_type(L, 1); \ + if (t == LUA_TNUMBER) \ + lua_pushinteger(L, utf8_to##name(CAST(utfint, lua_tointeger(L, 1)))); \ + else if (t == LUA_TSTRING) { \ + luaL_Buffer b; \ + const char *e, *s = to_utf8(L, 1, &e); \ + luaL_buffinit(L, &b); \ + while (s < e) { \ + utfint ch = 0; \ + s = utf8_safe_decode(L, s, &ch); \ + add_utf8char(&b, utf8_to##name(ch)); \ + } \ + luaL_pushresult(&b); \ + } \ + else return typeerror(L, 1, "number/string"); \ + return 1; \ +} +utf8_converters(bind_converter) +#undef bind_converter + + +/* unicode extra interface */ + +static const char *parse_escape (lua_State *L, const char *s, const char *e, int hex, utfint *pch) { + utfint code = 0; + int in_bracket = 0; + if (*s == '{') ++s, in_bracket = 1; + for (; s < e; ++s) { + utfint ch = (unsigned char)*s; + 
if (ch >= '0' && ch <= '9') ch = ch - '0'; + else if (hex && ch >= 'A' && ch <= 'F') ch = 10 + (ch - 'A'); + else if (hex && ch >= 'a' && ch <= 'f') ch = 10 + (ch - 'a'); + else if (!in_bracket) break; /* unbraced escape ends at the first non-digit */ + else if (ch == '}') { ++s; break; } + else luaL_error(L, "invalid escape '%c'", ch); + code *= hex ? 16 : 10; + code += ch; + } + *pch = code; + return s; +} + +/* Lua: escape(s). Copies s code by code. After a '%': a digit or '{' starts a decimal escape, x/X a hexadecimal one and u/U a decimal one (the letter is skipped only when more input follows it), all parsed by parse_escape(); any other following character is decoded and emitted itself. */ static int Lutf8_escape (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + luaL_Buffer b; + luaL_buffinit(L, &b); + while (s < e) { + utfint ch = 0; + s = utf8_safe_decode(L, s, &ch); + if (ch == '%') { + int hex = 0; + switch (*s) { + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': case '{': + break; + case 'x': case 'X': hex = 1; /* fall through */ + case 'u': case 'U': if (s+1 < e) { ++s; break; } + /* fall through */ + default: + s = utf8_safe_decode(L, s, &ch); + goto next; + } + s = parse_escape(L, s, e, hex, &ch); + } +next: + add_utf8char(&b, ch); + } + luaL_pushresult(&b); + return 1; +} + +/* Lua: insert(s, [idx,] substring). When argument 2 is a number it is the insertion index, resolved by utf8_relat() (0 keeps the default, the end of s), and the substring is argument 3. Otherwise the substring is argument 2 and is appended to s. A NULL result from utf8_relat() raises "invalid index". */ static int Lutf8_insert (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + size_t sublen; + const char *subs; + luaL_Buffer b; + int nargs = 2; + const char *first = e; + if (lua_type(L, 2) == LUA_TNUMBER) { + int idx = (int)lua_tointeger(L, 2); + if (idx != 0) first = utf8_relat(s, e, idx); + luaL_argcheck(L, first, 2, "invalid index"); + ++nargs; + } + subs = luaL_checklstring(L, nargs, &sublen); + luaL_buffinit(L, &b); + luaL_addlstring(&b, s, first-s); + luaL_addlstring(&b, subs, sublen); + luaL_addlstring(&b, first, e-first); + luaL_pushresult(&b); + return 1; +} + +/* Lua: remove(s [, i [, j]]). i and j both default to -1. Returns s unchanged when utf8_range() fails; otherwise returns s with the bytes between the resolved offsets posi and pose cut out. */ static int Lutf8_remove (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + lua_Integer posi = luaL_optinteger(L, 2, -1); + lua_Integer pose = luaL_optinteger(L, 3, -1); + if (!utf8_range(s, e, &posi, &pose)) + lua_settop(L, 1); + else { + luaL_Buffer b; + luaL_buffinit(L, &b); + luaL_addlstring(&b, s, posi); + luaL_addlstring(&b, s+pose, e-s-pose); + 
luaL_pushresult(&b); + } + return 1; +} + +/* Shared tail of charpos() and next(). Locates a character start p: through utf8_offset() when idx != 0, otherwise the byte at `offset`, stepping back with utf8_prev() if that byte is a continuation byte. Pushes the 1-based byte position of p and the code decoded there. Returns no values when p is NULL or is the end of the string. */ static int push_offset (lua_State *L, const char *s, const char *e, lua_Integer offset, lua_Integer idx) { + utfint ch = 0; + const char *p; + if (idx != 0) + p = utf8_offset(s, e, offset, idx); + else if (p = s+offset-1, iscont(p)) + p = utf8_prev(s, p); + if (p == NULL || p == e) return 0; + utf8_decode(p, &ch, 0); + lua_pushinteger(L, p-s+1); + lua_pushinteger(L, ch); + return 2; +} + +/* Lua: charpos(s [[, byte_offset], char_index]). Without a third argument, argument 2 is a character index (default 0): a positive index is made 0-based and counted from byte 1, a negative one is counted from offset len+1. With a third argument, argument 2 is a byte offset (passed through byte_relat() and clamped to at least 1) and argument 3 is the index relative to it. */ static int Lutf8_charpos (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + lua_Integer offset = 1; + if (lua_isnoneornil(L, 3)) { + lua_Integer idx = luaL_optinteger(L, 2, 0); + if (idx > 0) --idx; + else if (idx < 0) offset = e-s+1; + return push_offset(L, s, e, offset, idx); + } + offset = byte_relat(luaL_optinteger(L, 2, 1), e-s); + if (offset < 1) offset = 1; + return push_offset(L, s, e, offset, luaL_checkinteger(L, 3)); +} + +/* Lua: offset(s, n [, i]). Returns the byte position where the n-th character, counted from byte position i, starts, or nil when there is no such character. i defaults to 1 for n >= 0 and to len+1 otherwise. n == 0 returns the start of the character that contains byte i. Raises an error when i is out of range or, for n != 0, points at a continuation byte. */ static int Lutf8_offset (lua_State *L) { + size_t len; + const char *s = luaL_checklstring(L, 1, &len); + lua_Integer n = luaL_checkinteger(L, 2); + lua_Integer posi = (n >= 0) ? 1 : len + 1; + posi = byte_relat(luaL_optinteger(L, 3, posi), len); + luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 3, + "position out of range"); /* the check also makes posi 0-based */ + if (n == 0) { + /* find beginning of current byte sequence */ + while (posi > 0 && iscont(s + posi)) posi--; + } else { + if (iscont(s + posi)) + return luaL_error(L, "initial position is a continuation byte"); + if (n < 0) { + while (n < 0 && posi > 0) { /* move back */ + do { /* find beginning of previous character */ + posi--; + } while (posi > 0 && iscont(s + posi)); + n++; + } + } else { + n--; /* do not move for 1st character */ + while (n > 0 && posi < (lua_Integer)len) { + do { /* find beginning of next character */ + posi++; + } while (iscont(s + posi)); /* (cannot pass final '\0') */ + n--; + } + } + } + if (n == 0) /* did it find given character? 
*/ + lua_pushinteger(L, posi + 1); + else /* no such character */ + lua_pushnil(L); + return 1; +} + +/* Lua: next(s [, byte_offset [, idx]]). The byte offset defaults to 1. idx defaults to 1 when a byte offset was supplied and to 0 otherwise. The lookup and the two results (byte position, code) come from push_offset(). */ static int Lutf8_next (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + lua_Integer offset = byte_relat(luaL_optinteger(L, 2, 1), e-s); + lua_Integer idx = luaL_optinteger(L, 3, !lua_isnoneornil(L, 2)); + return push_offset(L, s, e, offset, idx); +} + +/* One step of the codes() iterator. Argument 2 is the byte position returned by the previous step (<= 0 on the first call). Pushes the byte position and code of the next character, or returns nothing once the end of the string is reached. With strict set, an invalid code raises "invalid UTF-8 code". */ static int iter_aux (lua_State *L, int strict) { + const char *e, *s = check_utf8(L, 1, &e); + int n = CAST(int, lua_tointeger(L, 2)); + const char *p = n <= 0 ? s : utf8_next(s+n-1, e); + if (p < e) { + utfint code = 0; + utf8_safe_decode(L, p, &code); + if (strict && utf8_invalid(code)) + return luaL_error(L, "invalid UTF-8 code"); + lua_pushinteger(L, p-s+1); + lua_pushinteger(L, code); + return 2; + } + return 0; /* no more codepoints */ +} + +/* lua_CFunction wrappers that fix the strict flag of iter_aux() */ static int iter_auxstrict (lua_State *L) { return iter_aux(L, 1); } +static int iter_auxlax (lua_State *L) { return iter_aux(L, 0); } + +/* Lua: codes(s [, lax]). Returns the three values a generic for loop needs: the strict or lax iterator function, the string s, and the initial control value 0. */ static int Lutf8_codes (lua_State *L) { + int lax = lua_toboolean(L, 2); + luaL_checkstring(L, 1); + lua_pushcfunction(L, lax ? iter_auxlax : iter_auxstrict); + lua_pushvalue(L, 1); + lua_pushinteger(L, 0); + return 3; +} + +/* Lua: width(s_or_code [, ambi_is_double [, default_width]]). For a number, returns utf8_width() of that code; for a string, the sum over all of its codes; any other type is a "number/string" type error. A width of 0 is replaced by default_width (default 0). A true second argument clears ambi_is_single, which is forwarded to utf8_width(). */ static int Lutf8_width (lua_State *L) { + int t = lua_type(L, 1); + int ambi_is_single = !lua_toboolean(L, 2); + int default_width = CAST(int, luaL_optinteger(L, 3, 0)); + if (t == LUA_TNUMBER) { + size_t chwidth = utf8_width(CAST(utfint, lua_tointeger(L, 1)), ambi_is_single); + if (chwidth == 0) chwidth = default_width; + lua_pushinteger(L, (lua_Integer)chwidth); + } else if (t != LUA_TSTRING) + return typeerror(L, 1, "number/string"); + else { + const char *e, *s = to_utf8(L, 1, &e); + int width = 0; + while (s < e) { + utfint ch = 0; + int chwidth; + s = utf8_safe_decode(L, s, &ch); + chwidth = utf8_width(ch, ambi_is_single); + width += chwidth == 0 ? 
default_width : chwidth; + } + lua_pushinteger(L, (lua_Integer)width); + } + return 1; +} + +/* Lua: widthindex(s, width [, ambi_is_double [, default_width]]). Walks s, subtracting each character's width (utf8_width(), with 0 replaced by default_width) from `width`. At the first character where the remainder drops to 0 or below it returns three values: that character's index, the width that was still remaining before this character was subtracted, and the character's width. If s is exhausted first it returns a single value, the index one past the last character. */ static int Lutf8_widthindex (lua_State *L) { + const char *e, *s = check_utf8(L, 1, &e); + int width = CAST(int, luaL_checkinteger(L, 2)); + int ambi_is_single = !lua_toboolean(L, 3); + int default_width = CAST(int, luaL_optinteger(L, 4, 0)); + size_t idx = 1; + while (s < e) { + utfint ch = 0; + size_t chwidth; + s = utf8_safe_decode(L, s, &ch); + chwidth = utf8_width(ch, ambi_is_single); + if (chwidth == 0) chwidth = default_width; + width -= CAST(int, chwidth); + if (width <= 0) { + lua_pushinteger(L, idx); + lua_pushinteger(L, width + chwidth); + lua_pushinteger(L, chwidth); + return 3; + } + ++idx; + } + lua_pushinteger(L, (lua_Integer)idx); + return 1; +} + +/* Lua: ncasecmp(a, b). Compares the two strings code by code after mapping each code through utf8_tofold(). Returns -1, 0 or 1. When one string ends first, the placeholder codes 0 and 1 make the shorter string compare as the smaller one. */ static int Lutf8_ncasecmp (lua_State *L) { + const char *e1, *s1 = check_utf8(L, 1, &e1); + const char *e2, *s2 = check_utf8(L, 2, &e2); + while (s1 < e1 || s2 < e2) { + utfint ch1 = 0, ch2 = 0; + if (s1 == e1) + ch2 = 1; + else if (s2 == e2) + ch1 = 1; + else { + s1 = utf8_safe_decode(L, s1, &ch1); + s2 = utf8_safe_decode(L, s2, &ch2); + ch1 = utf8_tofold(ch1); + ch2 = utf8_tofold(ch2); + } + if (ch1 != ch2) { + lua_pushinteger(L, ch1 > ch2 ? 
1 : -1); + return 1; + } + } + lua_pushinteger(L, 0); + return 1; +} + + +/* utf8 pattern matching implementation */ + +/* size of MatchState.capture; start_capture() raises "too many captures" beyond it */ #ifndef LUA_MAXCAPTURES +# define LUA_MAXCAPTURES 32 +#endif /* LUA_MAXCAPTURES */ + +/* sentinel values stored in capture[].len: capture still open / position capture "()" */ #define CAP_UNFINISHED (-1) +#define CAP_POSITION (-2) + + +/* State shared by all matcher functions for one match attempt over a subject and a pattern. */ typedef struct MatchState { + int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ + const char *src_init; /* init of source string */ + const char *src_end; /* end ('\0') of source string */ + const char *p_end; /* end ('\0') of pattern */ + lua_State *L; + int level; /* total number of captures (finished or unfinished) */ + struct { + const char *init; /* where the capture starts in the subject */ + ptrdiff_t len; /* byte length, or CAP_UNFINISHED / CAP_POSITION */ + } capture[LUA_MAXCAPTURES]; +} MatchState; + +/* recursive function */ +static const char *match (MatchState *ms, const char *s, const char *p); + +/* maximum recursion depth for 'match' */ +#if !defined(MAXCCALLS) +#define MAXCCALLS 200 +#endif + +/* L_ESC is the pattern escape character; SPECIALS lists the characters that make nospecials() treat a pattern as non-plain */ #define L_ESC '%' +#define SPECIALS "^$*+?.([%-" + +/* Converts the digit character l of a %N back-reference into a 0-based capture index. Raises "invalid capture index" when that capture does not exist or is still open. */ static int check_capture (MatchState *ms, int l) { + l -= '1'; + if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) + return luaL_error(ms->L, "invalid capture index %%%d", l + 1); + return l; +} + +/* Returns the index of the most recently opened capture that is still unfinished; raises "invalid pattern capture" when there is none. */ static int capture_to_close (MatchState *ms) { + int level = ms->level; + while (--level >= 0) + if (ms->capture[level].len == CAP_UNFINISHED) return level; + return luaL_error(ms->L, "invalid pattern capture"); +} + +/* Returns the position just past the single pattern item that starts at p: an escaped character, a [...] set, or one plain character. Raises an error for a pattern that ends right after '%' or that lacks the closing ']'. */ static const char *classend (MatchState *ms, const char *p) { + utfint ch = 0; + p = utf8_safe_decode(ms->L, p, &ch); + switch (ch) { + case L_ESC: { + if (p == ms->p_end) + luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); + return utf8_next(p, ms->p_end); + } + case '[': { + if (*p == '^') p++; + do { /* look for a `]' */ + if (p == ms->p_end) + luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); + if (*(p++) == L_ESC && p < ms->p_end) + p++; /* skip escapes (e.g. 
`%]') */ + } while (*p != ']'); + return p+1; + } + default: { + return p; + } + } +} + +/* Tests code c against the class character cl (the character that followed '%' in the pattern). The class is selected by the lower-cased cl; when cl itself is not lower case the result is inverted. A character that names no class matches only itself. */ static int match_class (utfint c, utfint cl) { + int res; + switch (utf8_tolower(cl)) { +#define X(cls, name) case cls: res = utf8_is##name(c); break; + utf8_categories(X) +#undef X + case 'g' : res = utf8_isgraph(c); break; + case 'w' : res = utf8_isalnum(c); break; + case 'z' : res = (c == 0); break; /* deprecated option */ + default: return (cl == c); + } + return (utf8_islower(cl) ? res : !res); +} + +/* Tests code c against the set that starts at p (the '[') and ends at ec. Handles a leading '^' (negated set), %-classes via match_class(), and a-b ranges compared by code value. Returns 1 on a match and 0 otherwise, with the sense flipped for a negated set. */ static int matchbracketclass (MatchState *ms, utfint c, const char *p, const char *ec) { + int sig = 1; + assert(*p == '['); + if (*++p == '^') { + sig = 0; + p++; /* skip the `^' */ + } + while (p < ec) { + utfint ch = 0; + p = utf8_safe_decode(ms->L, p, &ch); + if (ch == L_ESC) { + p = utf8_safe_decode(ms->L, p, &ch); + if (match_class(c, ch)) + return sig; + } else { + utfint next = 0; + const char *np = utf8_safe_decode(ms->L, p, &next); + if (next == '-' && np < ec) { /* range ch-next */ + p = utf8_safe_decode(ms->L, np, &next); + if (ch <= c && c <= next) + return sig; + } + else if (ch == c) return sig; + } + } + return !sig; +} + +/* Returns non-zero when the character at s matches the single pattern item that starts at p and ends at ep. Always returns 0 at the end of the subject. */ static int singlematch (MatchState *ms, const char *s, const char *p, const char *ep) { + if (s >= ms->src_end) + return 0; + else { + utfint ch=0, pch=0; + utf8_safe_decode(ms->L, s, &ch); + p = utf8_safe_decode(ms->L, p, &pch); + switch (pch) { + case '.': return 1; /* matches any char */ + case L_ESC: utf8_safe_decode(ms->L, p, &pch); + return match_class(ch, pch); + case '[': return matchbracketclass(ms, ch, p-1, ep-1); + default: return pch == ch; + } + } +} + +/* Implements the %b item. *p points at the two delimiter characters and is advanced past both. Returns the position after the balanced span that starts at s, or NULL when s does not start with the opening delimiter or the subject ends before the span is balanced. */ static const char *matchbalance (MatchState *ms, const char *s, const char **p) { + utfint ch=0, begin=0, end=0; + *p = utf8_safe_decode(ms->L, *p, &begin); + if (*p >= ms->p_end) + luaL_error(ms->L, "malformed pattern " + "(missing arguments to " LUA_QL("%%b") ")"); + *p = utf8_safe_decode(ms->L, *p, &end); + s = utf8_safe_decode(ms->L, s, &ch); + if (ch != begin) return NULL; + else { + int cont = 
1; + while (s < ms->src_end) { + s = utf8_safe_decode(ms->L, s, &ch); + if (ch == end) { + if (--cont == 0) return s; + } + else if (ch == begin) cont++; + } + } + return NULL; /* string ends out of balance */ +} + +/* Greedy repetition: consumes as many characters matching the item between p and ep as possible, then gives them back one character at a time (utf8_prev) until the rest of the pattern, which starts after the suffix at ep, matches. Returns the end of the overall match or NULL. */ static const char *max_expand (MatchState *ms, const char *s, const char *p, const char *ep) { + const char *m = s; /* matched end of single match p */ + while (singlematch(ms, m, p, ep)) + m = utf8_next(m, ms->src_end); + /* keeps trying to match with the maximum repetitions */ + while (s <= m) { + const char *res = match(ms, m, ep+1); + if (res) return res; + /* else didn't match; reduce 1 repetition to try again */ + if (s == m) break; + m = utf8_prev(s, m); + } + return NULL; +} + +/* Lazy repetition: tries the rest of the pattern first and consumes one more matching character only when that fails. Returns the end of the overall match or NULL. */ static const char *min_expand (MatchState *ms, const char *s, const char *p, const char *ep) { + for (;;) { + const char *res = match(ms, s, ep+1); + if (res != NULL) + return res; + else if (singlematch(ms, s, p, ep)) + s = utf8_next(s, ms->src_end); /* try with one more repetition */ + else return NULL; + } +} + +/* Opens a new capture at s, storing `what` (CAP_UNFINISHED or CAP_POSITION as passed by match()) as its length marker, and continues matching at p. The capture is dropped again when the rest of the pattern fails. */ static const char *start_capture (MatchState *ms, const char *s, const char *p, int what) { + const char *res; + int level = ms->level; + if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); + ms->capture[level].init = s; + ms->capture[level].len = what; + ms->level = level+1; + if ((res=match(ms, s, p)) == NULL) /* match failed? */ + ms->level--; /* undo capture */ + return res; +} + +/* Closes the innermost open capture at s and continues matching at p; the capture is marked unfinished again when the rest of the pattern fails. */ static const char *end_capture (MatchState *ms, const char *s, const char *p) { + int l = capture_to_close(ms); + const char *res; + ms->capture[l].len = s - ms->capture[l].init; /* close capture */ + if ((res = match(ms, s, p)) == NULL) /* match failed? 
*/ + ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ + return res; +} + +/* Back-reference %N. l is the digit character from the pattern. Succeeds when the subject at s starts with the same bytes as that capture and returns the position after them; returns NULL otherwise. */ static const char *match_capture (MatchState *ms, const char *s, int l) { + size_t len; + l = check_capture(ms, l); + len = ms->capture[l].len; + if ((size_t)(ms->src_end-s) >= len && + memcmp(ms->capture[l].init, s, len) == 0) + return s+len; + else return NULL; +} + +/* Core matcher: tries to match the pattern starting at p against the subject starting at s. Returns the position after the match, or NULL. ms->matchdepth is decremented on entry and restored on exit; reaching 0 raises "pattern too complex". */ static const char *match (MatchState *ms, const char *s, const char *p) { + if (ms->matchdepth-- == 0) + luaL_error(ms->L, "pattern too complex"); + init: /* using goto's to optimize tail recursion */ + if (p != ms->p_end) { /* end of pattern? */ + utfint ch = 0; + utf8_safe_decode(ms->L, p, &ch); + switch (ch) { + case '(': { /* start capture */ + if (*(p + 1) == ')') /* position capture? */ + s = start_capture(ms, s, p + 2, CAP_POSITION); + else + s = start_capture(ms, s, p + 1, CAP_UNFINISHED); + break; + } + case ')': { /* end capture */ + s = end_capture(ms, s, p + 1); + break; + } + case '$': { + if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */ + goto dflt; /* no; go to default */ + s = (s == ms->src_end) ? s : NULL; /* check end of string */ + break; + } + case L_ESC: { /* escaped sequence not in the format class[*+?-]? */ + const char *prev_p = p; /* kept so the default case can rewind to the '%' */ + p = utf8_safe_decode(ms->L, p+1, &ch); + switch (ch) { + case 'b': { /* balanced string? */ + s = matchbalance(ms, s, &p); + if (s != NULL) + goto init; /* return match(ms, s, p + 4); */ + /* else fail (s == NULL) */ + break; + } + case 'f': { /* frontier? 
*/ + const char *ep; utfint previous = 0, current = 0; + if (*p != '[') + luaL_error(ms->L, "missing " LUA_QL("[") " after " + LUA_QL("%%f") " in pattern"); + ep = classend(ms, p); /* points to what is next */ + if (s != ms->src_init) + utf8_decode(utf8_prev(ms->src_init, s), &previous, 0); + if (s != ms->src_end) + utf8_decode(s, ¤t, 0); + if (!matchbracketclass(ms, previous, p, ep - 1) && + matchbracketclass(ms, current, p, ep - 1)) { + p = ep; goto init; /* return match(ms, s, ep); */ + } + s = NULL; /* match failed */ + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': { /* capture results (%0-%9)? */ + s = match_capture(ms, s, ch); + if (s != NULL) goto init; /* return match(ms, s, p + 2) */ + break; + } + default: p = prev_p; goto dflt; + } + break; + } + default: dflt: { /* pattern class plus optional suffix */ + const char *ep = classend(ms, p); /* points to optional suffix */ + /* does not match at least once? */ + if (!singlematch(ms, s, p, ep)) { + if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? 
*/ + p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ + } else /* '+' or no suffix */ + s = NULL; /* fail */ + } else { /* matched once */ + const char *next_s = utf8_next(s, ms->src_end); /* subject position after the one matched character */ + switch (*ep) { /* handle optional suffix */ + case '?': { /* optional */ + const char *res; + const char *next_ep = utf8_next(ep, ms->p_end); + if ((res = match(ms, next_s, next_ep)) != NULL) + s = res; + else { + p = next_ep; goto init; /* else return match(ms, s, ep + 1); */ + } + break; + } + case '+': /* 1 or more repetitions */ + s = next_s; /* 1 match already done */ + /* fall through */ + case '*': /* 0 or more repetitions */ + s = max_expand(ms, s, p, ep); + break; + case '-': /* 0 or more repetitions (minimum) */ + s = min_expand(ms, s, p, ep); + break; + default: /* no suffix */ + s = next_s; p = ep; goto init; /* return match(ms, s + 1, ep); */ + } + } + break; + } + } + } + ms->matchdepth++; + return s; +} + +/* Plain byte search: returns the first occurrence of the l2 bytes at s2 inside the l1 bytes at s1, or NULL. Lengths are explicit, so the search does not stop at a '\0' byte. */ static const char *lmemfind (const char *s1, size_t l1, const char *s2, size_t l2) { + if (l2 == 0) return s1; /* empty strings are everywhere */ + else if (l2 > l1) return NULL; /* avoids a negative `l1' */ + else { + const char *init; /* to search for a `*s2' inside `s1' */ + l2--; /* 1st char will be checked by `memchr' */ + l1 = l1-l2; /* `s2' cannot be found after that */ + while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { + init++; /* 1st char is already checked */ + if (memcmp(init, s2+1, l2) == 0) + return init-1; + else { /* correct `l1' and `s1' to try again */ + l1 -= init-s1; + s1 = init; + } + } + return NULL; /* not found */ + } +} + +/* Counts the utf8_next() steps needed to walk from s to byte position p, stopping at e. Returns that count when the walk lands exactly on p, and the count minus one otherwise. */ static int get_index (const char *p, const char *s, const char *e) { + int idx; + for (idx = 0; s < e && s < p; ++idx) + s = utf8_next(s, e); + return s == p ? 
idx : idx - 1; +} + +/* Pushes capture i onto the Lua stack: the captured bytes, or for a position capture the 1-based character index of its start in the subject. When the pattern has no captures, i == 0 pushes the whole match from s to e. Raises an error for any other out-of-range index or for a capture that is still open. */ static void push_onecapture (MatchState *ms, int i, const char *s, const char *e) { + if (i >= ms->level) { + if (i == 0) /* ms->level == 0, too */ + lua_pushlstring(ms->L, s, e - s); /* add whole match */ + else + luaL_error(ms->L, "invalid capture index"); + } else { + ptrdiff_t l = ms->capture[i].len; + if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); + if (l == CAP_POSITION) { + int idx = get_index(ms->capture[i].init, ms->src_init, ms->src_end); + lua_pushinteger(ms->L, idx+1); + } else + lua_pushlstring(ms->L, ms->capture[i].init, l); + } +} + +/* Pushes every capture of the current match, or the whole match when the pattern has no captures and s is non-NULL. Returns the number of values pushed. */ static int push_captures (MatchState *ms, const char *s, const char *e) { + int i; + int nlevels = (ms->level == 0 && s) ? 1 : ms->level; + luaL_checkstack(ms->L, nlevels, "too many captures"); + for (i = 0; i < nlevels; i++) + push_onecapture(ms, i, s, e); + return nlevels; /* number of strings pushed */ +} + +/* check whether pattern has no special characters */ +static int nospecials (const char *p, const char * ep) { + while (p < ep) { + if (strpbrk(p, SPECIALS)) + return 0; /* pattern has a special character */ + p += strlen(p) + 1; /* may have more after \0 */ + } + return 1; /* no special chars found */ +} + + +/* utf8 pattern matching interface */ + +/* Shared implementation of find() (find != 0) and match() (find == 0). Argument 1 is the subject, argument 2 the pattern, argument 3 the starting character index (default 1; 0 is treated as 1), resolved to a byte pointer by utf8_relat(). When that fails, a positive index returns nil and any other index starts at the beginning of the subject. */ static int find_aux (lua_State *L, int find) { + const char *es, *s = check_utf8(L, 1, &es); + const char *ep, *p = check_utf8(L, 2, &ep); + lua_Integer idx = luaL_optinteger(L, 3, 1); + const char *init; + if (!idx) idx = 1; + init = utf8_relat(s, es, CAST(int, idx)); + if (init == NULL) { + if (idx > 0) { + lua_pushnil(L); /* cannot find anything */ + return 1; + } + init = s; + } + /* explicit request or no special characters? 
*/ + if (find && (lua_toboolean(L, 4) || nospecials(p, ep))) { + /* do a plain search */ + const char *s2 = lmemfind(init, es-init, p, ep-p); + if (s2) { + const char *e2 = s2 + (ep - p); + if (iscont(e2)) e2 = utf8_next(e2, es); /* the byte match ended inside a character: move the end to the next character start */ + lua_pushinteger(L, idx = get_index(s2, s, es) + 1); + lua_pushinteger(L, idx + get_index(e2, s2, es) - 1); + return 2; + } + } else { /* pattern search: try the pattern at each character from init on, or only at init when anchored with '^' */ + MatchState ms; + int anchor = (*p == '^'); + if (anchor) p++; /* skip anchor character */ + if (idx < 0) idx += utf8_length(s, es)+1; /* TODO not very good */ + ms.L = L; + ms.matchdepth = MAXCCALLS; + ms.src_init = s; + ms.src_end = es; + ms.p_end = ep; + do { + const char *res; + ms.level = 0; + assert(ms.matchdepth == MAXCCALLS); + if ((res=match(&ms, init, p)) != NULL) { + if (find) { + lua_pushinteger(L, idx); /* start */ + lua_pushinteger(L, idx + utf8_length(init, res) - 1); /* end */ + return push_captures(&ms, NULL, 0) + 2; + } else + return push_captures(&ms, init, res); + } + if (init == es) break; + idx += 1; + init = utf8_next(init, es); + } while (init <= es && !anchor); + } + lua_pushnil(L); /* not found */ + return 1; +} + +/* Lua: find(s, pattern [, init [, plain]]) */ static int Lutf8_find (lua_State *L) { return find_aux(L, 1); } +/* Lua: match(s, pattern [, init]) */ static int Lutf8_match (lua_State *L) { return find_aux(L, 0); } + +/* Iterator body for gmatch(). The upvalues are the subject, the pattern and the byte offset at which the next search starts. Returns the captures of the next match and stores the new start offset in upvalue 3, or returns nothing when no match is left. */ static int gmatch_aux (lua_State *L) { + MatchState ms; + const char *es, *s = check_utf8(L, lua_upvalueindex(1), &es); + const char *ep, *p = check_utf8(L, lua_upvalueindex(2), &ep); + const char *src; + ms.L = L; + ms.matchdepth = MAXCCALLS; + ms.src_init = s; + ms.src_end = es; + ms.p_end = ep; + for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); + src <= ms.src_end; + src = utf8_next(src, ms.src_end)) { + const char *e; + ms.level = 0; + assert(ms.matchdepth == MAXCCALLS); + if ((e = match(&ms, src, p)) != NULL) { + lua_Integer newstart = e-s; /* byte offset for the next call */ + /* NOTE(review): the increment below advances by one byte, not one character, so after an empty match in front of a multi-byte character the next call would start on a continuation byte. Confirm how match() and utf8_safe_decode() cope with that. */ if (e == src) newstart++; /* empty match? 
go at least one position */ + lua_pushinteger(L, newstart); + lua_replace(L, lua_upvalueindex(3)); + return push_captures(&ms, src, e); + } + if (src == ms.src_end) break; + } + return 0; /* not found */ +} + +/* Lua: gmatch(s, pattern). Checks both arguments and returns a closure over gmatch_aux whose upvalues are s, the pattern and a start offset of 0. */ static int Lutf8_gmatch (lua_State *L) { + luaL_checkstring(L, 1); + luaL_checkstring(L, 2); + lua_settop(L, 2); + lua_pushinteger(L, 0); + lua_pushcclosure(L, gmatch_aux, 3); + return 1; +} + +/* gsub with a string replacement: copies argument 3 into b code by code. After a '%', the digit 0 inserts the whole match from s to e, other digits insert the corresponding capture, and a second '%' inserts a literal '%'; any other character raises "invalid use of '%' in replacement string". */ static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, const char *e) { + const char *new_end, *news = to_utf8(ms->L, 3, &new_end); + while (news < new_end) { + utfint ch = 0; + news = utf8_safe_decode(ms->L, news, &ch); + if (ch != L_ESC) + add_utf8char(b, ch); + else { + news = utf8_safe_decode(ms->L, news, &ch); /* skip ESC */ + if (!utf8_isdigit(ch)) { + if (ch != L_ESC) + luaL_error(ms->L, "invalid use of " LUA_QL("%c") + " in replacement string", L_ESC); + add_utf8char(b, ch); + } else if (ch == '0') + luaL_addlstring(b, s, e-s); + else { + push_onecapture(ms, ch-'1', s, e); + luaL_addvalue(b); /* add capture to accumulated result */ + } + } + } +} + +/* Appends the replacement for the match from s to e to b, according to the Lua type tr of argument 3: a function is called with the captures, a table is indexed with the first capture, and anything else is expanded by add_s(). A nil or false result keeps the original matched text; a result that lua_isstring() rejects raises "invalid replacement value". */ static void add_value (MatchState *ms, luaL_Buffer *b, const char *s, const char *e, int tr) { + lua_State *L = ms->L; + switch (tr) { + case LUA_TFUNCTION: { + int n; + lua_pushvalue(L, 3); + n = push_captures(ms, s, e); + lua_call(L, n, 1); + break; + } + case LUA_TTABLE: { + push_onecapture(ms, 0, s, e); + lua_gettable(L, 3); + break; + } + default: { /* LUA_TNUMBER or LUA_TSTRING */ + add_s(ms, b, s, e); + return; + } + } + if (!lua_toboolean(L, -1)) { /* nil or false? 
*/ + lua_pop(L, 1); + lua_pushlstring(L, s, e - s); /* keep original text */ + } else if (!lua_isstring(L, -1)) + luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1)); + luaL_addvalue(b); /* add result to accumulator */ +} + +/* Lua: gsub(s, pattern, repl [, n]). repl must be a string, number, function or table. n caps the number of substitutions and defaults to the byte length of s plus one. A leading '^' in the pattern anchors it, so at most one match attempt is made. Returns the resulting string and the number of substitutions performed. */ static int Lutf8_gsub (lua_State *L) { + const char *es, *s = check_utf8(L, 1, &es); + const char *ep, *p = check_utf8(L, 2, &ep); + int tr = lua_type(L, 3); + lua_Integer max_s = luaL_optinteger(L, 4, (es-s)+1); + int anchor = (*p == '^'); + lua_Integer n = 0; + MatchState ms; + luaL_Buffer b; + luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || + tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, + "string/function/table expected"); + luaL_buffinit(L, &b); + if (anchor) p++; /* skip anchor character */ + ms.L = L; + ms.matchdepth = MAXCCALLS; + ms.src_init = s; + ms.src_end = es; + ms.p_end = ep; + while (n < max_s) { + const char *e; + ms.level = 0; + assert(ms.matchdepth == MAXCCALLS); + e = match(&ms, s, p); + if (e) { + n++; + add_value(&ms, &b, s, e, tr); + } + if (e && e > s) /* non empty match? 
*/ + s = e; /* skip it */ + else if (s < es) { /* no match or empty match: copy one character through unchanged */ + utfint ch = 0; + s = utf8_safe_decode(L, s, &ch); + add_utf8char(&b, ch); + } else break; + if (anchor) break; + } + luaL_addlstring(&b, s, es-s); /* copy whatever follows the last match */ + luaL_pushresult(&b); + lua_pushinteger(L, n); /* number of substitutions */ + return 2; +} + + +/* lua module import interface */ + +/* Lua pattern exported as `charpattern`: one lead byte followed by any number of continuation bytes. The pre-5.2 variant spells the NUL byte as %z. */ #if LUA_VERSION_NUM >= 502 +static const char UTF8PATT[] = "[\0-\x7F\xC2-\xF4][\x80-\xBF]*"; +#else +static const char UTF8PATT[] = "[%z\1-\x7F\xC2-\xF4][\x80-\xBF]*"; +#endif + +/* Module entry point: builds the library table from the Lutf8_* functions listed in libs, stores UTF8PATT in its `charpattern` field (the length comes from sizeof, so an embedded '\0' is kept), and returns the table. */ int luaopen_utf8 (lua_State *L) { + luaL_Reg libs[] = { +#define ENTRY(name) { #name, Lutf8_##name } + ENTRY(offset), + ENTRY(codes), + ENTRY(codepoint), + + ENTRY(len), + ENTRY(sub), + ENTRY(reverse), + ENTRY(lower), + ENTRY(upper), + ENTRY(title), + ENTRY(fold), + ENTRY(byte), + ENTRY(char), + ENTRY(escape), + ENTRY(insert), + ENTRY(remove), + ENTRY(charpos), + ENTRY(next), + ENTRY(width), + ENTRY(widthindex), + ENTRY(ncasecmp), + ENTRY(find), + ENTRY(gmatch), + ENTRY(gsub), + ENTRY(match), +#undef ENTRY + { NULL, NULL } + }; + + luaL_newlib(L, libs); + + lua_pushlstring(L, UTF8PATT, sizeof(UTF8PATT)-1); + lua_setfield(L, -2, "charpattern"); + + return 1; +} diff --git a/src/meson.build b/src/meson.build index 5ad95bdd..7229ac72 100644 --- a/src/meson.build +++ b/src/meson.build @@ -4,6 +4,7 @@ lite_sources = [ 'api/regex.c', 'api/system.c', 'api/process.c', + 'api/utf8.c', 'renderer.c', 'renwindow.c', 'rencache.c', diff --git a/src/unidata.h b/src/unidata.h new file mode 100644 index 00000000..13713f6c --- /dev/null +++ b/src/unidata.h @@ -0,0 +1,3710 @@ +/* + * unidata.h - generated by parseucd.lua + */ +#ifndef unidata_h +#define unidata_h + +#ifndef utfint +# define utfint utfint +typedef unsigned int utfint; +#endif + +typedef struct range_table { + utfint first; + utfint last; + int step; +} range_table; + +typedef struct conv_table { + utfint first; + utfint last; + int step; + int offset; +} conv_table; + +static struct range_table alpha_table[] = { + { 
0x41, 0x5A, 1 }, + { 0x61, 0x7A, 1 }, + { 0xAA, 0xB5, 11 }, + { 0xBA, 0xC0, 6 }, + { 0xC1, 0xD6, 1 }, + { 0xD8, 0xF6, 1 }, + { 0xF8, 0x2C1, 1 }, + { 0x2C6, 0x2D1, 1 }, + { 0x2E0, 0x2E4, 1 }, + { 0x2EC, 0x2EE, 2 }, + { 0x345, 0x370, 43 }, + { 0x371, 0x374, 1 }, + { 0x376, 0x377, 1 }, + { 0x37A, 0x37D, 1 }, + { 0x37F, 0x386, 7 }, + { 0x388, 0x38A, 1 }, + { 0x38C, 0x38E, 2 }, + { 0x38F, 0x3A1, 1 }, + { 0x3A3, 0x3F5, 1 }, + { 0x3F7, 0x481, 1 }, + { 0x48A, 0x52F, 1 }, + { 0x531, 0x556, 1 }, + { 0x559, 0x560, 7 }, + { 0x561, 0x588, 1 }, + { 0x5B0, 0x5BD, 1 }, + { 0x5BF, 0x5C1, 2 }, + { 0x5C2, 0x5C4, 2 }, + { 0x5C5, 0x5C7, 2 }, + { 0x5D0, 0x5EA, 1 }, + { 0x5EF, 0x5F2, 1 }, + { 0x610, 0x61A, 1 }, + { 0x620, 0x657, 1 }, + { 0x659, 0x65F, 1 }, + { 0x66E, 0x6D3, 1 }, + { 0x6D5, 0x6DC, 1 }, + { 0x6E1, 0x6E8, 1 }, + { 0x6ED, 0x6EF, 1 }, + { 0x6FA, 0x6FC, 1 }, + { 0x6FF, 0x710, 17 }, + { 0x711, 0x73F, 1 }, + { 0x74D, 0x7B1, 1 }, + { 0x7CA, 0x7EA, 1 }, + { 0x7F4, 0x7F5, 1 }, + { 0x7FA, 0x800, 6 }, + { 0x801, 0x817, 1 }, + { 0x81A, 0x82C, 1 }, + { 0x840, 0x858, 1 }, + { 0x860, 0x86A, 1 }, + { 0x870, 0x887, 1 }, + { 0x889, 0x88E, 1 }, + { 0x8A0, 0x8C9, 1 }, + { 0x8D4, 0x8DF, 1 }, + { 0x8E3, 0x8E9, 1 }, + { 0x8F0, 0x93B, 1 }, + { 0x93D, 0x94C, 1 }, + { 0x94E, 0x950, 1 }, + { 0x955, 0x963, 1 }, + { 0x971, 0x983, 1 }, + { 0x985, 0x98C, 1 }, + { 0x98F, 0x990, 1 }, + { 0x993, 0x9A8, 1 }, + { 0x9AA, 0x9B0, 1 }, + { 0x9B2, 0x9B6, 4 }, + { 0x9B7, 0x9B9, 1 }, + { 0x9BD, 0x9C4, 1 }, + { 0x9C7, 0x9C8, 1 }, + { 0x9CB, 0x9CC, 1 }, + { 0x9CE, 0x9D7, 9 }, + { 0x9DC, 0x9DD, 1 }, + { 0x9DF, 0x9E3, 1 }, + { 0x9F0, 0x9F1, 1 }, + { 0x9FC, 0xA01, 5 }, + { 0xA02, 0xA03, 1 }, + { 0xA05, 0xA0A, 1 }, + { 0xA0F, 0xA10, 1 }, + { 0xA13, 0xA28, 1 }, + { 0xA2A, 0xA30, 1 }, + { 0xA32, 0xA33, 1 }, + { 0xA35, 0xA36, 1 }, + { 0xA38, 0xA39, 1 }, + { 0xA3E, 0xA42, 1 }, + { 0xA47, 0xA48, 1 }, + { 0xA4B, 0xA4C, 1 }, + { 0xA51, 0xA59, 8 }, + { 0xA5A, 0xA5C, 1 }, + { 0xA5E, 0xA70, 18 }, + { 0xA71, 0xA75, 1 }, + { 0xA81, 
0xA83, 1 }, + { 0xA85, 0xA8D, 1 }, + { 0xA8F, 0xA91, 1 }, + { 0xA93, 0xAA8, 1 }, + { 0xAAA, 0xAB0, 1 }, + { 0xAB2, 0xAB3, 1 }, + { 0xAB5, 0xAB9, 1 }, + { 0xABD, 0xAC5, 1 }, + { 0xAC7, 0xAC9, 1 }, + { 0xACB, 0xACC, 1 }, + { 0xAD0, 0xAE0, 16 }, + { 0xAE1, 0xAE3, 1 }, + { 0xAF9, 0xAFC, 1 }, + { 0xB01, 0xB03, 1 }, + { 0xB05, 0xB0C, 1 }, + { 0xB0F, 0xB10, 1 }, + { 0xB13, 0xB28, 1 }, + { 0xB2A, 0xB30, 1 }, + { 0xB32, 0xB33, 1 }, + { 0xB35, 0xB39, 1 }, + { 0xB3D, 0xB44, 1 }, + { 0xB47, 0xB48, 1 }, + { 0xB4B, 0xB4C, 1 }, + { 0xB56, 0xB57, 1 }, + { 0xB5C, 0xB5D, 1 }, + { 0xB5F, 0xB63, 1 }, + { 0xB71, 0xB82, 17 }, + { 0xB83, 0xB85, 2 }, + { 0xB86, 0xB8A, 1 }, + { 0xB8E, 0xB90, 1 }, + { 0xB92, 0xB95, 1 }, + { 0xB99, 0xB9A, 1 }, + { 0xB9C, 0xB9E, 2 }, + { 0xB9F, 0xBA3, 4 }, + { 0xBA4, 0xBA8, 4 }, + { 0xBA9, 0xBAA, 1 }, + { 0xBAE, 0xBB9, 1 }, + { 0xBBE, 0xBC2, 1 }, + { 0xBC6, 0xBC8, 1 }, + { 0xBCA, 0xBCC, 1 }, + { 0xBD0, 0xBD7, 7 }, + { 0xC00, 0xC03, 1 }, + { 0xC05, 0xC0C, 1 }, + { 0xC0E, 0xC10, 1 }, + { 0xC12, 0xC28, 1 }, + { 0xC2A, 0xC39, 1 }, + { 0xC3D, 0xC44, 1 }, + { 0xC46, 0xC48, 1 }, + { 0xC4A, 0xC4C, 1 }, + { 0xC55, 0xC56, 1 }, + { 0xC58, 0xC5A, 1 }, + { 0xC5D, 0xC60, 3 }, + { 0xC61, 0xC63, 1 }, + { 0xC80, 0xC83, 1 }, + { 0xC85, 0xC8C, 1 }, + { 0xC8E, 0xC90, 1 }, + { 0xC92, 0xCA8, 1 }, + { 0xCAA, 0xCB3, 1 }, + { 0xCB5, 0xCB9, 1 }, + { 0xCBD, 0xCC4, 1 }, + { 0xCC6, 0xCC8, 1 }, + { 0xCCA, 0xCCC, 1 }, + { 0xCD5, 0xCD6, 1 }, + { 0xCDD, 0xCDE, 1 }, + { 0xCE0, 0xCE3, 1 }, + { 0xCF1, 0xCF2, 1 }, + { 0xD00, 0xD0C, 1 }, + { 0xD0E, 0xD10, 1 }, + { 0xD12, 0xD3A, 1 }, + { 0xD3D, 0xD44, 1 }, + { 0xD46, 0xD48, 1 }, + { 0xD4A, 0xD4C, 1 }, + { 0xD4E, 0xD54, 6 }, + { 0xD55, 0xD57, 1 }, + { 0xD5F, 0xD63, 1 }, + { 0xD7A, 0xD7F, 1 }, + { 0xD81, 0xD83, 1 }, + { 0xD85, 0xD96, 1 }, + { 0xD9A, 0xDB1, 1 }, + { 0xDB3, 0xDBB, 1 }, + { 0xDBD, 0xDC0, 3 }, + { 0xDC1, 0xDC6, 1 }, + { 0xDCF, 0xDD4, 1 }, + { 0xDD6, 0xDD8, 2 }, + { 0xDD9, 0xDDF, 1 }, + { 0xDF2, 0xDF3, 1 }, + { 0xE01, 0xE3A, 1 }, + { 
0xE40, 0xE46, 1 }, + { 0xE4D, 0xE81, 52 }, + { 0xE82, 0xE86, 2 }, + { 0xE87, 0xE8A, 1 }, + { 0xE8C, 0xEA3, 1 }, + { 0xEA5, 0xEA7, 2 }, + { 0xEA8, 0xEB9, 1 }, + { 0xEBB, 0xEBD, 1 }, + { 0xEC0, 0xEC4, 1 }, + { 0xEC6, 0xECD, 7 }, + { 0xEDC, 0xEDF, 1 }, + { 0xF00, 0xF40, 64 }, + { 0xF41, 0xF47, 1 }, + { 0xF49, 0xF6C, 1 }, + { 0xF71, 0xF81, 1 }, + { 0xF88, 0xF97, 1 }, + { 0xF99, 0xFBC, 1 }, + { 0x1000, 0x1036, 1 }, + { 0x1038, 0x103B, 3 }, + { 0x103C, 0x103F, 1 }, + { 0x1050, 0x108F, 1 }, + { 0x109A, 0x109D, 1 }, + { 0x10A0, 0x10C5, 1 }, + { 0x10C7, 0x10CD, 6 }, + { 0x10D0, 0x10FA, 1 }, + { 0x10FC, 0x1248, 1 }, + { 0x124A, 0x124D, 1 }, + { 0x1250, 0x1256, 1 }, + { 0x1258, 0x125A, 2 }, + { 0x125B, 0x125D, 1 }, + { 0x1260, 0x1288, 1 }, + { 0x128A, 0x128D, 1 }, + { 0x1290, 0x12B0, 1 }, + { 0x12B2, 0x12B5, 1 }, + { 0x12B8, 0x12BE, 1 }, + { 0x12C0, 0x12C2, 2 }, + { 0x12C3, 0x12C5, 1 }, + { 0x12C8, 0x12D6, 1 }, + { 0x12D8, 0x1310, 1 }, + { 0x1312, 0x1315, 1 }, + { 0x1318, 0x135A, 1 }, + { 0x1380, 0x138F, 1 }, + { 0x13A0, 0x13F5, 1 }, + { 0x13F8, 0x13FD, 1 }, + { 0x1401, 0x166C, 1 }, + { 0x166F, 0x167F, 1 }, + { 0x1681, 0x169A, 1 }, + { 0x16A0, 0x16EA, 1 }, + { 0x16EE, 0x16F8, 1 }, + { 0x1700, 0x1713, 1 }, + { 0x171F, 0x1733, 1 }, + { 0x1740, 0x1753, 1 }, + { 0x1760, 0x176C, 1 }, + { 0x176E, 0x1770, 1 }, + { 0x1772, 0x1773, 1 }, + { 0x1780, 0x17B3, 1 }, + { 0x17B6, 0x17C8, 1 }, + { 0x17D7, 0x17DC, 5 }, + { 0x1820, 0x1878, 1 }, + { 0x1880, 0x18AA, 1 }, + { 0x18B0, 0x18F5, 1 }, + { 0x1900, 0x191E, 1 }, + { 0x1920, 0x192B, 1 }, + { 0x1930, 0x1938, 1 }, + { 0x1950, 0x196D, 1 }, + { 0x1970, 0x1974, 1 }, + { 0x1980, 0x19AB, 1 }, + { 0x19B0, 0x19C9, 1 }, + { 0x1A00, 0x1A1B, 1 }, + { 0x1A20, 0x1A5E, 1 }, + { 0x1A61, 0x1A74, 1 }, + { 0x1AA7, 0x1ABF, 24 }, + { 0x1AC0, 0x1ACC, 12 }, + { 0x1ACD, 0x1ACE, 1 }, + { 0x1B00, 0x1B33, 1 }, + { 0x1B35, 0x1B43, 1 }, + { 0x1B45, 0x1B4C, 1 }, + { 0x1B80, 0x1BA9, 1 }, + { 0x1BAC, 0x1BAF, 1 }, + { 0x1BBA, 0x1BE5, 1 }, + { 0x1BE7, 0x1BF1, 1 }, + { 
0x1C00, 0x1C36, 1 }, + { 0x1C4D, 0x1C4F, 1 }, + { 0x1C5A, 0x1C7D, 1 }, + { 0x1C80, 0x1C88, 1 }, + { 0x1C90, 0x1CBA, 1 }, + { 0x1CBD, 0x1CBF, 1 }, + { 0x1CE9, 0x1CEC, 1 }, + { 0x1CEE, 0x1CF3, 1 }, + { 0x1CF5, 0x1CF6, 1 }, + { 0x1CFA, 0x1D00, 6 }, + { 0x1D01, 0x1DBF, 1 }, + { 0x1DE7, 0x1DF4, 1 }, + { 0x1E00, 0x1F15, 1 }, + { 0x1F18, 0x1F1D, 1 }, + { 0x1F20, 0x1F45, 1 }, + { 0x1F48, 0x1F4D, 1 }, + { 0x1F50, 0x1F57, 1 }, + { 0x1F59, 0x1F5F, 2 }, + { 0x1F60, 0x1F7D, 1 }, + { 0x1F80, 0x1FB4, 1 }, + { 0x1FB6, 0x1FBC, 1 }, + { 0x1FBE, 0x1FC2, 4 }, + { 0x1FC3, 0x1FC4, 1 }, + { 0x1FC6, 0x1FCC, 1 }, + { 0x1FD0, 0x1FD3, 1 }, + { 0x1FD6, 0x1FDB, 1 }, + { 0x1FE0, 0x1FEC, 1 }, + { 0x1FF2, 0x1FF4, 1 }, + { 0x1FF6, 0x1FFC, 1 }, + { 0x2071, 0x207F, 14 }, + { 0x2090, 0x209C, 1 }, + { 0x2102, 0x2107, 5 }, + { 0x210A, 0x2113, 1 }, + { 0x2115, 0x2119, 4 }, + { 0x211A, 0x211D, 1 }, + { 0x2124, 0x212A, 2 }, + { 0x212B, 0x212D, 1 }, + { 0x212F, 0x2139, 1 }, + { 0x213C, 0x213F, 1 }, + { 0x2145, 0x2149, 1 }, + { 0x214E, 0x2160, 18 }, + { 0x2161, 0x2188, 1 }, + { 0x24B6, 0x24E9, 1 }, + { 0x2C00, 0x2CE4, 1 }, + { 0x2CEB, 0x2CEE, 1 }, + { 0x2CF2, 0x2CF3, 1 }, + { 0x2D00, 0x2D25, 1 }, + { 0x2D27, 0x2D2D, 6 }, + { 0x2D30, 0x2D67, 1 }, + { 0x2D6F, 0x2D80, 17 }, + { 0x2D81, 0x2D96, 1 }, + { 0x2DA0, 0x2DA6, 1 }, + { 0x2DA8, 0x2DAE, 1 }, + { 0x2DB0, 0x2DB6, 1 }, + { 0x2DB8, 0x2DBE, 1 }, + { 0x2DC0, 0x2DC6, 1 }, + { 0x2DC8, 0x2DCE, 1 }, + { 0x2DD0, 0x2DD6, 1 }, + { 0x2DD8, 0x2DDE, 1 }, + { 0x2DE0, 0x2DFF, 1 }, + { 0x2E2F, 0x3005, 470 }, + { 0x3006, 0x3007, 1 }, + { 0x3021, 0x3029, 1 }, + { 0x3031, 0x3035, 1 }, + { 0x3038, 0x303C, 1 }, + { 0x3041, 0x3096, 1 }, + { 0x309D, 0x309F, 1 }, + { 0x30A1, 0x30FA, 1 }, + { 0x30FC, 0x30FF, 1 }, + { 0x3105, 0x312F, 1 }, + { 0x3131, 0x318E, 1 }, + { 0x31A0, 0x31BF, 1 }, + { 0x31F0, 0x31FF, 1 }, + { 0x3400, 0x4DBF, 1 }, + { 0x4E00, 0xA48C, 1 }, + { 0xA4D0, 0xA4FD, 1 }, + { 0xA500, 0xA60C, 1 }, + { 0xA610, 0xA61F, 1 }, + { 0xA62A, 0xA62B, 1 }, + { 0xA640, 0xA66E, 1 
}, + { 0xA674, 0xA67B, 1 }, + { 0xA67F, 0xA6EF, 1 }, + { 0xA717, 0xA71F, 1 }, + { 0xA722, 0xA788, 1 }, + { 0xA78B, 0xA7CA, 1 }, + { 0xA7D0, 0xA7D1, 1 }, + { 0xA7D3, 0xA7D5, 2 }, + { 0xA7D6, 0xA7D9, 1 }, + { 0xA7F2, 0xA805, 1 }, + { 0xA807, 0xA827, 1 }, + { 0xA840, 0xA873, 1 }, + { 0xA880, 0xA8C3, 1 }, + { 0xA8C5, 0xA8F2, 45 }, + { 0xA8F3, 0xA8F7, 1 }, + { 0xA8FB, 0xA8FD, 2 }, + { 0xA8FE, 0xA8FF, 1 }, + { 0xA90A, 0xA92A, 1 }, + { 0xA930, 0xA952, 1 }, + { 0xA960, 0xA97C, 1 }, + { 0xA980, 0xA9B2, 1 }, + { 0xA9B4, 0xA9BF, 1 }, + { 0xA9CF, 0xA9E0, 17 }, + { 0xA9E1, 0xA9EF, 1 }, + { 0xA9FA, 0xA9FE, 1 }, + { 0xAA00, 0xAA36, 1 }, + { 0xAA40, 0xAA4D, 1 }, + { 0xAA60, 0xAA76, 1 }, + { 0xAA7A, 0xAABE, 1 }, + { 0xAAC0, 0xAAC2, 2 }, + { 0xAADB, 0xAADD, 1 }, + { 0xAAE0, 0xAAEF, 1 }, + { 0xAAF2, 0xAAF5, 1 }, + { 0xAB01, 0xAB06, 1 }, + { 0xAB09, 0xAB0E, 1 }, + { 0xAB11, 0xAB16, 1 }, + { 0xAB20, 0xAB26, 1 }, + { 0xAB28, 0xAB2E, 1 }, + { 0xAB30, 0xAB5A, 1 }, + { 0xAB5C, 0xAB69, 1 }, + { 0xAB70, 0xABEA, 1 }, + { 0xAC00, 0xD7A3, 1 }, + { 0xD7B0, 0xD7C6, 1 }, + { 0xD7CB, 0xD7FB, 1 }, + { 0xF900, 0xFA6D, 1 }, + { 0xFA70, 0xFAD9, 1 }, + { 0xFB00, 0xFB06, 1 }, + { 0xFB13, 0xFB17, 1 }, + { 0xFB1D, 0xFB28, 1 }, + { 0xFB2A, 0xFB36, 1 }, + { 0xFB38, 0xFB3C, 1 }, + { 0xFB3E, 0xFB40, 2 }, + { 0xFB41, 0xFB43, 2 }, + { 0xFB44, 0xFB46, 2 }, + { 0xFB47, 0xFBB1, 1 }, + { 0xFBD3, 0xFD3D, 1 }, + { 0xFD50, 0xFD8F, 1 }, + { 0xFD92, 0xFDC7, 1 }, + { 0xFDF0, 0xFDFB, 1 }, + { 0xFE70, 0xFE74, 1 }, + { 0xFE76, 0xFEFC, 1 }, + { 0xFF21, 0xFF3A, 1 }, + { 0xFF41, 0xFF5A, 1 }, + { 0xFF66, 0xFFBE, 1 }, + { 0xFFC2, 0xFFC7, 1 }, + { 0xFFCA, 0xFFCF, 1 }, + { 0xFFD2, 0xFFD7, 1 }, + { 0xFFDA, 0xFFDC, 1 }, + { 0x10000, 0x1000B, 1 }, + { 0x1000D, 0x10026, 1 }, + { 0x10028, 0x1003A, 1 }, + { 0x1003C, 0x1003D, 1 }, + { 0x1003F, 0x1004D, 1 }, + { 0x10050, 0x1005D, 1 }, + { 0x10080, 0x100FA, 1 }, + { 0x10140, 0x10174, 1 }, + { 0x10280, 0x1029C, 1 }, + { 0x102A0, 0x102D0, 1 }, + { 0x10300, 0x1031F, 1 }, + { 0x1032D, 0x1034A, 
1 }, + { 0x10350, 0x1037A, 1 }, + { 0x10380, 0x1039D, 1 }, + { 0x103A0, 0x103C3, 1 }, + { 0x103C8, 0x103CF, 1 }, + { 0x103D1, 0x103D5, 1 }, + { 0x10400, 0x1049D, 1 }, + { 0x104B0, 0x104D3, 1 }, + { 0x104D8, 0x104FB, 1 }, + { 0x10500, 0x10527, 1 }, + { 0x10530, 0x10563, 1 }, + { 0x10570, 0x1057A, 1 }, + { 0x1057C, 0x1058A, 1 }, + { 0x1058C, 0x10592, 1 }, + { 0x10594, 0x10595, 1 }, + { 0x10597, 0x105A1, 1 }, + { 0x105A3, 0x105B1, 1 }, + { 0x105B3, 0x105B9, 1 }, + { 0x105BB, 0x105BC, 1 }, + { 0x10600, 0x10736, 1 }, + { 0x10740, 0x10755, 1 }, + { 0x10760, 0x10767, 1 }, + { 0x10780, 0x10785, 1 }, + { 0x10787, 0x107B0, 1 }, + { 0x107B2, 0x107BA, 1 }, + { 0x10800, 0x10805, 1 }, + { 0x10808, 0x1080A, 2 }, + { 0x1080B, 0x10835, 1 }, + { 0x10837, 0x10838, 1 }, + { 0x1083C, 0x1083F, 3 }, + { 0x10840, 0x10855, 1 }, + { 0x10860, 0x10876, 1 }, + { 0x10880, 0x1089E, 1 }, + { 0x108E0, 0x108F2, 1 }, + { 0x108F4, 0x108F5, 1 }, + { 0x10900, 0x10915, 1 }, + { 0x10920, 0x10939, 1 }, + { 0x10980, 0x109B7, 1 }, + { 0x109BE, 0x109BF, 1 }, + { 0x10A00, 0x10A03, 1 }, + { 0x10A05, 0x10A06, 1 }, + { 0x10A0C, 0x10A13, 1 }, + { 0x10A15, 0x10A17, 1 }, + { 0x10A19, 0x10A35, 1 }, + { 0x10A60, 0x10A7C, 1 }, + { 0x10A80, 0x10A9C, 1 }, + { 0x10AC0, 0x10AC7, 1 }, + { 0x10AC9, 0x10AE4, 1 }, + { 0x10B00, 0x10B35, 1 }, + { 0x10B40, 0x10B55, 1 }, + { 0x10B60, 0x10B72, 1 }, + { 0x10B80, 0x10B91, 1 }, + { 0x10C00, 0x10C48, 1 }, + { 0x10C80, 0x10CB2, 1 }, + { 0x10CC0, 0x10CF2, 1 }, + { 0x10D00, 0x10D27, 1 }, + { 0x10E80, 0x10EA9, 1 }, + { 0x10EAB, 0x10EAC, 1 }, + { 0x10EB0, 0x10EB1, 1 }, + { 0x10F00, 0x10F1C, 1 }, + { 0x10F27, 0x10F30, 9 }, + { 0x10F31, 0x10F45, 1 }, + { 0x10F70, 0x10F81, 1 }, + { 0x10FB0, 0x10FC4, 1 }, + { 0x10FE0, 0x10FF6, 1 }, + { 0x11000, 0x11045, 1 }, + { 0x11071, 0x11075, 1 }, + { 0x11082, 0x110B8, 1 }, + { 0x110C2, 0x110D0, 14 }, + { 0x110D1, 0x110E8, 1 }, + { 0x11100, 0x11132, 1 }, + { 0x11144, 0x11147, 1 }, + { 0x11150, 0x11172, 1 }, + { 0x11176, 0x11180, 10 }, + { 0x11181, 0x111BF, 
1 }, + { 0x111C1, 0x111C4, 1 }, + { 0x111CE, 0x111CF, 1 }, + { 0x111DA, 0x111DC, 2 }, + { 0x11200, 0x11211, 1 }, + { 0x11213, 0x11234, 1 }, + { 0x11237, 0x1123E, 7 }, + { 0x11280, 0x11286, 1 }, + { 0x11288, 0x1128A, 2 }, + { 0x1128B, 0x1128D, 1 }, + { 0x1128F, 0x1129D, 1 }, + { 0x1129F, 0x112A8, 1 }, + { 0x112B0, 0x112E8, 1 }, + { 0x11300, 0x11303, 1 }, + { 0x11305, 0x1130C, 1 }, + { 0x1130F, 0x11310, 1 }, + { 0x11313, 0x11328, 1 }, + { 0x1132A, 0x11330, 1 }, + { 0x11332, 0x11333, 1 }, + { 0x11335, 0x11339, 1 }, + { 0x1133D, 0x11344, 1 }, + { 0x11347, 0x11348, 1 }, + { 0x1134B, 0x1134C, 1 }, + { 0x11350, 0x11357, 7 }, + { 0x1135D, 0x11363, 1 }, + { 0x11400, 0x11441, 1 }, + { 0x11443, 0x11445, 1 }, + { 0x11447, 0x1144A, 1 }, + { 0x1145F, 0x11461, 1 }, + { 0x11480, 0x114C1, 1 }, + { 0x114C4, 0x114C5, 1 }, + { 0x114C7, 0x11580, 185 }, + { 0x11581, 0x115B5, 1 }, + { 0x115B8, 0x115BE, 1 }, + { 0x115D8, 0x115DD, 1 }, + { 0x11600, 0x1163E, 1 }, + { 0x11640, 0x11644, 4 }, + { 0x11680, 0x116B5, 1 }, + { 0x116B8, 0x11700, 72 }, + { 0x11701, 0x1171A, 1 }, + { 0x1171D, 0x1172A, 1 }, + { 0x11740, 0x11746, 1 }, + { 0x11800, 0x11838, 1 }, + { 0x118A0, 0x118DF, 1 }, + { 0x118FF, 0x11906, 1 }, + { 0x11909, 0x1190C, 3 }, + { 0x1190D, 0x11913, 1 }, + { 0x11915, 0x11916, 1 }, + { 0x11918, 0x11935, 1 }, + { 0x11937, 0x11938, 1 }, + { 0x1193B, 0x1193C, 1 }, + { 0x1193F, 0x11942, 1 }, + { 0x119A0, 0x119A7, 1 }, + { 0x119AA, 0x119D7, 1 }, + { 0x119DA, 0x119DF, 1 }, + { 0x119E1, 0x119E3, 2 }, + { 0x119E4, 0x11A00, 28 }, + { 0x11A01, 0x11A32, 1 }, + { 0x11A35, 0x11A3E, 1 }, + { 0x11A50, 0x11A97, 1 }, + { 0x11A9D, 0x11AB0, 19 }, + { 0x11AB1, 0x11AF8, 1 }, + { 0x11C00, 0x11C08, 1 }, + { 0x11C0A, 0x11C36, 1 }, + { 0x11C38, 0x11C3E, 1 }, + { 0x11C40, 0x11C72, 50 }, + { 0x11C73, 0x11C8F, 1 }, + { 0x11C92, 0x11CA7, 1 }, + { 0x11CA9, 0x11CB6, 1 }, + { 0x11D00, 0x11D06, 1 }, + { 0x11D08, 0x11D09, 1 }, + { 0x11D0B, 0x11D36, 1 }, + { 0x11D3A, 0x11D3C, 2 }, + { 0x11D3D, 0x11D3F, 2 }, + { 0x11D40, 
0x11D41, 1 }, + { 0x11D43, 0x11D46, 3 }, + { 0x11D47, 0x11D60, 25 }, + { 0x11D61, 0x11D65, 1 }, + { 0x11D67, 0x11D68, 1 }, + { 0x11D6A, 0x11D8E, 1 }, + { 0x11D90, 0x11D91, 1 }, + { 0x11D93, 0x11D96, 1 }, + { 0x11D98, 0x11EE0, 328 }, + { 0x11EE1, 0x11EF6, 1 }, + { 0x11FB0, 0x12000, 80 }, + { 0x12001, 0x12399, 1 }, + { 0x12400, 0x1246E, 1 }, + { 0x12480, 0x12543, 1 }, + { 0x12F90, 0x12FF0, 1 }, + { 0x13000, 0x1342E, 1 }, + { 0x14400, 0x14646, 1 }, + { 0x16800, 0x16A38, 1 }, + { 0x16A40, 0x16A5E, 1 }, + { 0x16A70, 0x16ABE, 1 }, + { 0x16AD0, 0x16AED, 1 }, + { 0x16B00, 0x16B2F, 1 }, + { 0x16B40, 0x16B43, 1 }, + { 0x16B63, 0x16B77, 1 }, + { 0x16B7D, 0x16B8F, 1 }, + { 0x16E40, 0x16E7F, 1 }, + { 0x16F00, 0x16F4A, 1 }, + { 0x16F4F, 0x16F87, 1 }, + { 0x16F8F, 0x16F9F, 1 }, + { 0x16FE0, 0x16FE1, 1 }, + { 0x16FE3, 0x16FF0, 13 }, + { 0x16FF1, 0x17000, 15 }, + { 0x17001, 0x187F7, 1 }, + { 0x18800, 0x18CD5, 1 }, + { 0x18D00, 0x18D08, 1 }, + { 0x1AFF0, 0x1AFF3, 1 }, + { 0x1AFF5, 0x1AFFB, 1 }, + { 0x1AFFD, 0x1AFFE, 1 }, + { 0x1B000, 0x1B122, 1 }, + { 0x1B150, 0x1B152, 1 }, + { 0x1B164, 0x1B167, 1 }, + { 0x1B170, 0x1B2FB, 1 }, + { 0x1BC00, 0x1BC6A, 1 }, + { 0x1BC70, 0x1BC7C, 1 }, + { 0x1BC80, 0x1BC88, 1 }, + { 0x1BC90, 0x1BC99, 1 }, + { 0x1BC9E, 0x1D400, 5986 }, + { 0x1D401, 0x1D454, 1 }, + { 0x1D456, 0x1D49C, 1 }, + { 0x1D49E, 0x1D49F, 1 }, + { 0x1D4A2, 0x1D4A5, 3 }, + { 0x1D4A6, 0x1D4A9, 3 }, + { 0x1D4AA, 0x1D4AC, 1 }, + { 0x1D4AE, 0x1D4B9, 1 }, + { 0x1D4BB, 0x1D4BD, 2 }, + { 0x1D4BE, 0x1D4C3, 1 }, + { 0x1D4C5, 0x1D505, 1 }, + { 0x1D507, 0x1D50A, 1 }, + { 0x1D50D, 0x1D514, 1 }, + { 0x1D516, 0x1D51C, 1 }, + { 0x1D51E, 0x1D539, 1 }, + { 0x1D53B, 0x1D53E, 1 }, + { 0x1D540, 0x1D544, 1 }, + { 0x1D546, 0x1D54A, 4 }, + { 0x1D54B, 0x1D550, 1 }, + { 0x1D552, 0x1D6A5, 1 }, + { 0x1D6A8, 0x1D6C0, 1 }, + { 0x1D6C2, 0x1D6DA, 1 }, + { 0x1D6DC, 0x1D6FA, 1 }, + { 0x1D6FC, 0x1D714, 1 }, + { 0x1D716, 0x1D734, 1 }, + { 0x1D736, 0x1D74E, 1 }, + { 0x1D750, 0x1D76E, 1 }, + { 0x1D770, 0x1D788, 1 }, + { 
0x1D78A, 0x1D7A8, 1 }, + { 0x1D7AA, 0x1D7C2, 1 }, + { 0x1D7C4, 0x1D7CB, 1 }, + { 0x1DF00, 0x1DF1E, 1 }, + { 0x1E000, 0x1E006, 1 }, + { 0x1E008, 0x1E018, 1 }, + { 0x1E01B, 0x1E021, 1 }, + { 0x1E023, 0x1E024, 1 }, + { 0x1E026, 0x1E02A, 1 }, + { 0x1E100, 0x1E12C, 1 }, + { 0x1E137, 0x1E13D, 1 }, + { 0x1E14E, 0x1E290, 322 }, + { 0x1E291, 0x1E2AD, 1 }, + { 0x1E2C0, 0x1E2EB, 1 }, + { 0x1E7E0, 0x1E7E6, 1 }, + { 0x1E7E8, 0x1E7EB, 1 }, + { 0x1E7ED, 0x1E7EE, 1 }, + { 0x1E7F0, 0x1E7FE, 1 }, + { 0x1E800, 0x1E8C4, 1 }, + { 0x1E900, 0x1E943, 1 }, + { 0x1E947, 0x1E94B, 4 }, + { 0x1EE00, 0x1EE03, 1 }, + { 0x1EE05, 0x1EE1F, 1 }, + { 0x1EE21, 0x1EE22, 1 }, + { 0x1EE24, 0x1EE27, 3 }, + { 0x1EE29, 0x1EE32, 1 }, + { 0x1EE34, 0x1EE37, 1 }, + { 0x1EE39, 0x1EE3B, 2 }, + { 0x1EE42, 0x1EE47, 5 }, + { 0x1EE49, 0x1EE4D, 2 }, + { 0x1EE4E, 0x1EE4F, 1 }, + { 0x1EE51, 0x1EE52, 1 }, + { 0x1EE54, 0x1EE57, 3 }, + { 0x1EE59, 0x1EE61, 2 }, + { 0x1EE62, 0x1EE64, 2 }, + { 0x1EE67, 0x1EE6A, 1 }, + { 0x1EE6C, 0x1EE72, 1 }, + { 0x1EE74, 0x1EE77, 1 }, + { 0x1EE79, 0x1EE7C, 1 }, + { 0x1EE7E, 0x1EE80, 2 }, + { 0x1EE81, 0x1EE89, 1 }, + { 0x1EE8B, 0x1EE9B, 1 }, + { 0x1EEA1, 0x1EEA3, 1 }, + { 0x1EEA5, 0x1EEA9, 1 }, + { 0x1EEAB, 0x1EEBB, 1 }, + { 0x1F130, 0x1F149, 1 }, + { 0x1F150, 0x1F169, 1 }, + { 0x1F170, 0x1F189, 1 }, + { 0x20000, 0x2A6DF, 1 }, + { 0x2A700, 0x2B738, 1 }, + { 0x2B740, 0x2B81D, 1 }, + { 0x2B820, 0x2CEA1, 1 }, + { 0x2CEB0, 0x2EBE0, 1 }, + { 0x2F800, 0x2FA1D, 1 }, + { 0x30000, 0x3134A, 1 }, +}; + +static struct range_table lower_table[] = { + { 0x61, 0x7A, 1 }, + { 0xAA, 0xB5, 11 }, + { 0xBA, 0xDF, 37 }, + { 0xE0, 0xF6, 1 }, + { 0xF8, 0xFF, 1 }, + { 0x101, 0x137, 2 }, + { 0x138, 0x148, 2 }, + { 0x149, 0x177, 2 }, + { 0x17A, 0x17E, 2 }, + { 0x17F, 0x180, 1 }, + { 0x183, 0x185, 2 }, + { 0x188, 0x18C, 4 }, + { 0x18D, 0x192, 5 }, + { 0x195, 0x199, 4 }, + { 0x19A, 0x19B, 1 }, + { 0x19E, 0x1A1, 3 }, + { 0x1A3, 0x1A5, 2 }, + { 0x1A8, 0x1AA, 2 }, + { 0x1AB, 0x1AD, 2 }, + { 0x1B0, 0x1B4, 4 }, + { 0x1B6, 
0x1B9, 3 }, + { 0x1BA, 0x1BD, 3 }, + { 0x1BE, 0x1BF, 1 }, + { 0x1C6, 0x1CC, 3 }, + { 0x1CE, 0x1DC, 2 }, + { 0x1DD, 0x1EF, 2 }, + { 0x1F0, 0x1F3, 3 }, + { 0x1F5, 0x1F9, 4 }, + { 0x1FB, 0x233, 2 }, + { 0x234, 0x239, 1 }, + { 0x23C, 0x23F, 3 }, + { 0x240, 0x242, 2 }, + { 0x247, 0x24F, 2 }, + { 0x250, 0x293, 1 }, + { 0x295, 0x2B8, 1 }, + { 0x2C0, 0x2C1, 1 }, + { 0x2E0, 0x2E4, 1 }, + { 0x345, 0x371, 44 }, + { 0x373, 0x377, 4 }, + { 0x37A, 0x37D, 1 }, + { 0x390, 0x3AC, 28 }, + { 0x3AD, 0x3CE, 1 }, + { 0x3D0, 0x3D1, 1 }, + { 0x3D5, 0x3D7, 1 }, + { 0x3D9, 0x3EF, 2 }, + { 0x3F0, 0x3F3, 1 }, + { 0x3F5, 0x3FB, 3 }, + { 0x3FC, 0x430, 52 }, + { 0x431, 0x45F, 1 }, + { 0x461, 0x481, 2 }, + { 0x48B, 0x4BF, 2 }, + { 0x4C2, 0x4CE, 2 }, + { 0x4CF, 0x52F, 2 }, + { 0x560, 0x588, 1 }, + { 0x10D0, 0x10FA, 1 }, + { 0x10FD, 0x10FF, 1 }, + { 0x13F8, 0x13FD, 1 }, + { 0x1C80, 0x1C88, 1 }, + { 0x1D00, 0x1DBF, 1 }, + { 0x1E01, 0x1E95, 2 }, + { 0x1E96, 0x1E9D, 1 }, + { 0x1E9F, 0x1EFF, 2 }, + { 0x1F00, 0x1F07, 1 }, + { 0x1F10, 0x1F15, 1 }, + { 0x1F20, 0x1F27, 1 }, + { 0x1F30, 0x1F37, 1 }, + { 0x1F40, 0x1F45, 1 }, + { 0x1F50, 0x1F57, 1 }, + { 0x1F60, 0x1F67, 1 }, + { 0x1F70, 0x1F7D, 1 }, + { 0x1F80, 0x1F87, 1 }, + { 0x1F90, 0x1F97, 1 }, + { 0x1FA0, 0x1FA7, 1 }, + { 0x1FB0, 0x1FB4, 1 }, + { 0x1FB6, 0x1FB7, 1 }, + { 0x1FBE, 0x1FC2, 4 }, + { 0x1FC3, 0x1FC4, 1 }, + { 0x1FC6, 0x1FC7, 1 }, + { 0x1FD0, 0x1FD3, 1 }, + { 0x1FD6, 0x1FD7, 1 }, + { 0x1FE0, 0x1FE7, 1 }, + { 0x1FF2, 0x1FF4, 1 }, + { 0x1FF6, 0x1FF7, 1 }, + { 0x2071, 0x207F, 14 }, + { 0x2090, 0x209C, 1 }, + { 0x210A, 0x210E, 4 }, + { 0x210F, 0x2113, 4 }, + { 0x212F, 0x2139, 5 }, + { 0x213C, 0x213D, 1 }, + { 0x2146, 0x2149, 1 }, + { 0x214E, 0x2170, 34 }, + { 0x2171, 0x217F, 1 }, + { 0x2184, 0x24D0, 844 }, + { 0x24D1, 0x24E9, 1 }, + { 0x2C30, 0x2C5F, 1 }, + { 0x2C61, 0x2C65, 4 }, + { 0x2C66, 0x2C6C, 2 }, + { 0x2C71, 0x2C73, 2 }, + { 0x2C74, 0x2C76, 2 }, + { 0x2C77, 0x2C7D, 1 }, + { 0x2C81, 0x2CE3, 2 }, + { 0x2CE4, 0x2CEC, 8 }, + { 0x2CEE, 0x2CF3, 5 
}, + { 0x2D00, 0x2D25, 1 }, + { 0x2D27, 0x2D2D, 6 }, + { 0xA641, 0xA66D, 2 }, + { 0xA681, 0xA69B, 2 }, + { 0xA69C, 0xA69D, 1 }, + { 0xA723, 0xA72F, 2 }, + { 0xA730, 0xA731, 1 }, + { 0xA733, 0xA76F, 2 }, + { 0xA770, 0xA778, 1 }, + { 0xA77A, 0xA77C, 2 }, + { 0xA77F, 0xA787, 2 }, + { 0xA78C, 0xA78E, 2 }, + { 0xA791, 0xA793, 2 }, + { 0xA794, 0xA795, 1 }, + { 0xA797, 0xA7A9, 2 }, + { 0xA7AF, 0xA7B5, 6 }, + { 0xA7B7, 0xA7C3, 2 }, + { 0xA7C8, 0xA7CA, 2 }, + { 0xA7D1, 0xA7D9, 2 }, + { 0xA7F6, 0xA7F8, 2 }, + { 0xA7F9, 0xA7FA, 1 }, + { 0xAB30, 0xAB5A, 1 }, + { 0xAB5C, 0xAB68, 1 }, + { 0xAB70, 0xABBF, 1 }, + { 0xFB00, 0xFB06, 1 }, + { 0xFB13, 0xFB17, 1 }, + { 0xFF41, 0xFF5A, 1 }, + { 0x10428, 0x1044F, 1 }, + { 0x104D8, 0x104FB, 1 }, + { 0x10597, 0x105A1, 1 }, + { 0x105A3, 0x105B1, 1 }, + { 0x105B3, 0x105B9, 1 }, + { 0x105BB, 0x105BC, 1 }, + { 0x10780, 0x10783, 3 }, + { 0x10784, 0x10785, 1 }, + { 0x10787, 0x107B0, 1 }, + { 0x107B2, 0x107BA, 1 }, + { 0x10CC0, 0x10CF2, 1 }, + { 0x118C0, 0x118DF, 1 }, + { 0x16E60, 0x16E7F, 1 }, + { 0x1D41A, 0x1D433, 1 }, + { 0x1D44E, 0x1D454, 1 }, + { 0x1D456, 0x1D467, 1 }, + { 0x1D482, 0x1D49B, 1 }, + { 0x1D4B6, 0x1D4B9, 1 }, + { 0x1D4BB, 0x1D4BD, 2 }, + { 0x1D4BE, 0x1D4C3, 1 }, + { 0x1D4C5, 0x1D4CF, 1 }, + { 0x1D4EA, 0x1D503, 1 }, + { 0x1D51E, 0x1D537, 1 }, + { 0x1D552, 0x1D56B, 1 }, + { 0x1D586, 0x1D59F, 1 }, + { 0x1D5BA, 0x1D5D3, 1 }, + { 0x1D5EE, 0x1D607, 1 }, + { 0x1D622, 0x1D63B, 1 }, + { 0x1D656, 0x1D66F, 1 }, + { 0x1D68A, 0x1D6A5, 1 }, + { 0x1D6C2, 0x1D6DA, 1 }, + { 0x1D6DC, 0x1D6E1, 1 }, + { 0x1D6FC, 0x1D714, 1 }, + { 0x1D716, 0x1D71B, 1 }, + { 0x1D736, 0x1D74E, 1 }, + { 0x1D750, 0x1D755, 1 }, + { 0x1D770, 0x1D788, 1 }, + { 0x1D78A, 0x1D78F, 1 }, + { 0x1D7AA, 0x1D7C2, 1 }, + { 0x1D7C4, 0x1D7C9, 1 }, + { 0x1D7CB, 0x1DF00, 1845 }, + { 0x1DF01, 0x1DF09, 1 }, + { 0x1DF0B, 0x1DF1E, 1 }, + { 0x1E922, 0x1E943, 1 }, +}; + +static struct range_table upper_table[] = { + { 0x41, 0x5A, 1 }, + { 0xC0, 0xD6, 1 }, + { 0xD8, 0xDE, 1 }, + { 0x100, 
0x136, 2 }, + { 0x139, 0x147, 2 }, + { 0x14A, 0x178, 2 }, + { 0x179, 0x17D, 2 }, + { 0x181, 0x182, 1 }, + { 0x184, 0x186, 2 }, + { 0x187, 0x189, 2 }, + { 0x18A, 0x18B, 1 }, + { 0x18E, 0x191, 1 }, + { 0x193, 0x194, 1 }, + { 0x196, 0x198, 1 }, + { 0x19C, 0x19D, 1 }, + { 0x19F, 0x1A0, 1 }, + { 0x1A2, 0x1A6, 2 }, + { 0x1A7, 0x1A9, 2 }, + { 0x1AC, 0x1AE, 2 }, + { 0x1AF, 0x1B1, 2 }, + { 0x1B2, 0x1B3, 1 }, + { 0x1B5, 0x1B7, 2 }, + { 0x1B8, 0x1BC, 4 }, + { 0x1C4, 0x1CD, 3 }, + { 0x1CF, 0x1DB, 2 }, + { 0x1DE, 0x1EE, 2 }, + { 0x1F1, 0x1F4, 3 }, + { 0x1F6, 0x1F8, 1 }, + { 0x1FA, 0x232, 2 }, + { 0x23A, 0x23B, 1 }, + { 0x23D, 0x23E, 1 }, + { 0x241, 0x243, 2 }, + { 0x244, 0x246, 1 }, + { 0x248, 0x24E, 2 }, + { 0x370, 0x372, 2 }, + { 0x376, 0x37F, 9 }, + { 0x386, 0x388, 2 }, + { 0x389, 0x38A, 1 }, + { 0x38C, 0x38E, 2 }, + { 0x38F, 0x391, 2 }, + { 0x392, 0x3A1, 1 }, + { 0x3A3, 0x3AB, 1 }, + { 0x3CF, 0x3D2, 3 }, + { 0x3D3, 0x3D4, 1 }, + { 0x3D8, 0x3EE, 2 }, + { 0x3F4, 0x3F7, 3 }, + { 0x3F9, 0x3FA, 1 }, + { 0x3FD, 0x42F, 1 }, + { 0x460, 0x480, 2 }, + { 0x48A, 0x4C0, 2 }, + { 0x4C1, 0x4CD, 2 }, + { 0x4D0, 0x52E, 2 }, + { 0x531, 0x556, 1 }, + { 0x10A0, 0x10C5, 1 }, + { 0x10C7, 0x10CD, 6 }, + { 0x13A0, 0x13F5, 1 }, + { 0x1C90, 0x1CBA, 1 }, + { 0x1CBD, 0x1CBF, 1 }, + { 0x1E00, 0x1E94, 2 }, + { 0x1E9E, 0x1EFE, 2 }, + { 0x1F08, 0x1F0F, 1 }, + { 0x1F18, 0x1F1D, 1 }, + { 0x1F28, 0x1F2F, 1 }, + { 0x1F38, 0x1F3F, 1 }, + { 0x1F48, 0x1F4D, 1 }, + { 0x1F59, 0x1F5F, 2 }, + { 0x1F68, 0x1F6F, 1 }, + { 0x1FB8, 0x1FBB, 1 }, + { 0x1FC8, 0x1FCB, 1 }, + { 0x1FD8, 0x1FDB, 1 }, + { 0x1FE8, 0x1FEC, 1 }, + { 0x1FF8, 0x1FFB, 1 }, + { 0x2102, 0x2107, 5 }, + { 0x210B, 0x210D, 1 }, + { 0x2110, 0x2112, 1 }, + { 0x2115, 0x2119, 4 }, + { 0x211A, 0x211D, 1 }, + { 0x2124, 0x212A, 2 }, + { 0x212B, 0x212D, 1 }, + { 0x2130, 0x2133, 1 }, + { 0x213E, 0x213F, 1 }, + { 0x2145, 0x2160, 27 }, + { 0x2161, 0x216F, 1 }, + { 0x2183, 0x24B6, 819 }, + { 0x24B7, 0x24CF, 1 }, + { 0x2C00, 0x2C2F, 1 }, + { 0x2C60, 0x2C62, 2 }, + { 
0x2C63, 0x2C64, 1 }, + { 0x2C67, 0x2C6D, 2 }, + { 0x2C6E, 0x2C70, 1 }, + { 0x2C72, 0x2C75, 3 }, + { 0x2C7E, 0x2C80, 1 }, + { 0x2C82, 0x2CE2, 2 }, + { 0x2CEB, 0x2CED, 2 }, + { 0x2CF2, 0xA640, 31054 }, + { 0xA642, 0xA66C, 2 }, + { 0xA680, 0xA69A, 2 }, + { 0xA722, 0xA72E, 2 }, + { 0xA732, 0xA76E, 2 }, + { 0xA779, 0xA77D, 2 }, + { 0xA77E, 0xA786, 2 }, + { 0xA78B, 0xA78D, 2 }, + { 0xA790, 0xA792, 2 }, + { 0xA796, 0xA7AA, 2 }, + { 0xA7AB, 0xA7AE, 1 }, + { 0xA7B0, 0xA7B4, 1 }, + { 0xA7B6, 0xA7C4, 2 }, + { 0xA7C5, 0xA7C7, 1 }, + { 0xA7C9, 0xA7D0, 7 }, + { 0xA7D6, 0xA7D8, 2 }, + { 0xA7F5, 0xFF21, 22316 }, + { 0xFF22, 0xFF3A, 1 }, + { 0x10400, 0x10427, 1 }, + { 0x104B0, 0x104D3, 1 }, + { 0x10570, 0x1057A, 1 }, + { 0x1057C, 0x1058A, 1 }, + { 0x1058C, 0x10592, 1 }, + { 0x10594, 0x10595, 1 }, + { 0x10C80, 0x10CB2, 1 }, + { 0x118A0, 0x118BF, 1 }, + { 0x16E40, 0x16E5F, 1 }, + { 0x1D400, 0x1D419, 1 }, + { 0x1D434, 0x1D44D, 1 }, + { 0x1D468, 0x1D481, 1 }, + { 0x1D49C, 0x1D49E, 2 }, + { 0x1D49F, 0x1D4A5, 3 }, + { 0x1D4A6, 0x1D4A9, 3 }, + { 0x1D4AA, 0x1D4AC, 1 }, + { 0x1D4AE, 0x1D4B5, 1 }, + { 0x1D4D0, 0x1D4E9, 1 }, + { 0x1D504, 0x1D505, 1 }, + { 0x1D507, 0x1D50A, 1 }, + { 0x1D50D, 0x1D514, 1 }, + { 0x1D516, 0x1D51C, 1 }, + { 0x1D538, 0x1D539, 1 }, + { 0x1D53B, 0x1D53E, 1 }, + { 0x1D540, 0x1D544, 1 }, + { 0x1D546, 0x1D54A, 4 }, + { 0x1D54B, 0x1D550, 1 }, + { 0x1D56C, 0x1D585, 1 }, + { 0x1D5A0, 0x1D5B9, 1 }, + { 0x1D5D4, 0x1D5ED, 1 }, + { 0x1D608, 0x1D621, 1 }, + { 0x1D63C, 0x1D655, 1 }, + { 0x1D670, 0x1D689, 1 }, + { 0x1D6A8, 0x1D6C0, 1 }, + { 0x1D6E2, 0x1D6FA, 1 }, + { 0x1D71C, 0x1D734, 1 }, + { 0x1D756, 0x1D76E, 1 }, + { 0x1D790, 0x1D7A8, 1 }, + { 0x1D7CA, 0x1E900, 4406 }, + { 0x1E901, 0x1E921, 1 }, + { 0x1F130, 0x1F149, 1 }, + { 0x1F150, 0x1F169, 1 }, + { 0x1F170, 0x1F189, 1 }, +}; + +static struct range_table xdigit_table[] = { + { 0x30, 0x39, 1 }, + { 0x41, 0x46, 1 }, + { 0x61, 0x66, 1 }, + { 0xFF10, 0xFF19, 1 }, + { 0xFF21, 0xFF26, 1 }, + { 0xFF41, 0xFF46, 1 }, +}; + +static 
struct range_table space_table[] = { + { 0x9, 0xD, 1 }, + { 0x20, 0x85, 101 }, + { 0xA0, 0x1680, 5600 }, + { 0x2000, 0x200A, 1 }, + { 0x2028, 0x2029, 1 }, + { 0x202F, 0x205F, 48 }, + { 0x3000, 0x3000, 1 }, +}; + +static struct range_table unprintable_table[] = { + { 0xAD, 0x34F, 674 }, + { 0x61C, 0x115F, 2883 }, + { 0x1160, 0x17B4, 1620 }, + { 0x17B5, 0x180B, 86 }, + { 0x180C, 0x180F, 1 }, + { 0x200B, 0x200F, 1 }, + { 0x202A, 0x202E, 1 }, + { 0x2060, 0x206F, 1 }, + { 0x3164, 0xFE00, 52380 }, + { 0xFE01, 0xFE0F, 1 }, + { 0xFEFF, 0xFFA0, 161 }, + { 0xFFF0, 0xFFF8, 1 }, + { 0x1BCA0, 0x1BCA3, 1 }, + { 0x1D173, 0x1D17A, 1 }, + { 0xE0000, 0xE0FFF, 1 }, +}; + +static struct range_table graph_table[] = { + { 0x20, 0x7E, 1 }, + { 0xA0, 0xAC, 1 }, + { 0xAE, 0x2FF, 1 }, + { 0x370, 0x377, 1 }, + { 0x37A, 0x37F, 1 }, + { 0x384, 0x38A, 1 }, + { 0x38C, 0x38E, 2 }, + { 0x38F, 0x3A1, 1 }, + { 0x3A3, 0x482, 1 }, + { 0x48A, 0x52F, 1 }, + { 0x531, 0x556, 1 }, + { 0x559, 0x58A, 1 }, + { 0x58D, 0x58F, 1 }, + { 0x5BE, 0x5C0, 2 }, + { 0x5C3, 0x5C6, 3 }, + { 0x5D0, 0x5EA, 1 }, + { 0x5EF, 0x5F4, 1 }, + { 0x606, 0x60F, 1 }, + { 0x61B, 0x61D, 2 }, + { 0x61E, 0x64A, 1 }, + { 0x660, 0x66F, 1 }, + { 0x671, 0x6D5, 1 }, + { 0x6DE, 0x6E5, 7 }, + { 0x6E6, 0x6E9, 3 }, + { 0x6EE, 0x70D, 1 }, + { 0x710, 0x712, 2 }, + { 0x713, 0x72F, 1 }, + { 0x74D, 0x7A5, 1 }, + { 0x7B1, 0x7C0, 15 }, + { 0x7C1, 0x7EA, 1 }, + { 0x7F4, 0x7FA, 1 }, + { 0x7FE, 0x815, 1 }, + { 0x81A, 0x824, 10 }, + { 0x828, 0x830, 8 }, + { 0x831, 0x83E, 1 }, + { 0x840, 0x858, 1 }, + { 0x85E, 0x860, 2 }, + { 0x861, 0x86A, 1 }, + { 0x870, 0x88E, 1 }, + { 0x8A0, 0x8C9, 1 }, + { 0x903, 0x939, 1 }, + { 0x93B, 0x93D, 2 }, + { 0x93E, 0x940, 1 }, + { 0x949, 0x94C, 1 }, + { 0x94E, 0x950, 1 }, + { 0x958, 0x961, 1 }, + { 0x964, 0x980, 1 }, + { 0x982, 0x983, 1 }, + { 0x985, 0x98C, 1 }, + { 0x98F, 0x990, 1 }, + { 0x993, 0x9A8, 1 }, + { 0x9AA, 0x9B0, 1 }, + { 0x9B2, 0x9B6, 4 }, + { 0x9B7, 0x9B9, 1 }, + { 0x9BD, 0x9BF, 2 }, + { 0x9C0, 0x9C7, 7 }, + { 
0x9C8, 0x9CB, 3 }, + { 0x9CC, 0x9CE, 2 }, + { 0x9DC, 0x9DD, 1 }, + { 0x9DF, 0x9E1, 1 }, + { 0x9E6, 0x9FD, 1 }, + { 0xA03, 0xA05, 2 }, + { 0xA06, 0xA0A, 1 }, + { 0xA0F, 0xA10, 1 }, + { 0xA13, 0xA28, 1 }, + { 0xA2A, 0xA30, 1 }, + { 0xA32, 0xA33, 1 }, + { 0xA35, 0xA36, 1 }, + { 0xA38, 0xA39, 1 }, + { 0xA3E, 0xA40, 1 }, + { 0xA59, 0xA5C, 1 }, + { 0xA5E, 0xA66, 8 }, + { 0xA67, 0xA6F, 1 }, + { 0xA72, 0xA74, 1 }, + { 0xA76, 0xA83, 13 }, + { 0xA85, 0xA8D, 1 }, + { 0xA8F, 0xA91, 1 }, + { 0xA93, 0xAA8, 1 }, + { 0xAAA, 0xAB0, 1 }, + { 0xAB2, 0xAB3, 1 }, + { 0xAB5, 0xAB9, 1 }, + { 0xABD, 0xAC0, 1 }, + { 0xAC9, 0xACB, 2 }, + { 0xACC, 0xAD0, 4 }, + { 0xAE0, 0xAE1, 1 }, + { 0xAE6, 0xAF1, 1 }, + { 0xAF9, 0xB02, 9 }, + { 0xB03, 0xB05, 2 }, + { 0xB06, 0xB0C, 1 }, + { 0xB0F, 0xB10, 1 }, + { 0xB13, 0xB28, 1 }, + { 0xB2A, 0xB30, 1 }, + { 0xB32, 0xB33, 1 }, + { 0xB35, 0xB39, 1 }, + { 0xB3D, 0xB40, 3 }, + { 0xB47, 0xB48, 1 }, + { 0xB4B, 0xB4C, 1 }, + { 0xB5C, 0xB5D, 1 }, + { 0xB5F, 0xB61, 1 }, + { 0xB66, 0xB77, 1 }, + { 0xB83, 0xB85, 2 }, + { 0xB86, 0xB8A, 1 }, + { 0xB8E, 0xB90, 1 }, + { 0xB92, 0xB95, 1 }, + { 0xB99, 0xB9A, 1 }, + { 0xB9C, 0xB9E, 2 }, + { 0xB9F, 0xBA3, 4 }, + { 0xBA4, 0xBA8, 4 }, + { 0xBA9, 0xBAA, 1 }, + { 0xBAE, 0xBB9, 1 }, + { 0xBBF, 0xBC1, 2 }, + { 0xBC2, 0xBC6, 4 }, + { 0xBC7, 0xBC8, 1 }, + { 0xBCA, 0xBCC, 1 }, + { 0xBD0, 0xBE6, 22 }, + { 0xBE7, 0xBFA, 1 }, + { 0xC01, 0xC03, 1 }, + { 0xC05, 0xC0C, 1 }, + { 0xC0E, 0xC10, 1 }, + { 0xC12, 0xC28, 1 }, + { 0xC2A, 0xC39, 1 }, + { 0xC3D, 0xC41, 4 }, + { 0xC42, 0xC44, 1 }, + { 0xC58, 0xC5A, 1 }, + { 0xC5D, 0xC60, 3 }, + { 0xC61, 0xC66, 5 }, + { 0xC67, 0xC6F, 1 }, + { 0xC77, 0xC80, 1 }, + { 0xC82, 0xC8C, 1 }, + { 0xC8E, 0xC90, 1 }, + { 0xC92, 0xCA8, 1 }, + { 0xCAA, 0xCB3, 1 }, + { 0xCB5, 0xCB9, 1 }, + { 0xCBD, 0xCBE, 1 }, + { 0xCC0, 0xCC1, 1 }, + { 0xCC3, 0xCC4, 1 }, + { 0xCC7, 0xCC8, 1 }, + { 0xCCA, 0xCCB, 1 }, + { 0xCDD, 0xCDE, 1 }, + { 0xCE0, 0xCE1, 1 }, + { 0xCE6, 0xCEF, 1 }, + { 0xCF1, 0xCF2, 1 }, + { 0xD02, 0xD0C, 1 }, 
+ { 0xD0E, 0xD10, 1 }, + { 0xD12, 0xD3A, 1 }, + { 0xD3D, 0xD3F, 2 }, + { 0xD40, 0xD46, 6 }, + { 0xD47, 0xD48, 1 }, + { 0xD4A, 0xD4C, 1 }, + { 0xD4E, 0xD4F, 1 }, + { 0xD54, 0xD56, 1 }, + { 0xD58, 0xD61, 1 }, + { 0xD66, 0xD7F, 1 }, + { 0xD82, 0xD83, 1 }, + { 0xD85, 0xD96, 1 }, + { 0xD9A, 0xDB1, 1 }, + { 0xDB3, 0xDBB, 1 }, + { 0xDBD, 0xDC0, 3 }, + { 0xDC1, 0xDC6, 1 }, + { 0xDD0, 0xDD1, 1 }, + { 0xDD8, 0xDDE, 1 }, + { 0xDE6, 0xDEF, 1 }, + { 0xDF2, 0xDF4, 1 }, + { 0xE01, 0xE30, 1 }, + { 0xE32, 0xE33, 1 }, + { 0xE3F, 0xE46, 1 }, + { 0xE4F, 0xE5B, 1 }, + { 0xE81, 0xE82, 1 }, + { 0xE84, 0xE86, 2 }, + { 0xE87, 0xE8A, 1 }, + { 0xE8C, 0xEA3, 1 }, + { 0xEA5, 0xEA7, 2 }, + { 0xEA8, 0xEB0, 1 }, + { 0xEB2, 0xEB3, 1 }, + { 0xEBD, 0xEC0, 3 }, + { 0xEC1, 0xEC4, 1 }, + { 0xEC6, 0xED0, 10 }, + { 0xED1, 0xED9, 1 }, + { 0xEDC, 0xEDF, 1 }, + { 0xF00, 0xF17, 1 }, + { 0xF1A, 0xF34, 1 }, + { 0xF36, 0xF3A, 2 }, + { 0xF3B, 0xF47, 1 }, + { 0xF49, 0xF6C, 1 }, + { 0xF7F, 0xF85, 6 }, + { 0xF88, 0xF8C, 1 }, + { 0xFBE, 0xFC5, 1 }, + { 0xFC7, 0xFCC, 1 }, + { 0xFCE, 0xFDA, 1 }, + { 0x1000, 0x102C, 1 }, + { 0x1031, 0x1038, 7 }, + { 0x103B, 0x103C, 1 }, + { 0x103F, 0x1057, 1 }, + { 0x105A, 0x105D, 1 }, + { 0x1061, 0x1070, 1 }, + { 0x1075, 0x1081, 1 }, + { 0x1083, 0x1084, 1 }, + { 0x1087, 0x108C, 1 }, + { 0x108E, 0x109C, 1 }, + { 0x109E, 0x10C5, 1 }, + { 0x10C7, 0x10CD, 6 }, + { 0x10D0, 0x1248, 1 }, + { 0x124A, 0x124D, 1 }, + { 0x1250, 0x1256, 1 }, + { 0x1258, 0x125A, 2 }, + { 0x125B, 0x125D, 1 }, + { 0x1260, 0x1288, 1 }, + { 0x128A, 0x128D, 1 }, + { 0x1290, 0x12B0, 1 }, + { 0x12B2, 0x12B5, 1 }, + { 0x12B8, 0x12BE, 1 }, + { 0x12C0, 0x12C2, 2 }, + { 0x12C3, 0x12C5, 1 }, + { 0x12C8, 0x12D6, 1 }, + { 0x12D8, 0x1310, 1 }, + { 0x1312, 0x1315, 1 }, + { 0x1318, 0x135A, 1 }, + { 0x1360, 0x137C, 1 }, + { 0x1380, 0x1399, 1 }, + { 0x13A0, 0x13F5, 1 }, + { 0x13F8, 0x13FD, 1 }, + { 0x1400, 0x169C, 1 }, + { 0x16A0, 0x16F8, 1 }, + { 0x1700, 0x1711, 1 }, + { 0x1715, 0x171F, 10 }, + { 0x1720, 0x1731, 1 }, + { 0x1734, 
0x1736, 1 }, + { 0x1740, 0x1751, 1 }, + { 0x1760, 0x176C, 1 }, + { 0x176E, 0x1770, 1 }, + { 0x1780, 0x17B3, 1 }, + { 0x17B6, 0x17BE, 8 }, + { 0x17BF, 0x17C5, 1 }, + { 0x17C7, 0x17C8, 1 }, + { 0x17D4, 0x17DC, 1 }, + { 0x17E0, 0x17E9, 1 }, + { 0x17F0, 0x17F9, 1 }, + { 0x1800, 0x180A, 1 }, + { 0x1810, 0x1819, 1 }, + { 0x1820, 0x1878, 1 }, + { 0x1880, 0x1884, 1 }, + { 0x1887, 0x18A8, 1 }, + { 0x18AA, 0x18B0, 6 }, + { 0x18B1, 0x18F5, 1 }, + { 0x1900, 0x191E, 1 }, + { 0x1923, 0x1926, 1 }, + { 0x1929, 0x192B, 1 }, + { 0x1930, 0x1931, 1 }, + { 0x1933, 0x1938, 1 }, + { 0x1940, 0x1944, 4 }, + { 0x1945, 0x196D, 1 }, + { 0x1970, 0x1974, 1 }, + { 0x1980, 0x19AB, 1 }, + { 0x19B0, 0x19C9, 1 }, + { 0x19D0, 0x19DA, 1 }, + { 0x19DE, 0x1A16, 1 }, + { 0x1A19, 0x1A1A, 1 }, + { 0x1A1E, 0x1A55, 1 }, + { 0x1A57, 0x1A61, 10 }, + { 0x1A63, 0x1A64, 1 }, + { 0x1A6D, 0x1A72, 1 }, + { 0x1A80, 0x1A89, 1 }, + { 0x1A90, 0x1A99, 1 }, + { 0x1AA0, 0x1AAD, 1 }, + { 0x1B04, 0x1B33, 1 }, + { 0x1B3B, 0x1B3D, 2 }, + { 0x1B3E, 0x1B41, 1 }, + { 0x1B43, 0x1B4C, 1 }, + { 0x1B50, 0x1B6A, 1 }, + { 0x1B74, 0x1B7E, 1 }, + { 0x1B82, 0x1BA1, 1 }, + { 0x1BA6, 0x1BA7, 1 }, + { 0x1BAA, 0x1BAE, 4 }, + { 0x1BAF, 0x1BE5, 1 }, + { 0x1BE7, 0x1BEA, 3 }, + { 0x1BEB, 0x1BEC, 1 }, + { 0x1BEE, 0x1BF2, 4 }, + { 0x1BF3, 0x1BFC, 9 }, + { 0x1BFD, 0x1C2B, 1 }, + { 0x1C34, 0x1C35, 1 }, + { 0x1C3B, 0x1C49, 1 }, + { 0x1C4D, 0x1C88, 1 }, + { 0x1C90, 0x1CBA, 1 }, + { 0x1CBD, 0x1CC7, 1 }, + { 0x1CD3, 0x1CE1, 14 }, + { 0x1CE9, 0x1CEC, 1 }, + { 0x1CEE, 0x1CF3, 1 }, + { 0x1CF5, 0x1CF7, 1 }, + { 0x1CFA, 0x1D00, 6 }, + { 0x1D01, 0x1DBF, 1 }, + { 0x1E00, 0x1F15, 1 }, + { 0x1F18, 0x1F1D, 1 }, + { 0x1F20, 0x1F45, 1 }, + { 0x1F48, 0x1F4D, 1 }, + { 0x1F50, 0x1F57, 1 }, + { 0x1F59, 0x1F5F, 2 }, + { 0x1F60, 0x1F7D, 1 }, + { 0x1F80, 0x1FB4, 1 }, + { 0x1FB6, 0x1FC4, 1 }, + { 0x1FC6, 0x1FD3, 1 }, + { 0x1FD6, 0x1FDB, 1 }, + { 0x1FDD, 0x1FEF, 1 }, + { 0x1FF2, 0x1FF4, 1 }, + { 0x1FF6, 0x1FFE, 1 }, + { 0x2000, 0x200A, 1 }, + { 0x2010, 0x2027, 1 }, + { 
0x202F, 0x205F, 1 }, + { 0x2070, 0x2071, 1 }, + { 0x2074, 0x208E, 1 }, + { 0x2090, 0x209C, 1 }, + { 0x20A0, 0x20C0, 1 }, + { 0x2100, 0x218B, 1 }, + { 0x2190, 0x2426, 1 }, + { 0x2440, 0x244A, 1 }, + { 0x2460, 0x2B73, 1 }, + { 0x2B76, 0x2B95, 1 }, + { 0x2B97, 0x2CEE, 1 }, + { 0x2CF2, 0x2CF3, 1 }, + { 0x2CF9, 0x2D25, 1 }, + { 0x2D27, 0x2D2D, 6 }, + { 0x2D30, 0x2D67, 1 }, + { 0x2D6F, 0x2D70, 1 }, + { 0x2D80, 0x2D96, 1 }, + { 0x2DA0, 0x2DA6, 1 }, + { 0x2DA8, 0x2DAE, 1 }, + { 0x2DB0, 0x2DB6, 1 }, + { 0x2DB8, 0x2DBE, 1 }, + { 0x2DC0, 0x2DC6, 1 }, + { 0x2DC8, 0x2DCE, 1 }, + { 0x2DD0, 0x2DD6, 1 }, + { 0x2DD8, 0x2DDE, 1 }, + { 0x2E00, 0x2E5D, 1 }, + { 0x2E80, 0x2E99, 1 }, + { 0x2E9B, 0x2EF3, 1 }, + { 0x2F00, 0x2FD5, 1 }, + { 0x2FF0, 0x2FFB, 1 }, + { 0x3000, 0x3029, 1 }, + { 0x3030, 0x303F, 1 }, + { 0x3041, 0x3096, 1 }, + { 0x309B, 0x30FF, 1 }, + { 0x3105, 0x312F, 1 }, + { 0x3131, 0x318E, 1 }, + { 0x3190, 0x31E3, 1 }, + { 0x31F0, 0x321E, 1 }, + { 0x3220, 0xA48C, 1 }, + { 0xA490, 0xA4C6, 1 }, + { 0xA4D0, 0xA62B, 1 }, + { 0xA640, 0xA66E, 1 }, + { 0xA673, 0xA67E, 11 }, + { 0xA67F, 0xA69D, 1 }, + { 0xA6A0, 0xA6EF, 1 }, + { 0xA6F2, 0xA6F7, 1 }, + { 0xA700, 0xA7CA, 1 }, + { 0xA7D0, 0xA7D1, 1 }, + { 0xA7D3, 0xA7D5, 2 }, + { 0xA7D6, 0xA7D9, 1 }, + { 0xA7F2, 0xA801, 1 }, + { 0xA803, 0xA805, 1 }, + { 0xA807, 0xA80A, 1 }, + { 0xA80C, 0xA824, 1 }, + { 0xA827, 0xA82B, 1 }, + { 0xA830, 0xA839, 1 }, + { 0xA840, 0xA877, 1 }, + { 0xA880, 0xA8C3, 1 }, + { 0xA8CE, 0xA8D9, 1 }, + { 0xA8F2, 0xA8FE, 1 }, + { 0xA900, 0xA925, 1 }, + { 0xA92E, 0xA946, 1 }, + { 0xA952, 0xA953, 1 }, + { 0xA95F, 0xA97C, 1 }, + { 0xA983, 0xA9B2, 1 }, + { 0xA9B4, 0xA9B5, 1 }, + { 0xA9BA, 0xA9BB, 1 }, + { 0xA9BE, 0xA9CD, 1 }, + { 0xA9CF, 0xA9D9, 1 }, + { 0xA9DE, 0xA9E4, 1 }, + { 0xA9E6, 0xA9FE, 1 }, + { 0xAA00, 0xAA28, 1 }, + { 0xAA2F, 0xAA30, 1 }, + { 0xAA33, 0xAA34, 1 }, + { 0xAA40, 0xAA42, 1 }, + { 0xAA44, 0xAA4B, 1 }, + { 0xAA4D, 0xAA50, 3 }, + { 0xAA51, 0xAA59, 1 }, + { 0xAA5C, 0xAA7B, 1 }, + { 0xAA7D, 0xAAAF, 1 }, + 
{ 0xAAB1, 0xAAB5, 4 }, + { 0xAAB6, 0xAAB9, 3 }, + { 0xAABA, 0xAABD, 1 }, + { 0xAAC0, 0xAAC2, 2 }, + { 0xAADB, 0xAAEB, 1 }, + { 0xAAEE, 0xAAF5, 1 }, + { 0xAB01, 0xAB06, 1 }, + { 0xAB09, 0xAB0E, 1 }, + { 0xAB11, 0xAB16, 1 }, + { 0xAB20, 0xAB26, 1 }, + { 0xAB28, 0xAB2E, 1 }, + { 0xAB30, 0xAB6B, 1 }, + { 0xAB70, 0xABE4, 1 }, + { 0xABE6, 0xABE7, 1 }, + { 0xABE9, 0xABEC, 1 }, + { 0xABF0, 0xABF9, 1 }, + { 0xAC00, 0xD7A3, 1 }, + { 0xD7B0, 0xD7C6, 1 }, + { 0xD7CB, 0xD7FB, 1 }, + { 0xF900, 0xFA6D, 1 }, + { 0xFA70, 0xFAD9, 1 }, + { 0xFB00, 0xFB06, 1 }, + { 0xFB13, 0xFB17, 1 }, + { 0xFB1D, 0xFB1F, 2 }, + { 0xFB20, 0xFB36, 1 }, + { 0xFB38, 0xFB3C, 1 }, + { 0xFB3E, 0xFB40, 2 }, + { 0xFB41, 0xFB43, 2 }, + { 0xFB44, 0xFB46, 2 }, + { 0xFB47, 0xFBC2, 1 }, + { 0xFBD3, 0xFD8F, 1 }, + { 0xFD92, 0xFDC7, 1 }, + { 0xFDCF, 0xFDF0, 33 }, + { 0xFDF1, 0xFDFF, 1 }, + { 0xFE10, 0xFE19, 1 }, + { 0xFE30, 0xFE52, 1 }, + { 0xFE54, 0xFE66, 1 }, + { 0xFE68, 0xFE6B, 1 }, + { 0xFE70, 0xFE74, 1 }, + { 0xFE76, 0xFEFC, 1 }, + { 0xFF01, 0xFF9D, 1 }, + { 0xFFA0, 0xFFBE, 1 }, + { 0xFFC2, 0xFFC7, 1 }, + { 0xFFCA, 0xFFCF, 1 }, + { 0xFFD2, 0xFFD7, 1 }, + { 0xFFDA, 0xFFDC, 1 }, + { 0xFFE0, 0xFFE6, 1 }, + { 0xFFE8, 0xFFEE, 1 }, + { 0xFFFC, 0xFFFD, 1 }, + { 0x10000, 0x1000B, 1 }, + { 0x1000D, 0x10026, 1 }, + { 0x10028, 0x1003A, 1 }, + { 0x1003C, 0x1003D, 1 }, + { 0x1003F, 0x1004D, 1 }, + { 0x10050, 0x1005D, 1 }, + { 0x10080, 0x100FA, 1 }, + { 0x10100, 0x10102, 1 }, + { 0x10107, 0x10133, 1 }, + { 0x10137, 0x1018E, 1 }, + { 0x10190, 0x1019C, 1 }, + { 0x101A0, 0x101D0, 48 }, + { 0x101D1, 0x101FC, 1 }, + { 0x10280, 0x1029C, 1 }, + { 0x102A0, 0x102D0, 1 }, + { 0x102E1, 0x102FB, 1 }, + { 0x10300, 0x10323, 1 }, + { 0x1032D, 0x1034A, 1 }, + { 0x10350, 0x10375, 1 }, + { 0x10380, 0x1039D, 1 }, + { 0x1039F, 0x103C3, 1 }, + { 0x103C8, 0x103D5, 1 }, + { 0x10400, 0x1049D, 1 }, + { 0x104A0, 0x104A9, 1 }, + { 0x104B0, 0x104D3, 1 }, + { 0x104D8, 0x104FB, 1 }, + { 0x10500, 0x10527, 1 }, + { 0x10530, 0x10563, 1 }, + { 0x1056F, 
0x1057A, 1 }, + { 0x1057C, 0x1058A, 1 }, + { 0x1058C, 0x10592, 1 }, + { 0x10594, 0x10595, 1 }, + { 0x10597, 0x105A1, 1 }, + { 0x105A3, 0x105B1, 1 }, + { 0x105B3, 0x105B9, 1 }, + { 0x105BB, 0x105BC, 1 }, + { 0x10600, 0x10736, 1 }, + { 0x10740, 0x10755, 1 }, + { 0x10760, 0x10767, 1 }, + { 0x10780, 0x10785, 1 }, + { 0x10787, 0x107B0, 1 }, + { 0x107B2, 0x107BA, 1 }, + { 0x10800, 0x10805, 1 }, + { 0x10808, 0x1080A, 2 }, + { 0x1080B, 0x10835, 1 }, + { 0x10837, 0x10838, 1 }, + { 0x1083C, 0x1083F, 3 }, + { 0x10840, 0x10855, 1 }, + { 0x10857, 0x1089E, 1 }, + { 0x108A7, 0x108AF, 1 }, + { 0x108E0, 0x108F2, 1 }, + { 0x108F4, 0x108F5, 1 }, + { 0x108FB, 0x1091B, 1 }, + { 0x1091F, 0x10939, 1 }, + { 0x1093F, 0x10980, 65 }, + { 0x10981, 0x109B7, 1 }, + { 0x109BC, 0x109CF, 1 }, + { 0x109D2, 0x10A00, 1 }, + { 0x10A10, 0x10A13, 1 }, + { 0x10A15, 0x10A17, 1 }, + { 0x10A19, 0x10A35, 1 }, + { 0x10A40, 0x10A48, 1 }, + { 0x10A50, 0x10A58, 1 }, + { 0x10A60, 0x10A9F, 1 }, + { 0x10AC0, 0x10AE4, 1 }, + { 0x10AEB, 0x10AF6, 1 }, + { 0x10B00, 0x10B35, 1 }, + { 0x10B39, 0x10B55, 1 }, + { 0x10B58, 0x10B72, 1 }, + { 0x10B78, 0x10B91, 1 }, + { 0x10B99, 0x10B9C, 1 }, + { 0x10BA9, 0x10BAF, 1 }, + { 0x10C00, 0x10C48, 1 }, + { 0x10C80, 0x10CB2, 1 }, + { 0x10CC0, 0x10CF2, 1 }, + { 0x10CFA, 0x10D23, 1 }, + { 0x10D30, 0x10D39, 1 }, + { 0x10E60, 0x10E7E, 1 }, + { 0x10E80, 0x10EA9, 1 }, + { 0x10EAD, 0x10EB0, 3 }, + { 0x10EB1, 0x10F00, 79 }, + { 0x10F01, 0x10F27, 1 }, + { 0x10F30, 0x10F45, 1 }, + { 0x10F51, 0x10F59, 1 }, + { 0x10F70, 0x10F81, 1 }, + { 0x10F86, 0x10F89, 1 }, + { 0x10FB0, 0x10FCB, 1 }, + { 0x10FE0, 0x10FF6, 1 }, + { 0x11000, 0x11002, 2 }, + { 0x11003, 0x11037, 1 }, + { 0x11047, 0x1104D, 1 }, + { 0x11052, 0x1106F, 1 }, + { 0x11071, 0x11072, 1 }, + { 0x11075, 0x11082, 13 }, + { 0x11083, 0x110B2, 1 }, + { 0x110B7, 0x110B8, 1 }, + { 0x110BB, 0x110BC, 1 }, + { 0x110BE, 0x110C1, 1 }, + { 0x110D0, 0x110E8, 1 }, + { 0x110F0, 0x110F9, 1 }, + { 0x11103, 0x11126, 1 }, + { 0x1112C, 0x11136, 10 }, + { 
0x11137, 0x11147, 1 }, + { 0x11150, 0x11172, 1 }, + { 0x11174, 0x11176, 1 }, + { 0x11182, 0x111B5, 1 }, + { 0x111BF, 0x111C8, 1 }, + { 0x111CD, 0x111CE, 1 }, + { 0x111D0, 0x111DF, 1 }, + { 0x111E1, 0x111F4, 1 }, + { 0x11200, 0x11211, 1 }, + { 0x11213, 0x1122E, 1 }, + { 0x11232, 0x11233, 1 }, + { 0x11235, 0x11238, 3 }, + { 0x11239, 0x1123D, 1 }, + { 0x11280, 0x11286, 1 }, + { 0x11288, 0x1128A, 2 }, + { 0x1128B, 0x1128D, 1 }, + { 0x1128F, 0x1129D, 1 }, + { 0x1129F, 0x112A9, 1 }, + { 0x112B0, 0x112DE, 1 }, + { 0x112E0, 0x112E2, 1 }, + { 0x112F0, 0x112F9, 1 }, + { 0x11302, 0x11303, 1 }, + { 0x11305, 0x1130C, 1 }, + { 0x1130F, 0x11310, 1 }, + { 0x11313, 0x11328, 1 }, + { 0x1132A, 0x11330, 1 }, + { 0x11332, 0x11333, 1 }, + { 0x11335, 0x11339, 1 }, + { 0x1133D, 0x11341, 2 }, + { 0x11342, 0x11344, 1 }, + { 0x11347, 0x11348, 1 }, + { 0x1134B, 0x1134D, 1 }, + { 0x11350, 0x1135D, 13 }, + { 0x1135E, 0x11363, 1 }, + { 0x11400, 0x11437, 1 }, + { 0x11440, 0x11441, 1 }, + { 0x11445, 0x11447, 2 }, + { 0x11448, 0x1145B, 1 }, + { 0x1145D, 0x1145F, 2 }, + { 0x11460, 0x11461, 1 }, + { 0x11480, 0x114AF, 1 }, + { 0x114B1, 0x114B2, 1 }, + { 0x114B9, 0x114BB, 2 }, + { 0x114BC, 0x114BE, 2 }, + { 0x114C1, 0x114C4, 3 }, + { 0x114C5, 0x114C7, 1 }, + { 0x114D0, 0x114D9, 1 }, + { 0x11580, 0x115AE, 1 }, + { 0x115B0, 0x115B1, 1 }, + { 0x115B8, 0x115BB, 1 }, + { 0x115BE, 0x115C1, 3 }, + { 0x115C2, 0x115DB, 1 }, + { 0x11600, 0x11632, 1 }, + { 0x1163B, 0x1163C, 1 }, + { 0x1163E, 0x11641, 3 }, + { 0x11642, 0x11644, 1 }, + { 0x11650, 0x11659, 1 }, + { 0x11660, 0x1166C, 1 }, + { 0x11680, 0x116AA, 1 }, + { 0x116AC, 0x116AE, 2 }, + { 0x116AF, 0x116B6, 7 }, + { 0x116B8, 0x116B9, 1 }, + { 0x116C0, 0x116C9, 1 }, + { 0x11700, 0x1171A, 1 }, + { 0x11720, 0x11721, 1 }, + { 0x11726, 0x11730, 10 }, + { 0x11731, 0x11746, 1 }, + { 0x11800, 0x1182E, 1 }, + { 0x11838, 0x1183B, 3 }, + { 0x118A0, 0x118F2, 1 }, + { 0x118FF, 0x11906, 1 }, + { 0x11909, 0x1190C, 3 }, + { 0x1190D, 0x11913, 1 }, + { 0x11915, 0x11916, 1 }, + { 
0x11918, 0x1192F, 1 }, + { 0x11931, 0x11935, 1 }, + { 0x11937, 0x11938, 1 }, + { 0x1193D, 0x1193F, 2 }, + { 0x11940, 0x11942, 1 }, + { 0x11944, 0x11946, 1 }, + { 0x11950, 0x11959, 1 }, + { 0x119A0, 0x119A7, 1 }, + { 0x119AA, 0x119D3, 1 }, + { 0x119DC, 0x119DF, 1 }, + { 0x119E1, 0x119E4, 1 }, + { 0x11A00, 0x11A0B, 11 }, + { 0x11A0C, 0x11A32, 1 }, + { 0x11A39, 0x11A3A, 1 }, + { 0x11A3F, 0x11A46, 1 }, + { 0x11A50, 0x11A57, 7 }, + { 0x11A58, 0x11A5C, 4 }, + { 0x11A5D, 0x11A89, 1 }, + { 0x11A97, 0x11A9A, 3 }, + { 0x11A9B, 0x11AA2, 1 }, + { 0x11AB0, 0x11AF8, 1 }, + { 0x11C00, 0x11C08, 1 }, + { 0x11C0A, 0x11C2F, 1 }, + { 0x11C3E, 0x11C40, 2 }, + { 0x11C41, 0x11C45, 1 }, + { 0x11C50, 0x11C6C, 1 }, + { 0x11C70, 0x11C8F, 1 }, + { 0x11CA9, 0x11CB1, 8 }, + { 0x11CB4, 0x11D00, 76 }, + { 0x11D01, 0x11D06, 1 }, + { 0x11D08, 0x11D09, 1 }, + { 0x11D0B, 0x11D30, 1 }, + { 0x11D46, 0x11D50, 10 }, + { 0x11D51, 0x11D59, 1 }, + { 0x11D60, 0x11D65, 1 }, + { 0x11D67, 0x11D68, 1 }, + { 0x11D6A, 0x11D8E, 1 }, + { 0x11D93, 0x11D94, 1 }, + { 0x11D96, 0x11D98, 2 }, + { 0x11DA0, 0x11DA9, 1 }, + { 0x11EE0, 0x11EF2, 1 }, + { 0x11EF5, 0x11EF8, 1 }, + { 0x11FB0, 0x11FC0, 16 }, + { 0x11FC1, 0x11FF1, 1 }, + { 0x11FFF, 0x12399, 1 }, + { 0x12400, 0x1246E, 1 }, + { 0x12470, 0x12474, 1 }, + { 0x12480, 0x12543, 1 }, + { 0x12F90, 0x12FF2, 1 }, + { 0x13000, 0x1342E, 1 }, + { 0x14400, 0x14646, 1 }, + { 0x16800, 0x16A38, 1 }, + { 0x16A40, 0x16A5E, 1 }, + { 0x16A60, 0x16A69, 1 }, + { 0x16A6E, 0x16ABE, 1 }, + { 0x16AC0, 0x16AC9, 1 }, + { 0x16AD0, 0x16AED, 1 }, + { 0x16AF5, 0x16B00, 11 }, + { 0x16B01, 0x16B2F, 1 }, + { 0x16B37, 0x16B45, 1 }, + { 0x16B50, 0x16B59, 1 }, + { 0x16B5B, 0x16B61, 1 }, + { 0x16B63, 0x16B77, 1 }, + { 0x16B7D, 0x16B8F, 1 }, + { 0x16E40, 0x16E9A, 1 }, + { 0x16F00, 0x16F4A, 1 }, + { 0x16F50, 0x16F87, 1 }, + { 0x16F93, 0x16F9F, 1 }, + { 0x16FE0, 0x16FE3, 1 }, + { 0x16FF0, 0x16FF1, 1 }, + { 0x17000, 0x187F7, 1 }, + { 0x18800, 0x18CD5, 1 }, + { 0x18D00, 0x18D08, 1 }, + { 0x1AFF0, 0x1AFF3, 1 }, 
+ { 0x1AFF5, 0x1AFFB, 1 }, + { 0x1AFFD, 0x1AFFE, 1 }, + { 0x1B000, 0x1B122, 1 }, + { 0x1B150, 0x1B152, 1 }, + { 0x1B164, 0x1B167, 1 }, + { 0x1B170, 0x1B2FB, 1 }, + { 0x1BC00, 0x1BC6A, 1 }, + { 0x1BC70, 0x1BC7C, 1 }, + { 0x1BC80, 0x1BC88, 1 }, + { 0x1BC90, 0x1BC99, 1 }, + { 0x1BC9C, 0x1BC9F, 3 }, + { 0x1CF50, 0x1CFC3, 1 }, + { 0x1D000, 0x1D0F5, 1 }, + { 0x1D100, 0x1D126, 1 }, + { 0x1D129, 0x1D164, 1 }, + { 0x1D166, 0x1D16A, 4 }, + { 0x1D16B, 0x1D16D, 1 }, + { 0x1D183, 0x1D184, 1 }, + { 0x1D18C, 0x1D1A9, 1 }, + { 0x1D1AE, 0x1D1EA, 1 }, + { 0x1D200, 0x1D241, 1 }, + { 0x1D245, 0x1D2E0, 155 }, + { 0x1D2E1, 0x1D2F3, 1 }, + { 0x1D300, 0x1D356, 1 }, + { 0x1D360, 0x1D378, 1 }, + { 0x1D400, 0x1D454, 1 }, + { 0x1D456, 0x1D49C, 1 }, + { 0x1D49E, 0x1D49F, 1 }, + { 0x1D4A2, 0x1D4A5, 3 }, + { 0x1D4A6, 0x1D4A9, 3 }, + { 0x1D4AA, 0x1D4AC, 1 }, + { 0x1D4AE, 0x1D4B9, 1 }, + { 0x1D4BB, 0x1D4BD, 2 }, + { 0x1D4BE, 0x1D4C3, 1 }, + { 0x1D4C5, 0x1D505, 1 }, + { 0x1D507, 0x1D50A, 1 }, + { 0x1D50D, 0x1D514, 1 }, + { 0x1D516, 0x1D51C, 1 }, + { 0x1D51E, 0x1D539, 1 }, + { 0x1D53B, 0x1D53E, 1 }, + { 0x1D540, 0x1D544, 1 }, + { 0x1D546, 0x1D54A, 4 }, + { 0x1D54B, 0x1D550, 1 }, + { 0x1D552, 0x1D6A5, 1 }, + { 0x1D6A8, 0x1D7CB, 1 }, + { 0x1D7CE, 0x1D9FF, 1 }, + { 0x1DA37, 0x1DA3A, 1 }, + { 0x1DA6D, 0x1DA74, 1 }, + { 0x1DA76, 0x1DA83, 1 }, + { 0x1DA85, 0x1DA8B, 1 }, + { 0x1DF00, 0x1DF1E, 1 }, + { 0x1E100, 0x1E12C, 1 }, + { 0x1E137, 0x1E13D, 1 }, + { 0x1E140, 0x1E149, 1 }, + { 0x1E14E, 0x1E14F, 1 }, + { 0x1E290, 0x1E2AD, 1 }, + { 0x1E2C0, 0x1E2EB, 1 }, + { 0x1E2F0, 0x1E2F9, 1 }, + { 0x1E2FF, 0x1E7E0, 1249 }, + { 0x1E7E1, 0x1E7E6, 1 }, + { 0x1E7E8, 0x1E7EB, 1 }, + { 0x1E7ED, 0x1E7EE, 1 }, + { 0x1E7F0, 0x1E7FE, 1 }, + { 0x1E800, 0x1E8C4, 1 }, + { 0x1E8C7, 0x1E8CF, 1 }, + { 0x1E900, 0x1E943, 1 }, + { 0x1E94B, 0x1E950, 5 }, + { 0x1E951, 0x1E959, 1 }, + { 0x1E95E, 0x1E95F, 1 }, + { 0x1EC71, 0x1ECB4, 1 }, + { 0x1ED01, 0x1ED3D, 1 }, + { 0x1EE00, 0x1EE03, 1 }, + { 0x1EE05, 0x1EE1F, 1 }, + { 0x1EE21, 0x1EE22, 1 
}, + { 0x1EE24, 0x1EE27, 3 }, + { 0x1EE29, 0x1EE32, 1 }, + { 0x1EE34, 0x1EE37, 1 }, + { 0x1EE39, 0x1EE3B, 2 }, + { 0x1EE42, 0x1EE47, 5 }, + { 0x1EE49, 0x1EE4D, 2 }, + { 0x1EE4E, 0x1EE4F, 1 }, + { 0x1EE51, 0x1EE52, 1 }, + { 0x1EE54, 0x1EE57, 3 }, + { 0x1EE59, 0x1EE61, 2 }, + { 0x1EE62, 0x1EE64, 2 }, + { 0x1EE67, 0x1EE6A, 1 }, + { 0x1EE6C, 0x1EE72, 1 }, + { 0x1EE74, 0x1EE77, 1 }, + { 0x1EE79, 0x1EE7C, 1 }, + { 0x1EE7E, 0x1EE80, 2 }, + { 0x1EE81, 0x1EE89, 1 }, + { 0x1EE8B, 0x1EE9B, 1 }, + { 0x1EEA1, 0x1EEA3, 1 }, + { 0x1EEA5, 0x1EEA9, 1 }, + { 0x1EEAB, 0x1EEBB, 1 }, + { 0x1EEF0, 0x1EEF1, 1 }, + { 0x1F000, 0x1F02B, 1 }, + { 0x1F030, 0x1F093, 1 }, + { 0x1F0A0, 0x1F0AE, 1 }, + { 0x1F0B1, 0x1F0BF, 1 }, + { 0x1F0C1, 0x1F0CF, 1 }, + { 0x1F0D1, 0x1F0F5, 1 }, + { 0x1F100, 0x1F1AD, 1 }, + { 0x1F1E6, 0x1F202, 1 }, + { 0x1F210, 0x1F23B, 1 }, + { 0x1F240, 0x1F248, 1 }, + { 0x1F250, 0x1F251, 1 }, + { 0x1F260, 0x1F265, 1 }, + { 0x1F300, 0x1F6D7, 1 }, + { 0x1F6DD, 0x1F6EC, 1 }, + { 0x1F6F0, 0x1F6FC, 1 }, + { 0x1F700, 0x1F773, 1 }, + { 0x1F780, 0x1F7D8, 1 }, + { 0x1F7E0, 0x1F7EB, 1 }, + { 0x1F7F0, 0x1F800, 16 }, + { 0x1F801, 0x1F80B, 1 }, + { 0x1F810, 0x1F847, 1 }, + { 0x1F850, 0x1F859, 1 }, + { 0x1F860, 0x1F887, 1 }, + { 0x1F890, 0x1F8AD, 1 }, + { 0x1F8B0, 0x1F8B1, 1 }, + { 0x1F900, 0x1FA53, 1 }, + { 0x1FA60, 0x1FA6D, 1 }, + { 0x1FA70, 0x1FA74, 1 }, + { 0x1FA78, 0x1FA7C, 1 }, + { 0x1FA80, 0x1FA86, 1 }, + { 0x1FA90, 0x1FAAC, 1 }, + { 0x1FAB0, 0x1FABA, 1 }, + { 0x1FAC0, 0x1FAC5, 1 }, + { 0x1FAD0, 0x1FAD9, 1 }, + { 0x1FAE0, 0x1FAE7, 1 }, + { 0x1FAF0, 0x1FAF6, 1 }, + { 0x1FB00, 0x1FB92, 1 }, + { 0x1FB94, 0x1FBCA, 1 }, + { 0x1FBF0, 0x1FBF9, 1 }, + { 0x20000, 0x2A6DF, 1 }, + { 0x2A700, 0x2B738, 1 }, + { 0x2B740, 0x2B81D, 1 }, + { 0x2B820, 0x2CEA1, 1 }, + { 0x2CEB0, 0x2EBE0, 1 }, + { 0x2F800, 0x2FA1D, 1 }, + { 0x30000, 0x3134A, 1 }, +}; + +static struct range_table compose_table[] = { + { 0x300, 0x36F, 1 }, + { 0x483, 0x489, 1 }, + { 0x591, 0x5BD, 1 }, + { 0x5BF, 0x5C1, 2 }, + { 0x5C2, 
0x5C4, 2 }, + { 0x5C5, 0x5C7, 2 }, + { 0x610, 0x61A, 1 }, + { 0x64B, 0x65F, 1 }, + { 0x670, 0x6D6, 102 }, + { 0x6D7, 0x6DC, 1 }, + { 0x6DF, 0x6E4, 1 }, + { 0x6E7, 0x6E8, 1 }, + { 0x6EA, 0x6ED, 1 }, + { 0x711, 0x730, 31 }, + { 0x731, 0x74A, 1 }, + { 0x7A6, 0x7B0, 1 }, + { 0x7EB, 0x7F3, 1 }, + { 0x7FD, 0x816, 25 }, + { 0x817, 0x819, 1 }, + { 0x81B, 0x823, 1 }, + { 0x825, 0x827, 1 }, + { 0x829, 0x82D, 1 }, + { 0x859, 0x85B, 1 }, + { 0x898, 0x89F, 1 }, + { 0x8CA, 0x8E1, 1 }, + { 0x8E3, 0x902, 1 }, + { 0x93A, 0x93C, 2 }, + { 0x941, 0x948, 1 }, + { 0x94D, 0x951, 4 }, + { 0x952, 0x957, 1 }, + { 0x962, 0x963, 1 }, + { 0x981, 0x9BC, 59 }, + { 0x9BE, 0x9C1, 3 }, + { 0x9C2, 0x9C4, 1 }, + { 0x9CD, 0x9D7, 10 }, + { 0x9E2, 0x9E3, 1 }, + { 0x9FE, 0xA01, 3 }, + { 0xA02, 0xA3C, 58 }, + { 0xA41, 0xA42, 1 }, + { 0xA47, 0xA48, 1 }, + { 0xA4B, 0xA4D, 1 }, + { 0xA51, 0xA70, 31 }, + { 0xA71, 0xA75, 4 }, + { 0xA81, 0xA82, 1 }, + { 0xABC, 0xAC1, 5 }, + { 0xAC2, 0xAC5, 1 }, + { 0xAC7, 0xAC8, 1 }, + { 0xACD, 0xAE2, 21 }, + { 0xAE3, 0xAFA, 23 }, + { 0xAFB, 0xAFF, 1 }, + { 0xB01, 0xB3C, 59 }, + { 0xB3E, 0xB3F, 1 }, + { 0xB41, 0xB44, 1 }, + { 0xB4D, 0xB55, 8 }, + { 0xB56, 0xB57, 1 }, + { 0xB62, 0xB63, 1 }, + { 0xB82, 0xBBE, 60 }, + { 0xBC0, 0xBCD, 13 }, + { 0xBD7, 0xC00, 41 }, + { 0xC04, 0xC3C, 56 }, + { 0xC3E, 0xC40, 1 }, + { 0xC46, 0xC48, 1 }, + { 0xC4A, 0xC4D, 1 }, + { 0xC55, 0xC56, 1 }, + { 0xC62, 0xC63, 1 }, + { 0xC81, 0xCBC, 59 }, + { 0xCBF, 0xCC2, 3 }, + { 0xCC6, 0xCCC, 6 }, + { 0xCCD, 0xCD5, 8 }, + { 0xCD6, 0xCE2, 12 }, + { 0xCE3, 0xD00, 29 }, + { 0xD01, 0xD3B, 58 }, + { 0xD3C, 0xD3E, 2 }, + { 0xD41, 0xD44, 1 }, + { 0xD4D, 0xD57, 10 }, + { 0xD62, 0xD63, 1 }, + { 0xD81, 0xDCA, 73 }, + { 0xDCF, 0xDD2, 3 }, + { 0xDD3, 0xDD4, 1 }, + { 0xDD6, 0xDDF, 9 }, + { 0xE31, 0xE34, 3 }, + { 0xE35, 0xE3A, 1 }, + { 0xE47, 0xE4E, 1 }, + { 0xEB1, 0xEB4, 3 }, + { 0xEB5, 0xEBC, 1 }, + { 0xEC8, 0xECD, 1 }, + { 0xF18, 0xF19, 1 }, + { 0xF35, 0xF39, 2 }, + { 0xF71, 0xF7E, 1 }, + { 0xF80, 0xF84, 1 }, + { 0xF86, 
0xF87, 1 }, + { 0xF8D, 0xF97, 1 }, + { 0xF99, 0xFBC, 1 }, + { 0xFC6, 0x102D, 103 }, + { 0x102E, 0x1030, 1 }, + { 0x1032, 0x1037, 1 }, + { 0x1039, 0x103A, 1 }, + { 0x103D, 0x103E, 1 }, + { 0x1058, 0x1059, 1 }, + { 0x105E, 0x1060, 1 }, + { 0x1071, 0x1074, 1 }, + { 0x1082, 0x1085, 3 }, + { 0x1086, 0x108D, 7 }, + { 0x109D, 0x135D, 704 }, + { 0x135E, 0x135F, 1 }, + { 0x1712, 0x1714, 1 }, + { 0x1732, 0x1733, 1 }, + { 0x1752, 0x1753, 1 }, + { 0x1772, 0x1773, 1 }, + { 0x17B4, 0x17B5, 1 }, + { 0x17B7, 0x17BD, 1 }, + { 0x17C6, 0x17C9, 3 }, + { 0x17CA, 0x17D3, 1 }, + { 0x17DD, 0x180B, 46 }, + { 0x180C, 0x180D, 1 }, + { 0x180F, 0x1885, 118 }, + { 0x1886, 0x18A9, 35 }, + { 0x1920, 0x1922, 1 }, + { 0x1927, 0x1928, 1 }, + { 0x1932, 0x1939, 7 }, + { 0x193A, 0x193B, 1 }, + { 0x1A17, 0x1A18, 1 }, + { 0x1A1B, 0x1A56, 59 }, + { 0x1A58, 0x1A5E, 1 }, + { 0x1A60, 0x1A62, 2 }, + { 0x1A65, 0x1A6C, 1 }, + { 0x1A73, 0x1A7C, 1 }, + { 0x1A7F, 0x1AB0, 49 }, + { 0x1AB1, 0x1ACE, 1 }, + { 0x1B00, 0x1B03, 1 }, + { 0x1B34, 0x1B3A, 1 }, + { 0x1B3C, 0x1B42, 6 }, + { 0x1B6B, 0x1B73, 1 }, + { 0x1B80, 0x1B81, 1 }, + { 0x1BA2, 0x1BA5, 1 }, + { 0x1BA8, 0x1BA9, 1 }, + { 0x1BAB, 0x1BAD, 1 }, + { 0x1BE6, 0x1BE8, 2 }, + { 0x1BE9, 0x1BED, 4 }, + { 0x1BEF, 0x1BF1, 1 }, + { 0x1C2C, 0x1C33, 1 }, + { 0x1C36, 0x1C37, 1 }, + { 0x1CD0, 0x1CD2, 1 }, + { 0x1CD4, 0x1CE0, 1 }, + { 0x1CE2, 0x1CE8, 1 }, + { 0x1CED, 0x1CF4, 7 }, + { 0x1CF8, 0x1CF9, 1 }, + { 0x1DC0, 0x1DFF, 1 }, + { 0x200C, 0x20D0, 196 }, + { 0x20D1, 0x20F0, 1 }, + { 0x2CEF, 0x2CF1, 1 }, + { 0x2D7F, 0x2DE0, 97 }, + { 0x2DE1, 0x2DFF, 1 }, + { 0x302A, 0x302F, 1 }, + { 0x3099, 0x309A, 1 }, + { 0xA66F, 0xA672, 1 }, + { 0xA674, 0xA67D, 1 }, + { 0xA69E, 0xA69F, 1 }, + { 0xA6F0, 0xA6F1, 1 }, + { 0xA802, 0xA806, 4 }, + { 0xA80B, 0xA825, 26 }, + { 0xA826, 0xA82C, 6 }, + { 0xA8C4, 0xA8C5, 1 }, + { 0xA8E0, 0xA8F1, 1 }, + { 0xA8FF, 0xA926, 39 }, + { 0xA927, 0xA92D, 1 }, + { 0xA947, 0xA951, 1 }, + { 0xA980, 0xA982, 1 }, + { 0xA9B3, 0xA9B6, 3 }, + { 0xA9B7, 0xA9B9, 1 }, + 
{ 0xA9BC, 0xA9BD, 1 }, + { 0xA9E5, 0xAA29, 68 }, + { 0xAA2A, 0xAA2E, 1 }, + { 0xAA31, 0xAA32, 1 }, + { 0xAA35, 0xAA36, 1 }, + { 0xAA43, 0xAA4C, 9 }, + { 0xAA7C, 0xAAB0, 52 }, + { 0xAAB2, 0xAAB4, 1 }, + { 0xAAB7, 0xAAB8, 1 }, + { 0xAABE, 0xAABF, 1 }, + { 0xAAC1, 0xAAEC, 43 }, + { 0xAAED, 0xAAF6, 9 }, + { 0xABE5, 0xABE8, 3 }, + { 0xABED, 0xFB1E, 20273 }, + { 0xFE00, 0xFE0F, 1 }, + { 0xFE20, 0xFE2F, 1 }, + { 0xFF9E, 0xFF9F, 1 }, + { 0x101FD, 0x102E0, 227 }, + { 0x10376, 0x1037A, 1 }, + { 0x10A01, 0x10A03, 1 }, + { 0x10A05, 0x10A06, 1 }, + { 0x10A0C, 0x10A0F, 1 }, + { 0x10A38, 0x10A3A, 1 }, + { 0x10A3F, 0x10AE5, 166 }, + { 0x10AE6, 0x10D24, 574 }, + { 0x10D25, 0x10D27, 1 }, + { 0x10EAB, 0x10EAC, 1 }, + { 0x10F46, 0x10F50, 1 }, + { 0x10F82, 0x10F85, 1 }, + { 0x11001, 0x11038, 55 }, + { 0x11039, 0x11046, 1 }, + { 0x11070, 0x11073, 3 }, + { 0x11074, 0x1107F, 11 }, + { 0x11080, 0x11081, 1 }, + { 0x110B3, 0x110B6, 1 }, + { 0x110B9, 0x110BA, 1 }, + { 0x110C2, 0x11100, 62 }, + { 0x11101, 0x11102, 1 }, + { 0x11127, 0x1112B, 1 }, + { 0x1112D, 0x11134, 1 }, + { 0x11173, 0x11180, 13 }, + { 0x11181, 0x111B6, 53 }, + { 0x111B7, 0x111BE, 1 }, + { 0x111C9, 0x111CC, 1 }, + { 0x111CF, 0x1122F, 96 }, + { 0x11230, 0x11231, 1 }, + { 0x11234, 0x11236, 2 }, + { 0x11237, 0x1123E, 7 }, + { 0x112DF, 0x112E3, 4 }, + { 0x112E4, 0x112EA, 1 }, + { 0x11300, 0x11301, 1 }, + { 0x1133B, 0x1133C, 1 }, + { 0x1133E, 0x11340, 2 }, + { 0x11357, 0x11366, 15 }, + { 0x11367, 0x1136C, 1 }, + { 0x11370, 0x11374, 1 }, + { 0x11438, 0x1143F, 1 }, + { 0x11442, 0x11444, 1 }, + { 0x11446, 0x1145E, 24 }, + { 0x114B0, 0x114B3, 3 }, + { 0x114B4, 0x114B8, 1 }, + { 0x114BA, 0x114BD, 3 }, + { 0x114BF, 0x114C0, 1 }, + { 0x114C2, 0x114C3, 1 }, + { 0x115AF, 0x115B2, 3 }, + { 0x115B3, 0x115B5, 1 }, + { 0x115BC, 0x115BD, 1 }, + { 0x115BF, 0x115C0, 1 }, + { 0x115DC, 0x115DD, 1 }, + { 0x11633, 0x1163A, 1 }, + { 0x1163D, 0x1163F, 2 }, + { 0x11640, 0x116AB, 107 }, + { 0x116AD, 0x116B0, 3 }, + { 0x116B1, 0x116B5, 1 }, + { 0x116B7, 
0x1171D, 102 }, + { 0x1171E, 0x1171F, 1 }, + { 0x11722, 0x11725, 1 }, + { 0x11727, 0x1172B, 1 }, + { 0x1182F, 0x11837, 1 }, + { 0x11839, 0x1183A, 1 }, + { 0x11930, 0x1193B, 11 }, + { 0x1193C, 0x1193E, 2 }, + { 0x11943, 0x119D4, 145 }, + { 0x119D5, 0x119D7, 1 }, + { 0x119DA, 0x119DB, 1 }, + { 0x119E0, 0x11A01, 33 }, + { 0x11A02, 0x11A0A, 1 }, + { 0x11A33, 0x11A38, 1 }, + { 0x11A3B, 0x11A3E, 1 }, + { 0x11A47, 0x11A51, 10 }, + { 0x11A52, 0x11A56, 1 }, + { 0x11A59, 0x11A5B, 1 }, + { 0x11A8A, 0x11A96, 1 }, + { 0x11A98, 0x11A99, 1 }, + { 0x11C30, 0x11C36, 1 }, + { 0x11C38, 0x11C3D, 1 }, + { 0x11C3F, 0x11C92, 83 }, + { 0x11C93, 0x11CA7, 1 }, + { 0x11CAA, 0x11CB0, 1 }, + { 0x11CB2, 0x11CB3, 1 }, + { 0x11CB5, 0x11CB6, 1 }, + { 0x11D31, 0x11D36, 1 }, + { 0x11D3A, 0x11D3C, 2 }, + { 0x11D3D, 0x11D3F, 2 }, + { 0x11D40, 0x11D45, 1 }, + { 0x11D47, 0x11D90, 73 }, + { 0x11D91, 0x11D95, 4 }, + { 0x11D97, 0x11EF3, 348 }, + { 0x11EF4, 0x16AF0, 19452 }, + { 0x16AF1, 0x16AF4, 1 }, + { 0x16B30, 0x16B36, 1 }, + { 0x16F4F, 0x16F8F, 64 }, + { 0x16F90, 0x16F92, 1 }, + { 0x16FE4, 0x1BC9D, 19641 }, + { 0x1BC9E, 0x1CF00, 4706 }, + { 0x1CF01, 0x1CF2D, 1 }, + { 0x1CF30, 0x1CF46, 1 }, + { 0x1D165, 0x1D167, 2 }, + { 0x1D168, 0x1D169, 1 }, + { 0x1D16E, 0x1D172, 1 }, + { 0x1D17B, 0x1D182, 1 }, + { 0x1D185, 0x1D18B, 1 }, + { 0x1D1AA, 0x1D1AD, 1 }, + { 0x1D242, 0x1D244, 1 }, + { 0x1DA00, 0x1DA36, 1 }, + { 0x1DA3B, 0x1DA6C, 1 }, + { 0x1DA75, 0x1DA84, 15 }, + { 0x1DA9B, 0x1DA9F, 1 }, + { 0x1DAA1, 0x1DAAF, 1 }, + { 0x1E000, 0x1E006, 1 }, + { 0x1E008, 0x1E018, 1 }, + { 0x1E01B, 0x1E021, 1 }, + { 0x1E023, 0x1E024, 1 }, + { 0x1E026, 0x1E02A, 1 }, + { 0x1E130, 0x1E136, 1 }, + { 0x1E2AE, 0x1E2EC, 62 }, + { 0x1E2ED, 0x1E2EF, 1 }, + { 0x1E8D0, 0x1E8D6, 1 }, + { 0x1E944, 0x1E94A, 1 }, + { 0xE0020, 0xE007F, 1 }, + { 0xE0100, 0xE01EF, 1 }, +}; + +static struct range_table cntrl_table[] = { + { 0x0, 0x1F, 1 }, + { 0x7F, 0x9F, 1 }, + { 0xAD, 0x600, 1363 }, + { 0x601, 0x605, 1 }, + { 0x61C, 0x6DD, 193 }, + { 0x70F, 
0x890, 385 }, + { 0x891, 0x8E2, 81 }, + { 0x180E, 0x200B, 2045 }, + { 0x200C, 0x200F, 1 }, + { 0x202A, 0x202E, 1 }, + { 0x2060, 0x2064, 1 }, + { 0x2066, 0x206F, 1 }, + { 0xE000, 0xE000, 0 }, + { 0xE001, 0xF8FF, 1 }, + { 0xFEFF, 0xFFF9, 250 }, + { 0xFFFA, 0xFFFB, 1 }, + { 0x110BD, 0x110CD, 16 }, + { 0x13430, 0x13438, 1 }, + { 0x1BCA0, 0x1BCA3, 1 }, + { 0x1D173, 0x1D17A, 1 }, + { 0xE0001, 0xE0020, 31 }, + { 0xE0021, 0xE007F, 1 }, + { 0xF0000, 0xF0000, 0 }, + { 0xF0001, 0xFFFFD, 1 }, + { 0x100000, 0x100000, 0 }, + { 0x100001, 0x10FFFD, 1 }, +}; + +static struct range_table digit_table[] = { + { 0x30, 0x39, 1 }, + { 0x660, 0x669, 1 }, + { 0x6F0, 0x6F9, 1 }, + { 0x7C0, 0x7C9, 1 }, + { 0x966, 0x96F, 1 }, + { 0x9E6, 0x9EF, 1 }, + { 0xA66, 0xA6F, 1 }, + { 0xAE6, 0xAEF, 1 }, + { 0xB66, 0xB6F, 1 }, + { 0xBE6, 0xBEF, 1 }, + { 0xC66, 0xC6F, 1 }, + { 0xCE6, 0xCEF, 1 }, + { 0xD66, 0xD6F, 1 }, + { 0xDE6, 0xDEF, 1 }, + { 0xE50, 0xE59, 1 }, + { 0xED0, 0xED9, 1 }, + { 0xF20, 0xF29, 1 }, + { 0x1040, 0x1049, 1 }, + { 0x1090, 0x1099, 1 }, + { 0x17E0, 0x17E9, 1 }, + { 0x1810, 0x1819, 1 }, + { 0x1946, 0x194F, 1 }, + { 0x19D0, 0x19D9, 1 }, + { 0x1A80, 0x1A89, 1 }, + { 0x1A90, 0x1A99, 1 }, + { 0x1B50, 0x1B59, 1 }, + { 0x1BB0, 0x1BB9, 1 }, + { 0x1C40, 0x1C49, 1 }, + { 0x1C50, 0x1C59, 1 }, + { 0xA620, 0xA629, 1 }, + { 0xA8D0, 0xA8D9, 1 }, + { 0xA900, 0xA909, 1 }, + { 0xA9D0, 0xA9D9, 1 }, + { 0xA9F0, 0xA9F9, 1 }, + { 0xAA50, 0xAA59, 1 }, + { 0xABF0, 0xABF9, 1 }, + { 0xFF10, 0xFF19, 1 }, + { 0x104A0, 0x104A9, 1 }, + { 0x10D30, 0x10D39, 1 }, + { 0x11066, 0x1106F, 1 }, + { 0x110F0, 0x110F9, 1 }, + { 0x11136, 0x1113F, 1 }, + { 0x111D0, 0x111D9, 1 }, + { 0x112F0, 0x112F9, 1 }, + { 0x11450, 0x11459, 1 }, + { 0x114D0, 0x114D9, 1 }, + { 0x11650, 0x11659, 1 }, + { 0x116C0, 0x116C9, 1 }, + { 0x11730, 0x11739, 1 }, + { 0x118E0, 0x118E9, 1 }, + { 0x11950, 0x11959, 1 }, + { 0x11C50, 0x11C59, 1 }, + { 0x11D50, 0x11D59, 1 }, + { 0x11DA0, 0x11DA9, 1 }, + { 0x16A60, 0x16A69, 1 }, + { 0x16AC0, 0x16AC9, 1 }, + 
{ 0x16B50, 0x16B59, 1 }, + { 0x1D7CE, 0x1D7FF, 1 }, + { 0x1E140, 0x1E149, 1 }, + { 0x1E2F0, 0x1E2F9, 1 }, + { 0x1E950, 0x1E959, 1 }, + { 0x1FBF0, 0x1FBF9, 1 }, +}; + +static struct range_table alnum_extend_table[] = { + { 0x30, 0x39, 1 }, + { 0xB2, 0xB3, 1 }, + { 0xB9, 0xBC, 3 }, + { 0xBD, 0xBE, 1 }, + { 0x660, 0x669, 1 }, + { 0x6F0, 0x6F9, 1 }, + { 0x7C0, 0x7C9, 1 }, + { 0x966, 0x96F, 1 }, + { 0x9E6, 0x9EF, 1 }, + { 0x9F4, 0x9F9, 1 }, + { 0xA66, 0xA6F, 1 }, + { 0xAE6, 0xAEF, 1 }, + { 0xB66, 0xB6F, 1 }, + { 0xB72, 0xB77, 1 }, + { 0xBE6, 0xBF2, 1 }, + { 0xC66, 0xC6F, 1 }, + { 0xC78, 0xC7E, 1 }, + { 0xCE6, 0xCEF, 1 }, + { 0xD58, 0xD5E, 1 }, + { 0xD66, 0xD78, 1 }, + { 0xDE6, 0xDEF, 1 }, + { 0xE50, 0xE59, 1 }, + { 0xED0, 0xED9, 1 }, + { 0xF20, 0xF33, 1 }, + { 0x1040, 0x1049, 1 }, + { 0x1090, 0x1099, 1 }, + { 0x1369, 0x137C, 1 }, + { 0x16EE, 0x16F0, 1 }, + { 0x17E0, 0x17E9, 1 }, + { 0x17F0, 0x17F9, 1 }, + { 0x1810, 0x1819, 1 }, + { 0x1946, 0x194F, 1 }, + { 0x19D0, 0x19DA, 1 }, + { 0x1A80, 0x1A89, 1 }, + { 0x1A90, 0x1A99, 1 }, + { 0x1B50, 0x1B59, 1 }, + { 0x1BB0, 0x1BB9, 1 }, + { 0x1C40, 0x1C49, 1 }, + { 0x1C50, 0x1C59, 1 }, + { 0x2070, 0x2074, 4 }, + { 0x2075, 0x2079, 1 }, + { 0x2080, 0x2089, 1 }, + { 0x2150, 0x2182, 1 }, + { 0x2185, 0x2189, 1 }, + { 0x2460, 0x249B, 1 }, + { 0x24EA, 0x24FF, 1 }, + { 0x2776, 0x2793, 1 }, + { 0x2CFD, 0x3007, 778 }, + { 0x3021, 0x3029, 1 }, + { 0x3038, 0x303A, 1 }, + { 0x3192, 0x3195, 1 }, + { 0x3220, 0x3229, 1 }, + { 0x3248, 0x324F, 1 }, + { 0x3251, 0x325F, 1 }, + { 0x3280, 0x3289, 1 }, + { 0x32B1, 0x32BF, 1 }, + { 0xA620, 0xA629, 1 }, + { 0xA6E6, 0xA6EF, 1 }, + { 0xA830, 0xA835, 1 }, + { 0xA8D0, 0xA8D9, 1 }, + { 0xA900, 0xA909, 1 }, + { 0xA9D0, 0xA9D9, 1 }, + { 0xA9F0, 0xA9F9, 1 }, + { 0xAA50, 0xAA59, 1 }, + { 0xABF0, 0xABF9, 1 }, + { 0xFF10, 0xFF19, 1 }, + { 0x10107, 0x10133, 1 }, + { 0x10140, 0x10178, 1 }, + { 0x1018A, 0x1018B, 1 }, + { 0x102E1, 0x102FB, 1 }, + { 0x10320, 0x10323, 1 }, + { 0x10341, 0x1034A, 9 }, + { 0x103D1, 0x103D5, 1 
}, + { 0x104A0, 0x104A9, 1 }, + { 0x10858, 0x1085F, 1 }, + { 0x10879, 0x1087F, 1 }, + { 0x108A7, 0x108AF, 1 }, + { 0x108FB, 0x108FF, 1 }, + { 0x10916, 0x1091B, 1 }, + { 0x109BC, 0x109BD, 1 }, + { 0x109C0, 0x109CF, 1 }, + { 0x109D2, 0x109FF, 1 }, + { 0x10A40, 0x10A48, 1 }, + { 0x10A7D, 0x10A7E, 1 }, + { 0x10A9D, 0x10A9F, 1 }, + { 0x10AEB, 0x10AEF, 1 }, + { 0x10B58, 0x10B5F, 1 }, + { 0x10B78, 0x10B7F, 1 }, + { 0x10BA9, 0x10BAF, 1 }, + { 0x10CFA, 0x10CFF, 1 }, + { 0x10D30, 0x10D39, 1 }, + { 0x10E60, 0x10E7E, 1 }, + { 0x10F1D, 0x10F26, 1 }, + { 0x10F51, 0x10F54, 1 }, + { 0x10FC5, 0x10FCB, 1 }, + { 0x11052, 0x1106F, 1 }, + { 0x110F0, 0x110F9, 1 }, + { 0x11136, 0x1113F, 1 }, + { 0x111D0, 0x111D9, 1 }, + { 0x111E1, 0x111F4, 1 }, + { 0x112F0, 0x112F9, 1 }, + { 0x11450, 0x11459, 1 }, + { 0x114D0, 0x114D9, 1 }, + { 0x11650, 0x11659, 1 }, + { 0x116C0, 0x116C9, 1 }, + { 0x11730, 0x1173B, 1 }, + { 0x118E0, 0x118F2, 1 }, + { 0x11950, 0x11959, 1 }, + { 0x11C50, 0x11C6C, 1 }, + { 0x11D50, 0x11D59, 1 }, + { 0x11DA0, 0x11DA9, 1 }, + { 0x11FC0, 0x11FD4, 1 }, + { 0x12400, 0x1246E, 1 }, + { 0x16A60, 0x16A69, 1 }, + { 0x16AC0, 0x16AC9, 1 }, + { 0x16B50, 0x16B59, 1 }, + { 0x16B5B, 0x16B61, 1 }, + { 0x16E80, 0x16E96, 1 }, + { 0x1D2E0, 0x1D2F3, 1 }, + { 0x1D360, 0x1D378, 1 }, + { 0x1D7CE, 0x1D7FF, 1 }, + { 0x1E140, 0x1E149, 1 }, + { 0x1E2F0, 0x1E2F9, 1 }, + { 0x1E8C7, 0x1E8CF, 1 }, + { 0x1E950, 0x1E959, 1 }, + { 0x1EC71, 0x1ECAB, 1 }, + { 0x1ECAD, 0x1ECAF, 1 }, + { 0x1ECB1, 0x1ECB4, 1 }, + { 0x1ED01, 0x1ED2D, 1 }, + { 0x1ED2F, 0x1ED3D, 1 }, + { 0x1F100, 0x1F10C, 1 }, + { 0x1FBF0, 0x1FBF9, 1 }, +}; + +static struct range_table punct_table[] = { + { 0x21, 0x2F, 1 }, + { 0x3A, 0x40, 1 }, + { 0x5B, 0x60, 1 }, + { 0x7B, 0x7E, 1 }, + { 0xA1, 0xA5, 1 }, + { 0xA7, 0xA8, 1 }, + { 0xAB, 0xAC, 1 }, + { 0xAF, 0xB1, 2 }, + { 0xB4, 0xB6, 2 }, + { 0xB7, 0xB8, 1 }, + { 0xBB, 0xBF, 4 }, + { 0xD7, 0xF7, 32 }, + { 0x2C2, 0x2C5, 1 }, + { 0x2D2, 0x2DF, 1 }, + { 0x2E5, 0x2EB, 1 }, + { 0x2ED, 0x2EF, 2 }, + { 
0x2F0, 0x2FF, 1 }, + { 0x375, 0x37E, 9 }, + { 0x384, 0x385, 1 }, + { 0x387, 0x3F6, 111 }, + { 0x55A, 0x55F, 1 }, + { 0x589, 0x58A, 1 }, + { 0x58F, 0x5BE, 47 }, + { 0x5C0, 0x5C6, 3 }, + { 0x5F3, 0x5F4, 1 }, + { 0x606, 0x60D, 1 }, + { 0x61B, 0x61D, 2 }, + { 0x61E, 0x61F, 1 }, + { 0x66A, 0x66D, 1 }, + { 0x6D4, 0x700, 44 }, + { 0x701, 0x70D, 1 }, + { 0x7F7, 0x7F9, 1 }, + { 0x7FE, 0x7FF, 1 }, + { 0x830, 0x83E, 1 }, + { 0x85E, 0x888, 42 }, + { 0x964, 0x965, 1 }, + { 0x970, 0x9F2, 130 }, + { 0x9F3, 0x9FB, 8 }, + { 0x9FD, 0xA76, 121 }, + { 0xAF0, 0xAF1, 1 }, + { 0xBF9, 0xC77, 126 }, + { 0xC84, 0xDF4, 368 }, + { 0xE3F, 0xE4F, 16 }, + { 0xE5A, 0xE5B, 1 }, + { 0xF04, 0xF12, 1 }, + { 0xF14, 0xF3A, 38 }, + { 0xF3B, 0xF3D, 1 }, + { 0xF85, 0xFD0, 75 }, + { 0xFD1, 0xFD4, 1 }, + { 0xFD9, 0xFDA, 1 }, + { 0x104A, 0x104F, 1 }, + { 0x10FB, 0x1360, 613 }, + { 0x1361, 0x1368, 1 }, + { 0x1400, 0x166E, 622 }, + { 0x169B, 0x169C, 1 }, + { 0x16EB, 0x16ED, 1 }, + { 0x1735, 0x1736, 1 }, + { 0x17D4, 0x17D6, 1 }, + { 0x17D8, 0x17DB, 1 }, + { 0x1800, 0x180A, 1 }, + { 0x1944, 0x1945, 1 }, + { 0x1A1E, 0x1A1F, 1 }, + { 0x1AA0, 0x1AA6, 1 }, + { 0x1AA8, 0x1AAD, 1 }, + { 0x1B5A, 0x1B60, 1 }, + { 0x1B7D, 0x1B7E, 1 }, + { 0x1BFC, 0x1BFF, 1 }, + { 0x1C3B, 0x1C3F, 1 }, + { 0x1C7E, 0x1C7F, 1 }, + { 0x1CC0, 0x1CC7, 1 }, + { 0x1CD3, 0x1FBD, 746 }, + { 0x1FBF, 0x1FC1, 1 }, + { 0x1FCD, 0x1FCF, 1 }, + { 0x1FDD, 0x1FDF, 1 }, + { 0x1FED, 0x1FEF, 1 }, + { 0x1FFD, 0x1FFE, 1 }, + { 0x2010, 0x2027, 1 }, + { 0x2030, 0x205E, 1 }, + { 0x207A, 0x207E, 1 }, + { 0x208A, 0x208E, 1 }, + { 0x20A0, 0x20C0, 1 }, + { 0x2118, 0x2140, 40 }, + { 0x2141, 0x2144, 1 }, + { 0x214B, 0x2190, 69 }, + { 0x2191, 0x2194, 1 }, + { 0x219A, 0x219B, 1 }, + { 0x21A0, 0x21A6, 3 }, + { 0x21AE, 0x21CE, 32 }, + { 0x21CF, 0x21D2, 3 }, + { 0x21D4, 0x21F4, 32 }, + { 0x21F5, 0x22FF, 1 }, + { 0x2308, 0x230B, 1 }, + { 0x2320, 0x2321, 1 }, + { 0x2329, 0x232A, 1 }, + { 0x237C, 0x239B, 31 }, + { 0x239C, 0x23B3, 1 }, + { 0x23DC, 0x23E1, 1 }, + { 0x25B7, 0x25C1, 
10 }, + { 0x25F8, 0x25FF, 1 }, + { 0x266F, 0x2768, 249 }, + { 0x2769, 0x2775, 1 }, + { 0x27C0, 0x27FF, 1 }, + { 0x2900, 0x2AFF, 1 }, + { 0x2B30, 0x2B44, 1 }, + { 0x2B47, 0x2B4C, 1 }, + { 0x2CF9, 0x2CFC, 1 }, + { 0x2CFE, 0x2CFF, 1 }, + { 0x2D70, 0x2E00, 144 }, + { 0x2E01, 0x2E2E, 1 }, + { 0x2E30, 0x2E4F, 1 }, + { 0x2E52, 0x2E5D, 1 }, + { 0x3001, 0x3003, 1 }, + { 0x3008, 0x3011, 1 }, + { 0x3014, 0x301F, 1 }, + { 0x3030, 0x303D, 13 }, + { 0x309B, 0x309C, 1 }, + { 0x30A0, 0x30FB, 91 }, + { 0xA4FE, 0xA4FF, 1 }, + { 0xA60D, 0xA60F, 1 }, + { 0xA673, 0xA67E, 11 }, + { 0xA6F2, 0xA6F7, 1 }, + { 0xA700, 0xA716, 1 }, + { 0xA720, 0xA721, 1 }, + { 0xA789, 0xA78A, 1 }, + { 0xA838, 0xA874, 60 }, + { 0xA875, 0xA877, 1 }, + { 0xA8CE, 0xA8CF, 1 }, + { 0xA8F8, 0xA8FA, 1 }, + { 0xA8FC, 0xA92E, 50 }, + { 0xA92F, 0xA95F, 48 }, + { 0xA9C1, 0xA9CD, 1 }, + { 0xA9DE, 0xA9DF, 1 }, + { 0xAA5C, 0xAA5F, 1 }, + { 0xAADE, 0xAADF, 1 }, + { 0xAAF0, 0xAAF1, 1 }, + { 0xAB5B, 0xAB6A, 15 }, + { 0xAB6B, 0xABEB, 128 }, + { 0xFB29, 0xFBB2, 137 }, + { 0xFBB3, 0xFBC2, 1 }, + { 0xFD3E, 0xFD3F, 1 }, + { 0xFDFC, 0xFE10, 20 }, + { 0xFE11, 0xFE19, 1 }, + { 0xFE30, 0xFE52, 1 }, + { 0xFE54, 0xFE66, 1 }, + { 0xFE68, 0xFE6B, 1 }, + { 0xFF01, 0xFF0F, 1 }, + { 0xFF1A, 0xFF20, 1 }, + { 0xFF3B, 0xFF40, 1 }, + { 0xFF5B, 0xFF65, 1 }, + { 0xFFE0, 0xFFE3, 1 }, + { 0xFFE5, 0xFFE6, 1 }, + { 0xFFE9, 0xFFEC, 1 }, + { 0x10100, 0x10102, 1 }, + { 0x1039F, 0x103D0, 49 }, + { 0x1056F, 0x10857, 744 }, + { 0x1091F, 0x1093F, 32 }, + { 0x10A50, 0x10A58, 1 }, + { 0x10A7F, 0x10AF0, 113 }, + { 0x10AF1, 0x10AF6, 1 }, + { 0x10B39, 0x10B3F, 1 }, + { 0x10B99, 0x10B9C, 1 }, + { 0x10EAD, 0x10F55, 168 }, + { 0x10F56, 0x10F59, 1 }, + { 0x10F86, 0x10F89, 1 }, + { 0x11047, 0x1104D, 1 }, + { 0x110BB, 0x110BC, 1 }, + { 0x110BE, 0x110C1, 1 }, + { 0x11140, 0x11143, 1 }, + { 0x11174, 0x11175, 1 }, + { 0x111C5, 0x111C8, 1 }, + { 0x111CD, 0x111DB, 14 }, + { 0x111DD, 0x111DF, 1 }, + { 0x11238, 0x1123D, 1 }, + { 0x112A9, 0x1144B, 418 }, + { 0x1144C, 0x1144F, 
1 }, + { 0x1145A, 0x1145B, 1 }, + { 0x1145D, 0x114C6, 105 }, + { 0x115C1, 0x115D7, 1 }, + { 0x11641, 0x11643, 1 }, + { 0x11660, 0x1166C, 1 }, + { 0x116B9, 0x1173C, 131 }, + { 0x1173D, 0x1173E, 1 }, + { 0x1183B, 0x11944, 265 }, + { 0x11945, 0x11946, 1 }, + { 0x119E2, 0x11A3F, 93 }, + { 0x11A40, 0x11A46, 1 }, + { 0x11A9A, 0x11A9C, 1 }, + { 0x11A9E, 0x11AA2, 1 }, + { 0x11C41, 0x11C45, 1 }, + { 0x11C70, 0x11C71, 1 }, + { 0x11EF7, 0x11EF8, 1 }, + { 0x11FDD, 0x11FE0, 1 }, + { 0x11FFF, 0x12470, 1137 }, + { 0x12471, 0x12474, 1 }, + { 0x12FF1, 0x12FF2, 1 }, + { 0x16A6E, 0x16A6F, 1 }, + { 0x16AF5, 0x16B37, 66 }, + { 0x16B38, 0x16B3B, 1 }, + { 0x16B44, 0x16E97, 851 }, + { 0x16E98, 0x16E9A, 1 }, + { 0x16FE2, 0x1BC9F, 19645 }, + { 0x1D6C1, 0x1D6DB, 26 }, + { 0x1D6FB, 0x1D715, 26 }, + { 0x1D735, 0x1D74F, 26 }, + { 0x1D76F, 0x1D789, 26 }, + { 0x1D7A9, 0x1D7C3, 26 }, + { 0x1DA87, 0x1DA8B, 1 }, + { 0x1E2FF, 0x1E95E, 1631 }, + { 0x1E95F, 0x1ECB0, 849 }, + { 0x1EEF0, 0x1EEF1, 1 }, + { 0x1F3FB, 0x1F3FF, 1 }, +}; + +static struct conv_table tolower_table[] = { + { 0x41, 0x5A, 1, 32 }, + { 0xC0, 0xD6, 1, 32 }, + { 0xD8, 0xDE, 1, 32 }, + { 0x100, 0x12E, 2, 1 }, + { 0x130, 0x130, 1, -199 }, + { 0x132, 0x136, 2, 1 }, + { 0x139, 0x147, 2, 1 }, + { 0x14A, 0x176, 2, 1 }, + { 0x178, 0x178, 1, -121 }, + { 0x179, 0x17D, 2, 1 }, + { 0x181, 0x181, 1, 210 }, + { 0x182, 0x184, 2, 1 }, + { 0x186, 0x186, 1, 206 }, + { 0x187, 0x187, 1, 1 }, + { 0x189, 0x18A, 1, 205 }, + { 0x18B, 0x18B, 1, 1 }, + { 0x18E, 0x18E, 1, 79 }, + { 0x18F, 0x18F, 1, 202 }, + { 0x190, 0x190, 1, 203 }, + { 0x191, 0x191, 1, 1 }, + { 0x193, 0x193, 1, 205 }, + { 0x194, 0x194, 1, 207 }, + { 0x196, 0x196, 1, 211 }, + { 0x197, 0x197, 1, 209 }, + { 0x198, 0x198, 1, 1 }, + { 0x19C, 0x19C, 1, 211 }, + { 0x19D, 0x19D, 1, 213 }, + { 0x19F, 0x19F, 1, 214 }, + { 0x1A0, 0x1A4, 2, 1 }, + { 0x1A6, 0x1A6, 1, 218 }, + { 0x1A7, 0x1A7, 1, 1 }, + { 0x1A9, 0x1A9, 1, 218 }, + { 0x1AC, 0x1AC, 1, 1 }, + { 0x1AE, 0x1AE, 1, 218 }, + { 0x1AF, 0x1AF, 1, 1 }, 
+ { 0x1B1, 0x1B2, 1, 217 }, + { 0x1B3, 0x1B5, 2, 1 }, + { 0x1B7, 0x1B7, 1, 219 }, + { 0x1B8, 0x1BC, 4, 1 }, + { 0x1C4, 0x1C4, 1, 2 }, + { 0x1C5, 0x1C5, 1, 1 }, + { 0x1C7, 0x1C7, 1, 2 }, + { 0x1C8, 0x1C8, 1, 1 }, + { 0x1CA, 0x1CA, 1, 2 }, + { 0x1CB, 0x1DB, 2, 1 }, + { 0x1DE, 0x1EE, 2, 1 }, + { 0x1F1, 0x1F1, 1, 2 }, + { 0x1F2, 0x1F4, 2, 1 }, + { 0x1F6, 0x1F6, 1, -97 }, + { 0x1F7, 0x1F7, 1, -56 }, + { 0x1F8, 0x21E, 2, 1 }, + { 0x220, 0x220, 1, -130 }, + { 0x222, 0x232, 2, 1 }, + { 0x23A, 0x23A, 1, 10795 }, + { 0x23B, 0x23B, 1, 1 }, + { 0x23D, 0x23D, 1, -163 }, + { 0x23E, 0x23E, 1, 10792 }, + { 0x241, 0x241, 1, 1 }, + { 0x243, 0x243, 1, -195 }, + { 0x244, 0x244, 1, 69 }, + { 0x245, 0x245, 1, 71 }, + { 0x246, 0x24E, 2, 1 }, + { 0x370, 0x372, 2, 1 }, + { 0x376, 0x376, 1, 1 }, + { 0x37F, 0x37F, 1, 116 }, + { 0x386, 0x386, 1, 38 }, + { 0x388, 0x38A, 1, 37 }, + { 0x38C, 0x38C, 1, 64 }, + { 0x38E, 0x38F, 1, 63 }, + { 0x391, 0x3A1, 1, 32 }, + { 0x3A3, 0x3AB, 1, 32 }, + { 0x3CF, 0x3CF, 1, 8 }, + { 0x3D8, 0x3EE, 2, 1 }, + { 0x3F4, 0x3F4, 1, -60 }, + { 0x3F7, 0x3F7, 1, 1 }, + { 0x3F9, 0x3F9, 1, -7 }, + { 0x3FA, 0x3FA, 1, 1 }, + { 0x3FD, 0x3FF, 1, -130 }, + { 0x400, 0x40F, 1, 80 }, + { 0x410, 0x42F, 1, 32 }, + { 0x460, 0x480, 2, 1 }, + { 0x48A, 0x4BE, 2, 1 }, + { 0x4C0, 0x4C0, 1, 15 }, + { 0x4C1, 0x4CD, 2, 1 }, + { 0x4D0, 0x52E, 2, 1 }, + { 0x531, 0x556, 1, 48 }, + { 0x10A0, 0x10C5, 1, 7264 }, + { 0x10C7, 0x10CD, 6, 7264 }, + { 0x13A0, 0x13EF, 1, 38864 }, + { 0x13F0, 0x13F5, 1, 8 }, + { 0x1C90, 0x1CBA, 1, -3008 }, + { 0x1CBD, 0x1CBF, 1, -3008 }, + { 0x1E00, 0x1E94, 2, 1 }, + { 0x1E9E, 0x1E9E, 1, -7615 }, + { 0x1EA0, 0x1EFE, 2, 1 }, + { 0x1F08, 0x1F0F, 1, -8 }, + { 0x1F18, 0x1F1D, 1, -8 }, + { 0x1F28, 0x1F2F, 1, -8 }, + { 0x1F38, 0x1F3F, 1, -8 }, + { 0x1F48, 0x1F4D, 1, -8 }, + { 0x1F59, 0x1F5F, 2, -8 }, + { 0x1F68, 0x1F6F, 1, -8 }, + { 0x1F88, 0x1F8F, 1, -8 }, + { 0x1F98, 0x1F9F, 1, -8 }, + { 0x1FA8, 0x1FAF, 1, -8 }, + { 0x1FB8, 0x1FB9, 1, -8 }, + { 0x1FBA, 0x1FBB, 1, -74 }, + { 
0x1FBC, 0x1FBC, 1, -9 }, + { 0x1FC8, 0x1FCB, 1, -86 }, + { 0x1FCC, 0x1FCC, 1, -9 }, + { 0x1FD8, 0x1FD9, 1, -8 }, + { 0x1FDA, 0x1FDB, 1, -100 }, + { 0x1FE8, 0x1FE9, 1, -8 }, + { 0x1FEA, 0x1FEB, 1, -112 }, + { 0x1FEC, 0x1FEC, 1, -7 }, + { 0x1FF8, 0x1FF9, 1, -128 }, + { 0x1FFA, 0x1FFB, 1, -126 }, + { 0x1FFC, 0x1FFC, 1, -9 }, + { 0x2126, 0x2126, 1, -7517 }, + { 0x212A, 0x212A, 1, -8383 }, + { 0x212B, 0x212B, 1, -8262 }, + { 0x2132, 0x2132, 1, 28 }, + { 0x2160, 0x216F, 1, 16 }, + { 0x2183, 0x2183, 1, 1 }, + { 0x24B6, 0x24CF, 1, 26 }, + { 0x2C00, 0x2C2F, 1, 48 }, + { 0x2C60, 0x2C60, 1, 1 }, + { 0x2C62, 0x2C62, 1, -10743 }, + { 0x2C63, 0x2C63, 1, -3814 }, + { 0x2C64, 0x2C64, 1, -10727 }, + { 0x2C67, 0x2C6B, 2, 1 }, + { 0x2C6D, 0x2C6D, 1, -10780 }, + { 0x2C6E, 0x2C6E, 1, -10749 }, + { 0x2C6F, 0x2C6F, 1, -10783 }, + { 0x2C70, 0x2C70, 1, -10782 }, + { 0x2C72, 0x2C75, 3, 1 }, + { 0x2C7E, 0x2C7F, 1, -10815 }, + { 0x2C80, 0x2CE2, 2, 1 }, + { 0x2CEB, 0x2CED, 2, 1 }, + { 0x2CF2, 0xA640, 31054, 1 }, + { 0xA642, 0xA66C, 2, 1 }, + { 0xA680, 0xA69A, 2, 1 }, + { 0xA722, 0xA72E, 2, 1 }, + { 0xA732, 0xA76E, 2, 1 }, + { 0xA779, 0xA77B, 2, 1 }, + { 0xA77D, 0xA77D, 1, -35332 }, + { 0xA77E, 0xA786, 2, 1 }, + { 0xA78B, 0xA78B, 1, 1 }, + { 0xA78D, 0xA78D, 1, -42280 }, + { 0xA790, 0xA792, 2, 1 }, + { 0xA796, 0xA7A8, 2, 1 }, + { 0xA7AA, 0xA7AA, 1, -42308 }, + { 0xA7AB, 0xA7AB, 1, -42319 }, + { 0xA7AC, 0xA7AC, 1, -42315 }, + { 0xA7AD, 0xA7AD, 1, -42305 }, + { 0xA7AE, 0xA7AE, 1, -42308 }, + { 0xA7B0, 0xA7B0, 1, -42258 }, + { 0xA7B1, 0xA7B1, 1, -42282 }, + { 0xA7B2, 0xA7B2, 1, -42261 }, + { 0xA7B3, 0xA7B3, 1, 928 }, + { 0xA7B4, 0xA7C2, 2, 1 }, + { 0xA7C4, 0xA7C4, 1, -48 }, + { 0xA7C5, 0xA7C5, 1, -42307 }, + { 0xA7C6, 0xA7C6, 1, -35384 }, + { 0xA7C7, 0xA7C9, 2, 1 }, + { 0xA7D0, 0xA7D6, 6, 1 }, + { 0xA7D8, 0xA7F5, 29, 1 }, + { 0xFF21, 0xFF3A, 1, 32 }, + { 0x10400, 0x10427, 1, 40 }, + { 0x104B0, 0x104D3, 1, 40 }, + { 0x10570, 0x1057A, 1, 39 }, + { 0x1057C, 0x1058A, 1, 39 }, + { 0x1058C, 0x10592, 1, 
39 }, + { 0x10594, 0x10595, 1, 39 }, + { 0x10C80, 0x10CB2, 1, 64 }, + { 0x118A0, 0x118BF, 1, 32 }, + { 0x16E40, 0x16E5F, 1, 32 }, + { 0x1E900, 0x1E921, 1, 34 }, +}; + +static struct conv_table toupper_table[] = { + { 0x61, 0x7A, 1, -32 }, + { 0xB5, 0xB5, 1, 743 }, + { 0xE0, 0xF6, 1, -32 }, + { 0xF8, 0xFE, 1, -32 }, + { 0xFF, 0xFF, 1, 121 }, + { 0x101, 0x12F, 2, -1 }, + { 0x131, 0x131, 1, -232 }, + { 0x133, 0x137, 2, -1 }, + { 0x13A, 0x148, 2, -1 }, + { 0x14B, 0x177, 2, -1 }, + { 0x17A, 0x17E, 2, -1 }, + { 0x17F, 0x17F, 1, -300 }, + { 0x180, 0x180, 1, 195 }, + { 0x183, 0x185, 2, -1 }, + { 0x188, 0x18C, 4, -1 }, + { 0x192, 0x192, 1, -1 }, + { 0x195, 0x195, 1, 97 }, + { 0x199, 0x199, 1, -1 }, + { 0x19A, 0x19A, 1, 163 }, + { 0x19E, 0x19E, 1, 130 }, + { 0x1A1, 0x1A5, 2, -1 }, + { 0x1A8, 0x1AD, 5, -1 }, + { 0x1B0, 0x1B4, 4, -1 }, + { 0x1B6, 0x1B9, 3, -1 }, + { 0x1BD, 0x1BD, 1, -1 }, + { 0x1BF, 0x1BF, 1, 56 }, + { 0x1C5, 0x1C5, 1, -1 }, + { 0x1C6, 0x1C6, 1, -2 }, + { 0x1C8, 0x1C8, 1, -1 }, + { 0x1C9, 0x1C9, 1, -2 }, + { 0x1CB, 0x1CB, 1, -1 }, + { 0x1CC, 0x1CC, 1, -2 }, + { 0x1CE, 0x1DC, 2, -1 }, + { 0x1DD, 0x1DD, 1, -79 }, + { 0x1DF, 0x1EF, 2, -1 }, + { 0x1F2, 0x1F2, 1, -1 }, + { 0x1F3, 0x1F3, 1, -2 }, + { 0x1F5, 0x1F9, 4, -1 }, + { 0x1FB, 0x21F, 2, -1 }, + { 0x223, 0x233, 2, -1 }, + { 0x23C, 0x23C, 1, -1 }, + { 0x23F, 0x240, 1, 10815 }, + { 0x242, 0x247, 5, -1 }, + { 0x249, 0x24F, 2, -1 }, + { 0x250, 0x250, 1, 10783 }, + { 0x251, 0x251, 1, 10780 }, + { 0x252, 0x252, 1, 10782 }, + { 0x253, 0x253, 1, -210 }, + { 0x254, 0x254, 1, -206 }, + { 0x256, 0x257, 1, -205 }, + { 0x259, 0x259, 1, -202 }, + { 0x25B, 0x25B, 1, -203 }, + { 0x25C, 0x25C, 1, 42319 }, + { 0x260, 0x260, 1, -205 }, + { 0x261, 0x261, 1, 42315 }, + { 0x263, 0x263, 1, -207 }, + { 0x265, 0x265, 1, 42280 }, + { 0x266, 0x266, 1, 42308 }, + { 0x268, 0x268, 1, -209 }, + { 0x269, 0x269, 1, -211 }, + { 0x26A, 0x26A, 1, 42308 }, + { 0x26B, 0x26B, 1, 10743 }, + { 0x26C, 0x26C, 1, 42305 }, + { 0x26F, 0x26F, 1, -211 }, + 
{ 0x271, 0x271, 1, 10749 }, + { 0x272, 0x272, 1, -213 }, + { 0x275, 0x275, 1, -214 }, + { 0x27D, 0x27D, 1, 10727 }, + { 0x280, 0x280, 1, -218 }, + { 0x282, 0x282, 1, 42307 }, + { 0x283, 0x283, 1, -218 }, + { 0x287, 0x287, 1, 42282 }, + { 0x288, 0x288, 1, -218 }, + { 0x289, 0x289, 1, -69 }, + { 0x28A, 0x28B, 1, -217 }, + { 0x28C, 0x28C, 1, -71 }, + { 0x292, 0x292, 1, -219 }, + { 0x29D, 0x29D, 1, 42261 }, + { 0x29E, 0x29E, 1, 42258 }, + { 0x345, 0x345, 1, 84 }, + { 0x371, 0x373, 2, -1 }, + { 0x377, 0x377, 1, -1 }, + { 0x37B, 0x37D, 1, 130 }, + { 0x3AC, 0x3AC, 1, -38 }, + { 0x3AD, 0x3AF, 1, -37 }, + { 0x3B1, 0x3C1, 1, -32 }, + { 0x3C2, 0x3C2, 1, -31 }, + { 0x3C3, 0x3CB, 1, -32 }, + { 0x3CC, 0x3CC, 1, -64 }, + { 0x3CD, 0x3CE, 1, -63 }, + { 0x3D0, 0x3D0, 1, -62 }, + { 0x3D1, 0x3D1, 1, -57 }, + { 0x3D5, 0x3D5, 1, -47 }, + { 0x3D6, 0x3D6, 1, -54 }, + { 0x3D7, 0x3D7, 1, -8 }, + { 0x3D9, 0x3EF, 2, -1 }, + { 0x3F0, 0x3F0, 1, -86 }, + { 0x3F1, 0x3F1, 1, -80 }, + { 0x3F2, 0x3F2, 1, 7 }, + { 0x3F3, 0x3F3, 1, -116 }, + { 0x3F5, 0x3F5, 1, -96 }, + { 0x3F8, 0x3FB, 3, -1 }, + { 0x430, 0x44F, 1, -32 }, + { 0x450, 0x45F, 1, -80 }, + { 0x461, 0x481, 2, -1 }, + { 0x48B, 0x4BF, 2, -1 }, + { 0x4C2, 0x4CE, 2, -1 }, + { 0x4CF, 0x4CF, 1, -15 }, + { 0x4D1, 0x52F, 2, -1 }, + { 0x561, 0x586, 1, -48 }, + { 0x10D0, 0x10FA, 1, 3008 }, + { 0x10FD, 0x10FF, 1, 3008 }, + { 0x13F8, 0x13FD, 1, -8 }, + { 0x1C80, 0x1C80, 1, -6254 }, + { 0x1C81, 0x1C81, 1, -6253 }, + { 0x1C82, 0x1C82, 1, -6244 }, + { 0x1C83, 0x1C84, 1, -6242 }, + { 0x1C85, 0x1C85, 1, -6243 }, + { 0x1C86, 0x1C86, 1, -6236 }, + { 0x1C87, 0x1C87, 1, -6181 }, + { 0x1C88, 0x1C88, 1, 35266 }, + { 0x1D79, 0x1D79, 1, 35332 }, + { 0x1D7D, 0x1D7D, 1, 3814 }, + { 0x1D8E, 0x1D8E, 1, 35384 }, + { 0x1E01, 0x1E95, 2, -1 }, + { 0x1E9B, 0x1E9B, 1, -59 }, + { 0x1EA1, 0x1EFF, 2, -1 }, + { 0x1F00, 0x1F07, 1, 8 }, + { 0x1F10, 0x1F15, 1, 8 }, + { 0x1F20, 0x1F27, 1, 8 }, + { 0x1F30, 0x1F37, 1, 8 }, + { 0x1F40, 0x1F45, 1, 8 }, + { 0x1F51, 0x1F57, 2, 8 }, + { 
0x1F60, 0x1F67, 1, 8 }, + { 0x1F70, 0x1F71, 1, 74 }, + { 0x1F72, 0x1F75, 1, 86 }, + { 0x1F76, 0x1F77, 1, 100 }, + { 0x1F78, 0x1F79, 1, 128 }, + { 0x1F7A, 0x1F7B, 1, 112 }, + { 0x1F7C, 0x1F7D, 1, 126 }, + { 0x1F80, 0x1F87, 1, 8 }, + { 0x1F90, 0x1F97, 1, 8 }, + { 0x1FA0, 0x1FA7, 1, 8 }, + { 0x1FB0, 0x1FB1, 1, 8 }, + { 0x1FB3, 0x1FB3, 1, 9 }, + { 0x1FBE, 0x1FBE, 1, -7205 }, + { 0x1FC3, 0x1FC3, 1, 9 }, + { 0x1FD0, 0x1FD1, 1, 8 }, + { 0x1FE0, 0x1FE1, 1, 8 }, + { 0x1FE5, 0x1FE5, 1, 7 }, + { 0x1FF3, 0x1FF3, 1, 9 }, + { 0x214E, 0x214E, 1, -28 }, + { 0x2170, 0x217F, 1, -16 }, + { 0x2184, 0x2184, 1, -1 }, + { 0x24D0, 0x24E9, 1, -26 }, + { 0x2C30, 0x2C5F, 1, -48 }, + { 0x2C61, 0x2C61, 1, -1 }, + { 0x2C65, 0x2C65, 1, -10795 }, + { 0x2C66, 0x2C66, 1, -10792 }, + { 0x2C68, 0x2C6C, 2, -1 }, + { 0x2C73, 0x2C76, 3, -1 }, + { 0x2C81, 0x2CE3, 2, -1 }, + { 0x2CEC, 0x2CEE, 2, -1 }, + { 0x2CF3, 0x2CF3, 1, -1 }, + { 0x2D00, 0x2D25, 1, -7264 }, + { 0x2D27, 0x2D2D, 6, -7264 }, + { 0xA641, 0xA66D, 2, -1 }, + { 0xA681, 0xA69B, 2, -1 }, + { 0xA723, 0xA72F, 2, -1 }, + { 0xA733, 0xA76F, 2, -1 }, + { 0xA77A, 0xA77C, 2, -1 }, + { 0xA77F, 0xA787, 2, -1 }, + { 0xA78C, 0xA791, 5, -1 }, + { 0xA793, 0xA793, 1, -1 }, + { 0xA794, 0xA794, 1, 48 }, + { 0xA797, 0xA7A9, 2, -1 }, + { 0xA7B5, 0xA7C3, 2, -1 }, + { 0xA7C8, 0xA7CA, 2, -1 }, + { 0xA7D1, 0xA7D7, 6, -1 }, + { 0xA7D9, 0xA7F6, 29, -1 }, + { 0xAB53, 0xAB53, 1, -928 }, + { 0xAB70, 0xABBF, 1, -38864 }, + { 0xFF41, 0xFF5A, 1, -32 }, + { 0x10428, 0x1044F, 1, -40 }, + { 0x104D8, 0x104FB, 1, -40 }, + { 0x10597, 0x105A1, 1, -39 }, + { 0x105A3, 0x105B1, 1, -39 }, + { 0x105B3, 0x105B9, 1, -39 }, + { 0x105BB, 0x105BC, 1, -39 }, + { 0x10CC0, 0x10CF2, 1, -64 }, + { 0x118C0, 0x118DF, 1, -32 }, + { 0x16E60, 0x16E7F, 1, -32 }, + { 0x1E922, 0x1E943, 1, -34 }, +}; + +static struct conv_table totitle_table[] = { + { 0x61, 0x7A, 1, -32 }, + { 0xB5, 0xB5, 1, 743 }, + { 0xE0, 0xF6, 1, -32 }, + { 0xF8, 0xFE, 1, -32 }, + { 0xFF, 0xFF, 1, 121 }, + { 0x101, 0x12F, 2, -1 }, + 
{ 0x131, 0x131, 1, -232 }, + { 0x133, 0x137, 2, -1 }, + { 0x13A, 0x148, 2, -1 }, + { 0x14B, 0x177, 2, -1 }, + { 0x17A, 0x17E, 2, -1 }, + { 0x17F, 0x17F, 1, -300 }, + { 0x180, 0x180, 1, 195 }, + { 0x183, 0x185, 2, -1 }, + { 0x188, 0x18C, 4, -1 }, + { 0x192, 0x192, 1, -1 }, + { 0x195, 0x195, 1, 97 }, + { 0x199, 0x199, 1, -1 }, + { 0x19A, 0x19A, 1, 163 }, + { 0x19E, 0x19E, 1, 130 }, + { 0x1A1, 0x1A5, 2, -1 }, + { 0x1A8, 0x1AD, 5, -1 }, + { 0x1B0, 0x1B4, 4, -1 }, + { 0x1B6, 0x1B9, 3, -1 }, + { 0x1BD, 0x1BD, 1, -1 }, + { 0x1BF, 0x1BF, 1, 56 }, + { 0x1C4, 0x1C4, 1, 1 }, + { 0x1C5, 0x1C5, 1, 0 }, + { 0x1C6, 0x1C6, 1, -1 }, + { 0x1C7, 0x1C7, 1, 1 }, + { 0x1C8, 0x1C8, 1, 0 }, + { 0x1C9, 0x1C9, 1, -1 }, + { 0x1CA, 0x1CA, 1, 1 }, + { 0x1CB, 0x1CB, 1, 0 }, + { 0x1CC, 0x1DC, 2, -1 }, + { 0x1DD, 0x1DD, 1, -79 }, + { 0x1DF, 0x1EF, 2, -1 }, + { 0x1F1, 0x1F1, 1, 1 }, + { 0x1F2, 0x1F2, 1, 0 }, + { 0x1F3, 0x1F5, 2, -1 }, + { 0x1F9, 0x21F, 2, -1 }, + { 0x223, 0x233, 2, -1 }, + { 0x23C, 0x23C, 1, -1 }, + { 0x23F, 0x240, 1, 10815 }, + { 0x242, 0x247, 5, -1 }, + { 0x249, 0x24F, 2, -1 }, + { 0x250, 0x250, 1, 10783 }, + { 0x251, 0x251, 1, 10780 }, + { 0x252, 0x252, 1, 10782 }, + { 0x253, 0x253, 1, -210 }, + { 0x254, 0x254, 1, -206 }, + { 0x256, 0x257, 1, -205 }, + { 0x259, 0x259, 1, -202 }, + { 0x25B, 0x25B, 1, -203 }, + { 0x25C, 0x25C, 1, 42319 }, + { 0x260, 0x260, 1, -205 }, + { 0x261, 0x261, 1, 42315 }, + { 0x263, 0x263, 1, -207 }, + { 0x265, 0x265, 1, 42280 }, + { 0x266, 0x266, 1, 42308 }, + { 0x268, 0x268, 1, -209 }, + { 0x269, 0x269, 1, -211 }, + { 0x26A, 0x26A, 1, 42308 }, + { 0x26B, 0x26B, 1, 10743 }, + { 0x26C, 0x26C, 1, 42305 }, + { 0x26F, 0x26F, 1, -211 }, + { 0x271, 0x271, 1, 10749 }, + { 0x272, 0x272, 1, -213 }, + { 0x275, 0x275, 1, -214 }, + { 0x27D, 0x27D, 1, 10727 }, + { 0x280, 0x280, 1, -218 }, + { 0x282, 0x282, 1, 42307 }, + { 0x283, 0x283, 1, -218 }, + { 0x287, 0x287, 1, 42282 }, + { 0x288, 0x288, 1, -218 }, + { 0x289, 0x289, 1, -69 }, + { 0x28A, 0x28B, 1, -217 }, + { 
0x28C, 0x28C, 1, -71 }, + { 0x292, 0x292, 1, -219 }, + { 0x29D, 0x29D, 1, 42261 }, + { 0x29E, 0x29E, 1, 42258 }, + { 0x345, 0x345, 1, 84 }, + { 0x371, 0x373, 2, -1 }, + { 0x377, 0x377, 1, -1 }, + { 0x37B, 0x37D, 1, 130 }, + { 0x3AC, 0x3AC, 1, -38 }, + { 0x3AD, 0x3AF, 1, -37 }, + { 0x3B1, 0x3C1, 1, -32 }, + { 0x3C2, 0x3C2, 1, -31 }, + { 0x3C3, 0x3CB, 1, -32 }, + { 0x3CC, 0x3CC, 1, -64 }, + { 0x3CD, 0x3CE, 1, -63 }, + { 0x3D0, 0x3D0, 1, -62 }, + { 0x3D1, 0x3D1, 1, -57 }, + { 0x3D5, 0x3D5, 1, -47 }, + { 0x3D6, 0x3D6, 1, -54 }, + { 0x3D7, 0x3D7, 1, -8 }, + { 0x3D9, 0x3EF, 2, -1 }, + { 0x3F0, 0x3F0, 1, -86 }, + { 0x3F1, 0x3F1, 1, -80 }, + { 0x3F2, 0x3F2, 1, 7 }, + { 0x3F3, 0x3F3, 1, -116 }, + { 0x3F5, 0x3F5, 1, -96 }, + { 0x3F8, 0x3FB, 3, -1 }, + { 0x430, 0x44F, 1, -32 }, + { 0x450, 0x45F, 1, -80 }, + { 0x461, 0x481, 2, -1 }, + { 0x48B, 0x4BF, 2, -1 }, + { 0x4C2, 0x4CE, 2, -1 }, + { 0x4CF, 0x4CF, 1, -15 }, + { 0x4D1, 0x52F, 2, -1 }, + { 0x561, 0x586, 1, -48 }, + { 0x10D0, 0x10FA, 1, 0 }, + { 0x10FD, 0x10FF, 1, 0 }, + { 0x13F8, 0x13FD, 1, -8 }, + { 0x1C80, 0x1C80, 1, -6254 }, + { 0x1C81, 0x1C81, 1, -6253 }, + { 0x1C82, 0x1C82, 1, -6244 }, + { 0x1C83, 0x1C84, 1, -6242 }, + { 0x1C85, 0x1C85, 1, -6243 }, + { 0x1C86, 0x1C86, 1, -6236 }, + { 0x1C87, 0x1C87, 1, -6181 }, + { 0x1C88, 0x1C88, 1, 35266 }, + { 0x1D79, 0x1D79, 1, 35332 }, + { 0x1D7D, 0x1D7D, 1, 3814 }, + { 0x1D8E, 0x1D8E, 1, 35384 }, + { 0x1E01, 0x1E95, 2, -1 }, + { 0x1E9B, 0x1E9B, 1, -59 }, + { 0x1EA1, 0x1EFF, 2, -1 }, + { 0x1F00, 0x1F07, 1, 8 }, + { 0x1F10, 0x1F15, 1, 8 }, + { 0x1F20, 0x1F27, 1, 8 }, + { 0x1F30, 0x1F37, 1, 8 }, + { 0x1F40, 0x1F45, 1, 8 }, + { 0x1F51, 0x1F57, 2, 8 }, + { 0x1F60, 0x1F67, 1, 8 }, + { 0x1F70, 0x1F71, 1, 74 }, + { 0x1F72, 0x1F75, 1, 86 }, + { 0x1F76, 0x1F77, 1, 100 }, + { 0x1F78, 0x1F79, 1, 128 }, + { 0x1F7A, 0x1F7B, 1, 112 }, + { 0x1F7C, 0x1F7D, 1, 126 }, + { 0x1F80, 0x1F87, 1, 8 }, + { 0x1F90, 0x1F97, 1, 8 }, + { 0x1FA0, 0x1FA7, 1, 8 }, + { 0x1FB0, 0x1FB1, 1, 8 }, + { 0x1FB3, 0x1FB3, 
1, 9 }, + { 0x1FBE, 0x1FBE, 1, -7205 }, + { 0x1FC3, 0x1FC3, 1, 9 }, + { 0x1FD0, 0x1FD1, 1, 8 }, + { 0x1FE0, 0x1FE1, 1, 8 }, + { 0x1FE5, 0x1FE5, 1, 7 }, + { 0x1FF3, 0x1FF3, 1, 9 }, + { 0x214E, 0x214E, 1, -28 }, + { 0x2170, 0x217F, 1, -16 }, + { 0x2184, 0x2184, 1, -1 }, + { 0x24D0, 0x24E9, 1, -26 }, + { 0x2C30, 0x2C5F, 1, -48 }, + { 0x2C61, 0x2C61, 1, -1 }, + { 0x2C65, 0x2C65, 1, -10795 }, + { 0x2C66, 0x2C66, 1, -10792 }, + { 0x2C68, 0x2C6C, 2, -1 }, + { 0x2C73, 0x2C76, 3, -1 }, + { 0x2C81, 0x2CE3, 2, -1 }, + { 0x2CEC, 0x2CEE, 2, -1 }, + { 0x2CF3, 0x2CF3, 1, -1 }, + { 0x2D00, 0x2D25, 1, -7264 }, + { 0x2D27, 0x2D2D, 6, -7264 }, + { 0xA641, 0xA66D, 2, -1 }, + { 0xA681, 0xA69B, 2, -1 }, + { 0xA723, 0xA72F, 2, -1 }, + { 0xA733, 0xA76F, 2, -1 }, + { 0xA77A, 0xA77C, 2, -1 }, + { 0xA77F, 0xA787, 2, -1 }, + { 0xA78C, 0xA791, 5, -1 }, + { 0xA793, 0xA793, 1, -1 }, + { 0xA794, 0xA794, 1, 48 }, + { 0xA797, 0xA7A9, 2, -1 }, + { 0xA7B5, 0xA7C3, 2, -1 }, + { 0xA7C8, 0xA7CA, 2, -1 }, + { 0xA7D1, 0xA7D7, 6, -1 }, + { 0xA7D9, 0xA7F6, 29, -1 }, + { 0xAB53, 0xAB53, 1, -928 }, + { 0xAB70, 0xABBF, 1, -38864 }, + { 0xFF41, 0xFF5A, 1, -32 }, + { 0x10428, 0x1044F, 1, -40 }, + { 0x104D8, 0x104FB, 1, -40 }, + { 0x10597, 0x105A1, 1, -39 }, + { 0x105A3, 0x105B1, 1, -39 }, + { 0x105B3, 0x105B9, 1, -39 }, + { 0x105BB, 0x105BC, 1, -39 }, + { 0x10CC0, 0x10CF2, 1, -64 }, + { 0x118C0, 0x118DF, 1, -32 }, + { 0x16E60, 0x16E7F, 1, -32 }, + { 0x1E922, 0x1E943, 1, -34 }, +}; + +static struct conv_table tofold_table[] = { + { 0x41, 0x5A, 1, 32 }, + { 0xB5, 0xB5, 1, 775 }, + { 0xC0, 0xD6, 1, 32 }, + { 0xD8, 0xDE, 1, 32 }, + { 0x100, 0x12E, 2, 1 }, + { 0x132, 0x136, 2, 1 }, + { 0x139, 0x147, 2, 1 }, + { 0x14A, 0x176, 2, 1 }, + { 0x178, 0x178, 1, -121 }, + { 0x179, 0x17D, 2, 1 }, + { 0x17F, 0x17F, 1, -268 }, + { 0x181, 0x181, 1, 210 }, + { 0x182, 0x184, 2, 1 }, + { 0x186, 0x186, 1, 206 }, + { 0x187, 0x187, 1, 1 }, + { 0x189, 0x18A, 1, 205 }, + { 0x18B, 0x18B, 1, 1 }, + { 0x18E, 0x18E, 1, 79 }, + { 0x18F, 
0x18F, 1, 202 }, + { 0x190, 0x190, 1, 203 }, + { 0x191, 0x191, 1, 1 }, + { 0x193, 0x193, 1, 205 }, + { 0x194, 0x194, 1, 207 }, + { 0x196, 0x196, 1, 211 }, + { 0x197, 0x197, 1, 209 }, + { 0x198, 0x198, 1, 1 }, + { 0x19C, 0x19C, 1, 211 }, + { 0x19D, 0x19D, 1, 213 }, + { 0x19F, 0x19F, 1, 214 }, + { 0x1A0, 0x1A4, 2, 1 }, + { 0x1A6, 0x1A6, 1, 218 }, + { 0x1A7, 0x1A7, 1, 1 }, + { 0x1A9, 0x1A9, 1, 218 }, + { 0x1AC, 0x1AC, 1, 1 }, + { 0x1AE, 0x1AE, 1, 218 }, + { 0x1AF, 0x1AF, 1, 1 }, + { 0x1B1, 0x1B2, 1, 217 }, + { 0x1B3, 0x1B5, 2, 1 }, + { 0x1B7, 0x1B7, 1, 219 }, + { 0x1B8, 0x1BC, 4, 1 }, + { 0x1C4, 0x1C4, 1, 2 }, + { 0x1C5, 0x1C5, 1, 1 }, + { 0x1C7, 0x1C7, 1, 2 }, + { 0x1C8, 0x1C8, 1, 1 }, + { 0x1CA, 0x1CA, 1, 2 }, + { 0x1CB, 0x1DB, 2, 1 }, + { 0x1DE, 0x1EE, 2, 1 }, + { 0x1F1, 0x1F1, 1, 2 }, + { 0x1F2, 0x1F4, 2, 1 }, + { 0x1F6, 0x1F6, 1, -97 }, + { 0x1F7, 0x1F7, 1, -56 }, + { 0x1F8, 0x21E, 2, 1 }, + { 0x220, 0x220, 1, -130 }, + { 0x222, 0x232, 2, 1 }, + { 0x23A, 0x23A, 1, 10795 }, + { 0x23B, 0x23B, 1, 1 }, + { 0x23D, 0x23D, 1, -163 }, + { 0x23E, 0x23E, 1, 10792 }, + { 0x241, 0x241, 1, 1 }, + { 0x243, 0x243, 1, -195 }, + { 0x244, 0x244, 1, 69 }, + { 0x245, 0x245, 1, 71 }, + { 0x246, 0x24E, 2, 1 }, + { 0x345, 0x345, 1, 116 }, + { 0x370, 0x372, 2, 1 }, + { 0x376, 0x376, 1, 1 }, + { 0x37F, 0x37F, 1, 116 }, + { 0x386, 0x386, 1, 38 }, + { 0x388, 0x38A, 1, 37 }, + { 0x38C, 0x38C, 1, 64 }, + { 0x38E, 0x38F, 1, 63 }, + { 0x391, 0x3A1, 1, 32 }, + { 0x3A3, 0x3AB, 1, 32 }, + { 0x3C2, 0x3C2, 1, 1 }, + { 0x3CF, 0x3CF, 1, 8 }, + { 0x3D0, 0x3D0, 1, -30 }, + { 0x3D1, 0x3D1, 1, -25 }, + { 0x3D5, 0x3D5, 1, -15 }, + { 0x3D6, 0x3D6, 1, -22 }, + { 0x3D8, 0x3EE, 2, 1 }, + { 0x3F0, 0x3F0, 1, -54 }, + { 0x3F1, 0x3F1, 1, -48 }, + { 0x3F4, 0x3F4, 1, -60 }, + { 0x3F5, 0x3F5, 1, -64 }, + { 0x3F7, 0x3F7, 1, 1 }, + { 0x3F9, 0x3F9, 1, -7 }, + { 0x3FA, 0x3FA, 1, 1 }, + { 0x3FD, 0x3FF, 1, -130 }, + { 0x400, 0x40F, 1, 80 }, + { 0x410, 0x42F, 1, 32 }, + { 0x460, 0x480, 2, 1 }, + { 0x48A, 0x4BE, 2, 1 }, + { 
0x4C0, 0x4C0, 1, 15 }, + { 0x4C1, 0x4CD, 2, 1 }, + { 0x4D0, 0x52E, 2, 1 }, + { 0x531, 0x556, 1, 48 }, + { 0x10A0, 0x10C5, 1, 7264 }, + { 0x10C7, 0x10CD, 6, 7264 }, + { 0x13F8, 0x13FD, 1, -8 }, + { 0x1C80, 0x1C80, 1, -6222 }, + { 0x1C81, 0x1C81, 1, -6221 }, + { 0x1C82, 0x1C82, 1, -6212 }, + { 0x1C83, 0x1C84, 1, -6210 }, + { 0x1C85, 0x1C85, 1, -6211 }, + { 0x1C86, 0x1C86, 1, -6204 }, + { 0x1C87, 0x1C87, 1, -6180 }, + { 0x1C88, 0x1C88, 1, 35267 }, + { 0x1C90, 0x1CBA, 1, -3008 }, + { 0x1CBD, 0x1CBF, 1, -3008 }, + { 0x1E00, 0x1E94, 2, 1 }, + { 0x1E9B, 0x1E9B, 1, -58 }, + { 0x1E9E, 0x1E9E, 1, -7615 }, + { 0x1EA0, 0x1EFE, 2, 1 }, + { 0x1F08, 0x1F0F, 1, -8 }, + { 0x1F18, 0x1F1D, 1, -8 }, + { 0x1F28, 0x1F2F, 1, -8 }, + { 0x1F38, 0x1F3F, 1, -8 }, + { 0x1F48, 0x1F4D, 1, -8 }, + { 0x1F59, 0x1F5F, 2, -8 }, + { 0x1F68, 0x1F6F, 1, -8 }, + { 0x1F88, 0x1F8F, 1, -8 }, + { 0x1F98, 0x1F9F, 1, -8 }, + { 0x1FA8, 0x1FAF, 1, -8 }, + { 0x1FB8, 0x1FB9, 1, -8 }, + { 0x1FBA, 0x1FBB, 1, -74 }, + { 0x1FBC, 0x1FBC, 1, -9 }, + { 0x1FBE, 0x1FBE, 1, -7173 }, + { 0x1FC8, 0x1FCB, 1, -86 }, + { 0x1FCC, 0x1FCC, 1, -9 }, + { 0x1FD8, 0x1FD9, 1, -8 }, + { 0x1FDA, 0x1FDB, 1, -100 }, + { 0x1FE8, 0x1FE9, 1, -8 }, + { 0x1FEA, 0x1FEB, 1, -112 }, + { 0x1FEC, 0x1FEC, 1, -7 }, + { 0x1FF8, 0x1FF9, 1, -128 }, + { 0x1FFA, 0x1FFB, 1, -126 }, + { 0x1FFC, 0x1FFC, 1, -9 }, + { 0x2126, 0x2126, 1, -7517 }, + { 0x212A, 0x212A, 1, -8383 }, + { 0x212B, 0x212B, 1, -8262 }, + { 0x2132, 0x2132, 1, 28 }, + { 0x2160, 0x216F, 1, 16 }, + { 0x2183, 0x2183, 1, 1 }, + { 0x24B6, 0x24CF, 1, 26 }, + { 0x2C00, 0x2C2F, 1, 48 }, + { 0x2C60, 0x2C60, 1, 1 }, + { 0x2C62, 0x2C62, 1, -10743 }, + { 0x2C63, 0x2C63, 1, -3814 }, + { 0x2C64, 0x2C64, 1, -10727 }, + { 0x2C67, 0x2C6B, 2, 1 }, + { 0x2C6D, 0x2C6D, 1, -10780 }, + { 0x2C6E, 0x2C6E, 1, -10749 }, + { 0x2C6F, 0x2C6F, 1, -10783 }, + { 0x2C70, 0x2C70, 1, -10782 }, + { 0x2C72, 0x2C75, 3, 1 }, + { 0x2C7E, 0x2C7F, 1, -10815 }, + { 0x2C80, 0x2CE2, 2, 1 }, + { 0x2CEB, 0x2CED, 2, 1 }, + { 0x2CF2, 
0xA640, 31054, 1 }, + { 0xA642, 0xA66C, 2, 1 }, + { 0xA680, 0xA69A, 2, 1 }, + { 0xA722, 0xA72E, 2, 1 }, + { 0xA732, 0xA76E, 2, 1 }, + { 0xA779, 0xA77B, 2, 1 }, + { 0xA77D, 0xA77D, 1, -35332 }, + { 0xA77E, 0xA786, 2, 1 }, + { 0xA78B, 0xA78B, 1, 1 }, + { 0xA78D, 0xA78D, 1, -42280 }, + { 0xA790, 0xA792, 2, 1 }, + { 0xA796, 0xA7A8, 2, 1 }, + { 0xA7AA, 0xA7AA, 1, -42308 }, + { 0xA7AB, 0xA7AB, 1, -42319 }, + { 0xA7AC, 0xA7AC, 1, -42315 }, + { 0xA7AD, 0xA7AD, 1, -42305 }, + { 0xA7AE, 0xA7AE, 1, -42308 }, + { 0xA7B0, 0xA7B0, 1, -42258 }, + { 0xA7B1, 0xA7B1, 1, -42282 }, + { 0xA7B2, 0xA7B2, 1, -42261 }, + { 0xA7B3, 0xA7B3, 1, 928 }, + { 0xA7B4, 0xA7C2, 2, 1 }, + { 0xA7C4, 0xA7C4, 1, -48 }, + { 0xA7C5, 0xA7C5, 1, -42307 }, + { 0xA7C6, 0xA7C6, 1, -35384 }, + { 0xA7C7, 0xA7C9, 2, 1 }, + { 0xA7D0, 0xA7D6, 6, 1 }, + { 0xA7D8, 0xA7F5, 29, 1 }, + { 0xAB70, 0xABBF, 1, -38864 }, + { 0xFF21, 0xFF3A, 1, 32 }, + { 0x10400, 0x10427, 1, 40 }, + { 0x104B0, 0x104D3, 1, 40 }, + { 0x10570, 0x1057A, 1, 39 }, + { 0x1057C, 0x1058A, 1, 39 }, + { 0x1058C, 0x10592, 1, 39 }, + { 0x10594, 0x10595, 1, 39 }, + { 0x10C80, 0x10CB2, 1, 64 }, + { 0x118A0, 0x118BF, 1, 32 }, + { 0x16E40, 0x16E5F, 1, 32 }, + { 0x1E900, 0x1E921, 1, 34 }, +}; + +static struct range_table doublewidth_table[] = { + { 0x1100, 0x115F, 1 }, + { 0x231A, 0x231B, 1 }, + { 0x2329, 0x232A, 1 }, + { 0x23E9, 0x23EC, 1 }, + { 0x23F0, 0x23F3, 3 }, + { 0x25FD, 0x25FE, 1 }, + { 0x2614, 0x2615, 1 }, + { 0x2648, 0x2653, 1 }, + { 0x267F, 0x2693, 20 }, + { 0x26A1, 0x26AA, 9 }, + { 0x26AB, 0x26BD, 18 }, + { 0x26BE, 0x26C4, 6 }, + { 0x26C5, 0x26CE, 9 }, + { 0x26D4, 0x26EA, 22 }, + { 0x26F2, 0x26F3, 1 }, + { 0x26F5, 0x26FA, 5 }, + { 0x26FD, 0x2705, 8 }, + { 0x270A, 0x270B, 1 }, + { 0x2728, 0x274C, 36 }, + { 0x274E, 0x2753, 5 }, + { 0x2754, 0x2755, 1 }, + { 0x2757, 0x2795, 62 }, + { 0x2796, 0x2797, 1 }, + { 0x27B0, 0x27BF, 15 }, + { 0x2B1B, 0x2B1C, 1 }, + { 0x2B50, 0x2B55, 5 }, + { 0x2E80, 0x2E99, 1 }, + { 0x2E9B, 0x2EF3, 1 }, + { 0x2F00, 0x2FD5, 1 
}, + { 0x2FF0, 0x2FFB, 1 }, + { 0x3000, 0x303E, 1 }, + { 0x3041, 0x3096, 1 }, + { 0x3099, 0x30FF, 1 }, + { 0x3105, 0x312F, 1 }, + { 0x3131, 0x318E, 1 }, + { 0x3190, 0x31E3, 1 }, + { 0x31F0, 0x321E, 1 }, + { 0x3220, 0x3247, 1 }, + { 0x3250, 0x4DBF, 1 }, + { 0x4E00, 0xA48C, 1 }, + { 0xA490, 0xA4C6, 1 }, + { 0xA960, 0xA97C, 1 }, + { 0xAC00, 0xD7A3, 1 }, + { 0xF900, 0xFAFF, 1 }, + { 0xFE10, 0xFE19, 1 }, + { 0xFE30, 0xFE52, 1 }, + { 0xFE54, 0xFE66, 1 }, + { 0xFE68, 0xFE6B, 1 }, + { 0xFF01, 0xFF60, 1 }, + { 0xFFE0, 0xFFE6, 1 }, + { 0x16FE0, 0x16FE4, 1 }, + { 0x16FF0, 0x16FF1, 1 }, + { 0x17000, 0x187F7, 1 }, + { 0x18800, 0x18CD5, 1 }, + { 0x18D00, 0x18D08, 1 }, + { 0x1AFF0, 0x1AFF3, 1 }, + { 0x1AFF5, 0x1AFFB, 1 }, + { 0x1AFFD, 0x1AFFE, 1 }, + { 0x1B000, 0x1B122, 1 }, + { 0x1B150, 0x1B152, 1 }, + { 0x1B164, 0x1B167, 1 }, + { 0x1B170, 0x1B2FB, 1 }, + { 0x1F004, 0x1F0CF, 203 }, + { 0x1F18E, 0x1F191, 3 }, + { 0x1F192, 0x1F19A, 1 }, + { 0x1F200, 0x1F202, 1 }, + { 0x1F210, 0x1F23B, 1 }, + { 0x1F240, 0x1F248, 1 }, + { 0x1F250, 0x1F251, 1 }, + { 0x1F260, 0x1F265, 1 }, + { 0x1F300, 0x1F320, 1 }, + { 0x1F32D, 0x1F335, 1 }, + { 0x1F337, 0x1F37C, 1 }, + { 0x1F37E, 0x1F393, 1 }, + { 0x1F3A0, 0x1F3CA, 1 }, + { 0x1F3CF, 0x1F3D3, 1 }, + { 0x1F3E0, 0x1F3F0, 1 }, + { 0x1F3F4, 0x1F3F8, 4 }, + { 0x1F3F9, 0x1F43E, 1 }, + { 0x1F440, 0x1F442, 2 }, + { 0x1F443, 0x1F4FC, 1 }, + { 0x1F4FF, 0x1F53D, 1 }, + { 0x1F54B, 0x1F54E, 1 }, + { 0x1F550, 0x1F567, 1 }, + { 0x1F57A, 0x1F595, 27 }, + { 0x1F596, 0x1F5A4, 14 }, + { 0x1F5FB, 0x1F64F, 1 }, + { 0x1F680, 0x1F6C5, 1 }, + { 0x1F6CC, 0x1F6D0, 4 }, + { 0x1F6D1, 0x1F6D2, 1 }, + { 0x1F6D5, 0x1F6D7, 1 }, + { 0x1F6DD, 0x1F6DF, 1 }, + { 0x1F6EB, 0x1F6EC, 1 }, + { 0x1F6F4, 0x1F6FC, 1 }, + { 0x1F7E0, 0x1F7EB, 1 }, + { 0x1F7F0, 0x1F90C, 284 }, + { 0x1F90D, 0x1F93A, 1 }, + { 0x1F93C, 0x1F945, 1 }, + { 0x1F947, 0x1F9FF, 1 }, + { 0x1FA70, 0x1FA74, 1 }, + { 0x1FA78, 0x1FA7C, 1 }, + { 0x1FA80, 0x1FA86, 1 }, + { 0x1FA90, 0x1FAAC, 1 }, + { 0x1FAB0, 0x1FABA, 1 }, + { 
0x1FAC0, 0x1FAC5, 1 }, + { 0x1FAD0, 0x1FAD9, 1 }, + { 0x1FAE0, 0x1FAE7, 1 }, + { 0x1FAF0, 0x1FAF6, 1 }, + { 0x20000, 0x2FFFD, 1 }, + { 0x30000, 0x3FFFD, 1 }, +}; + +static struct range_table ambiwidth_table[] = { + { 0xA1, 0xA7, 3 }, + { 0xA8, 0xAA, 2 }, + { 0xAD, 0xAE, 1 }, + { 0xB0, 0xB4, 1 }, + { 0xB6, 0xBA, 1 }, + { 0xBC, 0xBF, 1 }, + { 0xC6, 0xD0, 10 }, + { 0xD7, 0xD8, 1 }, + { 0xDE, 0xE1, 1 }, + { 0xE6, 0xE8, 2 }, + { 0xE9, 0xEA, 1 }, + { 0xEC, 0xED, 1 }, + { 0xF0, 0xF2, 2 }, + { 0xF3, 0xF7, 4 }, + { 0xF8, 0xFA, 1 }, + { 0xFC, 0xFE, 2 }, + { 0x101, 0x111, 16 }, + { 0x113, 0x11B, 8 }, + { 0x126, 0x127, 1 }, + { 0x12B, 0x131, 6 }, + { 0x132, 0x133, 1 }, + { 0x138, 0x13F, 7 }, + { 0x140, 0x142, 1 }, + { 0x144, 0x148, 4 }, + { 0x149, 0x14B, 1 }, + { 0x14D, 0x152, 5 }, + { 0x153, 0x166, 19 }, + { 0x167, 0x16B, 4 }, + { 0x1CE, 0x1DC, 2 }, + { 0x251, 0x261, 16 }, + { 0x2C4, 0x2C7, 3 }, + { 0x2C9, 0x2CB, 1 }, + { 0x2CD, 0x2D0, 3 }, + { 0x2D8, 0x2DB, 1 }, + { 0x2DD, 0x2DF, 2 }, + { 0x300, 0x36F, 1 }, + { 0x391, 0x3A1, 1 }, + { 0x3A3, 0x3A9, 1 }, + { 0x3B1, 0x3C1, 1 }, + { 0x3C3, 0x3C9, 1 }, + { 0x401, 0x410, 15 }, + { 0x411, 0x44F, 1 }, + { 0x451, 0x2010, 7103 }, + { 0x2013, 0x2016, 1 }, + { 0x2018, 0x2019, 1 }, + { 0x201C, 0x201D, 1 }, + { 0x2020, 0x2022, 1 }, + { 0x2024, 0x2027, 1 }, + { 0x2030, 0x2032, 2 }, + { 0x2033, 0x2035, 2 }, + { 0x203B, 0x203E, 3 }, + { 0x2074, 0x207F, 11 }, + { 0x2081, 0x2084, 1 }, + { 0x20AC, 0x2103, 87 }, + { 0x2105, 0x2109, 4 }, + { 0x2113, 0x2116, 3 }, + { 0x2121, 0x2122, 1 }, + { 0x2126, 0x212B, 5 }, + { 0x2153, 0x2154, 1 }, + { 0x215B, 0x215E, 1 }, + { 0x2160, 0x216B, 1 }, + { 0x2170, 0x2179, 1 }, + { 0x2189, 0x2190, 7 }, + { 0x2191, 0x2199, 1 }, + { 0x21B8, 0x21B9, 1 }, + { 0x21D2, 0x21D4, 2 }, + { 0x21E7, 0x2200, 25 }, + { 0x2202, 0x2203, 1 }, + { 0x2207, 0x2208, 1 }, + { 0x220B, 0x220F, 4 }, + { 0x2211, 0x2215, 4 }, + { 0x221A, 0x221D, 3 }, + { 0x221E, 0x2220, 1 }, + { 0x2223, 0x2227, 2 }, + { 0x2228, 0x222C, 1 }, + { 0x222E, 
0x2234, 6 }, + { 0x2235, 0x2237, 1 }, + { 0x223C, 0x223D, 1 }, + { 0x2248, 0x224C, 4 }, + { 0x2252, 0x2260, 14 }, + { 0x2261, 0x2264, 3 }, + { 0x2265, 0x2267, 1 }, + { 0x226A, 0x226B, 1 }, + { 0x226E, 0x226F, 1 }, + { 0x2282, 0x2283, 1 }, + { 0x2286, 0x2287, 1 }, + { 0x2295, 0x2299, 4 }, + { 0x22A5, 0x22BF, 26 }, + { 0x2312, 0x2460, 334 }, + { 0x2461, 0x24E9, 1 }, + { 0x24EB, 0x254B, 1 }, + { 0x2550, 0x2573, 1 }, + { 0x2580, 0x258F, 1 }, + { 0x2592, 0x2595, 1 }, + { 0x25A0, 0x25A1, 1 }, + { 0x25A3, 0x25A9, 1 }, + { 0x25B2, 0x25B3, 1 }, + { 0x25B6, 0x25B7, 1 }, + { 0x25BC, 0x25BD, 1 }, + { 0x25C0, 0x25C1, 1 }, + { 0x25C6, 0x25C8, 1 }, + { 0x25CB, 0x25CE, 3 }, + { 0x25CF, 0x25D1, 1 }, + { 0x25E2, 0x25E5, 1 }, + { 0x25EF, 0x2605, 22 }, + { 0x2606, 0x2609, 3 }, + { 0x260E, 0x260F, 1 }, + { 0x261C, 0x261E, 2 }, + { 0x2640, 0x2642, 2 }, + { 0x2660, 0x2661, 1 }, + { 0x2663, 0x2665, 1 }, + { 0x2667, 0x266A, 1 }, + { 0x266C, 0x266D, 1 }, + { 0x266F, 0x269E, 47 }, + { 0x269F, 0x26BF, 32 }, + { 0x26C6, 0x26CD, 1 }, + { 0x26CF, 0x26D3, 1 }, + { 0x26D5, 0x26E1, 1 }, + { 0x26E3, 0x26E8, 5 }, + { 0x26E9, 0x26EB, 2 }, + { 0x26EC, 0x26F1, 1 }, + { 0x26F4, 0x26F6, 2 }, + { 0x26F7, 0x26F9, 1 }, + { 0x26FB, 0x26FC, 1 }, + { 0x26FE, 0x26FF, 1 }, + { 0x273D, 0x2776, 57 }, + { 0x2777, 0x277F, 1 }, + { 0x2B56, 0x2B59, 1 }, + { 0x3248, 0x324F, 1 }, + { 0xE000, 0xF8FF, 1 }, + { 0xFE00, 0xFE0F, 1 }, + { 0xFFFD, 0x1F100, 61699 }, + { 0x1F101, 0x1F10A, 1 }, + { 0x1F110, 0x1F12D, 1 }, + { 0x1F130, 0x1F169, 1 }, + { 0x1F170, 0x1F18D, 1 }, + { 0x1F18F, 0x1F190, 1 }, + { 0x1F19B, 0x1F1AC, 1 }, + { 0xE0100, 0xE01EF, 1 }, + { 0xF0000, 0xFFFFD, 1 }, + { 0x100000, 0x10FFFD, 1 }, +}; + +#endif /* unidata_h */