/*
 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "hb.hh"
#include "hb-unicode.hh"
#include "hb-machinery.hh"

#include "hb-ucd-table.hh"

/* Combining-class callback for the UCD-backed unicode funcs.
 *
 * Returns whatever _hb_ucd_ccc() yields for @unicode, cast to
 * hb_unicode_combining_class_t.  @ufuncs and @user_data are unused. */
static hb_unicode_combining_class_t
hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                        hb_codepoint_t unicode,
                        void *user_data HB_UNUSED)
{
  return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode);
}

/* General-category callback for the UCD-backed unicode funcs.
 *
 * Returns whatever _hb_ucd_gc() yields for @unicode, cast to
 * hb_unicode_general_category_t.  @ufuncs and @user_data are unused. */
static hb_unicode_general_category_t
hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                         hb_codepoint_t unicode,
                         void *user_data HB_UNUSED)
{
  return (hb_unicode_general_category_t) _hb_ucd_gc (unicode);
}

/* Mirroring callback for the UCD-backed unicode funcs.
 *
 * Returns @unicode plus the value _hb_ucd_bmg() yields for it; the table
 * entry is therefore used as an offset relative to the input codepoint
 * (presumably zero when the character has no mirrored counterpart -- confirm
 * against the generated table).  @ufuncs and @user_data are unused. */
static hb_codepoint_t
hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                  hb_codepoint_t unicode,
                  void *user_data HB_UNUSED)
{
  return unicode + _hb_ucd_bmg (unicode);
}

/* Script callback for the UCD-backed unicode funcs.
 *
 * _hb_ucd_sc() yields an index for @unicode; that index selects the
 * hb_script_t to return from _hb_ucd_sc_map.  @ufuncs and @user_data are
 * unused. */
static hb_script_t
hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
               hb_codepoint_t unicode,
               void *user_data HB_UNUSED)
{
  return _hb_ucd_sc_map[_hb_ucd_sc (unicode)];
}


/* Constants for the arithmetic Hangul (de)composition helpers below.
 * xBASE values are the codepoints the index arithmetic is relative to;
 * xCOUNT values are the sizes of the corresponding index ranges.
 * TBASE is one below the first codepoint accepted as a trailing part
 * (the helpers treat a trailing index of 0 as "no trailing part"). */
#define SBASE 0xAC00u
#define LBASE 0x1100u
#define VBASE 0x1161u
#define TBASE 0x11A7u
#define SCOUNT 11172u
#define LCOUNT 19u
#define VCOUNT 21u
#define TCOUNT 28u
/* Number of syllables sharing one leading part. */
#define NCOUNT (VCOUNT * TCOUNT)

static inline bool
|
|
|
|
_hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
|
|
|
|
{
|
|
|
|
unsigned si = ab - SBASE;
|
|
|
|
|
|
|
|
if (si >= SCOUNT)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (si % TCOUNT)
|
|
|
|
{
|
|
|
|
/* LV,T */
|
|
|
|
*a = SBASE + (si / TCOUNT) * TCOUNT;
|
|
|
|
*b = TBASE + (si % TCOUNT);
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
/* L,V */
|
|
|
|
*a = LBASE + (si / NCOUNT);
|
|
|
|
*b = VBASE + (si % NCOUNT) / TCOUNT;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
_hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
|
|
|
|
{
|
|
|
|
if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
|
|
|
|
!((a - SBASE) % TCOUNT))
|
|
|
|
{
|
|
|
|
/* LV,T */
|
|
|
|
*ab = a + (b - TBASE);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT))
|
|
|
|
{
|
|
|
|
/* L,V */
|
|
|
|
int li = a - LBASE;
|
|
|
|
int vi = b - VBASE;
|
|
|
|
*ab = SBASE + li * NCOUNT + vi * TCOUNT;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
_cmp_pair (const void *_key, const void *_item)
|
|
|
|
{
|
|
|
|
uint64_t& a = * (uint64_t*) _key;
|
|
|
|
uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
|
|
|
|
|
|
|
|
return a < b ? -1 : a > b ? +1 : 0;
|
|
|
|
}
|
2019-06-24 21:37:23 +02:00
|
|
|
static int
|
|
|
|
_cmp_pair_11_7_14 (const void *_key, const void *_item)
|
|
|
|
{
|
|
|
|
uint32_t& a = * (uint32_t*) _key;
|
|
|
|
uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
|
|
|
|
|
|
|
|
return a < b ? -1 : a > b ? +1 : 0;
|
|
|
|
}
|
2019-05-22 22:21:21 +02:00
|
|
|
|
|
|
|
/* Compose callback for the UCD-backed unicode funcs: tries to combine @a
 * followed by @b into a single codepoint.
 *
 * Hangul pairs are composed arithmetically first.  Any other pair is looked
 * up by binary search in one of two tables of encoded (a, b, composite)
 * triples sorted by "a,b": a 32-bit table when the pair passes the bit-mask
 * test below, a 64-bit table otherwise.
 *
 * Returns true and stores the result in @ab on success.  Returns false,
 * without writing @ab, when no entry is found or when the entry's third
 * field decodes to zero.  @ufuncs and @user_data are unused. */
static hb_bool_t
hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
                void *user_data HB_UNUSED)
{
  /* Hangul is handled algorithmically. */
  if (_hb_ucd_compose_hangul (a, b, ab)) return true;

  hb_codepoint_t u = 0;

  /* True when a < 0x800 and b is in 0x0300..0x037F. */
  if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u)
  {
    /* If "a" is small enough and "b" is in the U+0300 range,
     * the composition data is encoded in a 32bit array sorted
     * by "a,b" pair. */
    uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
    const uint32_t *v = hb_bsearch (k,
                                    _hb_ucd_dm2_u32_map,
                                    ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
                                    sizeof (*_hb_ucd_dm2_u32_map),
                                    _cmp_pair_11_7_14);
    if (likely (!v)) return false;
    u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v);
  }
  else
  {
    /* Otherwise it is stored in a 64bit array sorted by
     * "a,b" pair. */
    uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
    const uint64_t *v = hb_bsearch (k,
                                    _hb_ucd_dm2_u64_map,
                                    ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
                                    sizeof (*_hb_ucd_dm2_u64_map),
                                    _cmp_pair);
    if (likely (!v)) return false;
    u = HB_CODEPOINT_DECODE3_3 (*v);
  }

  /* An entry whose third field is zero does not compose (presumably a pair
   * kept only for decomposition -- confirm against the table generator). */
  if (unlikely (!u)) return false;
  *ab = u;
  return true;
}

/* Decompose callback for the UCD-backed unicode funcs: splits @ab one step
 * into @a and @b.
 *
 * Hangul syllables are decomposed arithmetically first.  For anything else,
 * _hb_ucd_dm() yields a 1-based index (0 meaning "no decomposition") into a
 * single index space formed by concatenating, in this order:
 *   _hb_ucd_dm1_p0_map, _hb_ucd_dm1_p2_map  (single-character results),
 *   _hb_ucd_dm2_u32_map, _hb_ucd_dm2_u64_map (two-character results).
 *
 * Returns false, without writing @a or @b, when there is no decomposition.
 * Otherwise returns true; for single-character results @b is set to 0.
 * @ufuncs and @user_data are unused. */
static hb_bool_t
hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                  hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
                  void *user_data HB_UNUSED)
{
  if (_hb_ucd_decompose_hangul (ab, a, b)) return true;

  unsigned i = _hb_ucd_dm (ab);

  /* If no data, there's no decomposition. */
  if (likely (!i)) return false;
  i--;

  /* Check if it's a single-character decomposition. */
  if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
  {
    /* Single-character decompositions currently are only in plane 0 or plane 2. */
    if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
    {
      /* Plane 0. */
      *a = _hb_ucd_dm1_p0_map[i];
    }
    else
    {
      /* Plane 2: the table stores the value without the plane bits. */
      i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
      *a = 0x20000 | _hb_ucd_dm1_p2_map[i];
    }
    *b = 0;
    return true;
  }
  i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);

  /* Otherwise they are encoded either in a 32bit array or a 64bit array. */
  if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
  {
    /* 32bit array. */
    uint32_t v = _hb_ucd_dm2_u32_map[i];
    *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v);
    *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v);
    return true;
  }
  i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);

  /* 64bit array.  No bounds check here: the index is trusted to come from
   * the same generated data as the tables. */
  uint64_t v = _hb_ucd_dm2_u64_map[i];
  *a = HB_CODEPOINT_DECODE3_1 (v);
  *b = HB_CODEPOINT_DECODE3_2 (v);
  return true;
}


static void free_static_ucd_funcs ();
|
|
|
|
|
|
|
|
static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
|
|
|
|
{
|
|
|
|
static hb_unicode_funcs_t *create ()
|
|
|
|
{
|
|
|
|
hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
|
|
|
|
|
|
|
|
hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
|
|
|
|
hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
|
|
|
|
hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
|
|
|
|
hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
|
|
|
|
hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
|
|
|
|
hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
|
|
|
|
|
|
|
|
hb_unicode_funcs_make_immutable (funcs);
|
|
|
|
|
2021-09-14 13:09:54 +02:00
|
|
|
hb_atexit (free_static_ucd_funcs);
|
2019-05-22 22:21:21 +02:00
|
|
|
|
|
|
|
return funcs;
|
|
|
|
}
|
|
|
|
} static_ucd_funcs;
|
|
|
|
|
2021-09-14 13:09:54 +02:00
|
|
|
static inline
|
2019-05-22 22:21:21 +02:00
|
|
|
void free_static_ucd_funcs ()
|
|
|
|
{
|
|
|
|
static_ucd_funcs.free_instance ();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the UCD-backed unicode funcs object held by static_ucd_funcs.
 *
 * When HB_NO_UCD is defined the empty unicode funcs object is returned
 * instead.  The second return is deliberately left outside the #ifdef so
 * that static_ucd_funcs stays referenced in both configurations. */
hb_unicode_funcs_t *
hb_ucd_get_unicode_funcs ()
{
#ifdef HB_NO_UCD
  return hb_unicode_funcs_get_empty ();
#endif
  return static_ucd_funcs.get_unconst ();
}