[ucdn] Replace UCDN with a new UCD implementation

UCDN was ~120kb of data.  New implementation is 69kb in default builds,
and 49kb if built with HB_OPTIMIZE_SIZE or __OPTIMIZE_SIZE__.  The
latter is automatically enabled if built with -Os or -Oz.

There's room to shave off another 10kb or 20kb.  That will follow later.

Fixes https://github.com/harfbuzz/harfbuzz/issues/1652
This commit is contained in:
Behdad Esfahbod 2019-05-22 16:21:21 -04:00
parent 12c59f6c40
commit 65392b734e
16 changed files with 5427 additions and 7032 deletions

View File

@ -87,17 +87,6 @@ HBSOURCES += $(HB_CORETEXT_sources)
HBHEADERS += $(HB_CORETEXT_headers)
endif
if HAVE_UCDN
SUBDIRS += hb-ucdn
HBCFLAGS += -I$(srcdir)/hb-ucdn
HBLIBS += hb-ucdn/libhb-ucdn.la
HBSOURCES += $(HB_UCDN_sources)
hb-ucdn/libhb-ucdn.la: ucdn
ucdn:
@$(MAKE) $(AM_MAKEFLAGS) -C hb-ucdn
endif
DIST_SUBDIRS += hb-ucdn
BUILT_SOURCES += \
hb-version.h

View File

@ -145,6 +145,8 @@ HB_BASE_sources = \
hb-shaper.hh \
hb-static.cc \
hb-string-array.hh \
hb-ucd-table.hh \
hb-ucd.cc \
hb-unicode-emoji-table.hh \
hb-unicode.cc \
hb-unicode.hh \
@ -224,9 +226,6 @@ HB_DIRECTWRITE_headers = hb-directwrite.h
HB_UNISCRIBE_sources = hb-uniscribe.cc
HB_UNISCRIBE_headers = hb-uniscribe.h
# Additional supplemental sources
HB_UCDN_sources = hb-ucdn.cc
# Sources for libharfbuzz-gobject and libharfbuzz-icu
HB_ICU_sources = hb-icu.cc
HB_ICU_headers = hb-icu.h

View File

@ -24,12 +24,16 @@ sc = [u['sc'] for u in ucd]
dm = {i:tuple(int(v, 16) for v in u['dm'].split()) for i,u in enumerate(ucd)
if u['dm'] != '#' and u['dt'] == 'can' and not (0xAC00 <= i < 0xAC00+11172)}
ce = {i for i,u in enumerate(ucd) if u['Comp_Ex'] == 'Y'}
assert not any(v for v in dm.values() if len(v) not in (1,2))
dm1 = sorted(set(v for v in dm.values() if len(v) == 1))
dm1_array = ['0x%04Xu' % v for v in dm1]
dm1_order = {v:i+1 for i,v in enumerate(dm1)}
dm2 = sorted((v, i) for i,v in dm.items() if len(v) == 2)
dm2 = [("_HB_UCD_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % (v+(i,)), v) for v,i in dm2]
dm2 = [("HB_CODEPOINT_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" %
(v+(i if i not in ce and not ccc[i] else 0,)), v)
for v,i in dm2]
dm2_array = [s for s,v in dm2]
l = 1 + len(dm1_array)
dm2_order = {v[1]:i+l for i,v in enumerate(dm2)}
@ -63,7 +67,6 @@ DEFAULT = 1
COMPACT = 3
print("/* == Start of generated table == */")
print("/*")
print(" * The following table is generated by running:")
@ -78,7 +81,7 @@ print("#define HB_UCD_TABLE_HH")
print()
print()
print('#include <stdint.h>')
print('#include "hb.hh"')
print()
code = packTab.Code('_hb_ucd')
@ -90,7 +93,7 @@ code.print_c(linkage='static inline')
for compression in (DEFAULT, COMPACT):
print()
if compression == DEFAULT:
print('#ifdef HB_OPTIMIZE_SIZE')
print('#ifndef HB_OPTIMIZE_SIZE')
else:
print('#else')
print()

View File

@ -34,6 +34,14 @@
#include "hb-null.hh"
/* Encodes three unsigned integers in one 64-bit number. If the inputs have more than 21 bits,
* values will be truncated / overlap, and might not decode exactly.
*
* Layout, as given by the shifts below: x is shifted left by 42 bits, y by 21
* bits, and z occupies the low bits. Callers are responsible for keeping every
* input within 21 bits; nothing here masks the inputs. */
#define HB_CODEPOINT_ENCODE3(x,y,z) (((uint64_t) (x) << 42) | ((uint64_t) (y) << 21) | (uint64_t) (z))
/* Recovers the first (x) component: everything above the low 42 bits. */
#define HB_CODEPOINT_DECODE3_1(v) ((hb_codepoint_t) ((v) >> 42))
/* Recovers the second (y) component: shift down by 21, then keep 21 bits. */
#define HB_CODEPOINT_DECODE3_2(v) ((hb_codepoint_t) ((v) >> 21) & 0x1FFFFFu)
/* Recovers the third (z) component: the low 21 bits. */
#define HB_CODEPOINT_DECODE3_3(v) ((hb_codepoint_t) (v) & 0x1FFFFFu)
struct
{
/* Note. This is dangerous in that if it's passed an rvalue, it returns rvalue-reference. */

View File

@ -158,84 +158,82 @@ OT::GDEF::is_blacklisted (hb_blob_t *blob,
* https://bugzilla.mozilla.org/show_bug.cgi?id=1279693
* https://bugzilla.mozilla.org/show_bug.cgi?id=1279875
*/
#define ENCODE(x,y,z) (((uint64_t) (x) << 48) | ((uint64_t) (y) << 24) | (uint64_t) (z))
switch ENCODE(blob->length,
face->table.GSUB->table.get_length (),
face->table.GPOS->table.get_length ())
switch HB_CODEPOINT_ENCODE3(blob->length,
face->table.GSUB->table.get_length (),
face->table.GPOS->table.get_length ())
{
/* sha1sum:c5ee92f0bca4bfb7d06c4d03e8cf9f9cf75d2e8a Windows 7? timesi.ttf */
case ENCODE (442, 2874, 42038):
case HB_CODEPOINT_ENCODE3 (442, 2874, 42038):
/* sha1sum:37fc8c16a0894ab7b749e35579856c73c840867b Windows 7? timesbi.ttf */
case ENCODE (430, 2874, 40662):
case HB_CODEPOINT_ENCODE3 (430, 2874, 40662):
/* sha1sum:19fc45110ea6cd3cdd0a5faca256a3797a069a80 Windows 7 timesi.ttf */
case ENCODE (442, 2874, 39116):
case HB_CODEPOINT_ENCODE3 (442, 2874, 39116):
/* sha1sum:6d2d3c9ed5b7de87bc84eae0df95ee5232ecde26 Windows 7 timesbi.ttf */
case ENCODE (430, 2874, 39374):
case HB_CODEPOINT_ENCODE3 (430, 2874, 39374):
/* sha1sum:8583225a8b49667c077b3525333f84af08c6bcd8 OS X 10.11.3 Times New Roman Italic.ttf */
case ENCODE (490, 3046, 41638):
case HB_CODEPOINT_ENCODE3 (490, 3046, 41638):
/* sha1sum:ec0f5a8751845355b7c3271d11f9918a966cb8c9 OS X 10.11.3 Times New Roman Bold Italic.ttf */
case ENCODE (478, 3046, 41902):
case HB_CODEPOINT_ENCODE3 (478, 3046, 41902):
/* sha1sum:96eda93f7d33e79962451c6c39a6b51ee893ce8c tahoma.ttf from Windows 8 */
case ENCODE (898, 12554, 46470):
case HB_CODEPOINT_ENCODE3 (898, 12554, 46470):
/* sha1sum:20928dc06014e0cd120b6fc942d0c3b1a46ac2bc tahomabd.ttf from Windows 8 */
case ENCODE (910, 12566, 47732):
case HB_CODEPOINT_ENCODE3 (910, 12566, 47732):
/* sha1sum:4f95b7e4878f60fa3a39ca269618dfde9721a79e tahoma.ttf from Windows 8.1 */
case ENCODE (928, 23298, 59332):
case HB_CODEPOINT_ENCODE3 (928, 23298, 59332):
/* sha1sum:6d400781948517c3c0441ba42acb309584b73033 tahomabd.ttf from Windows 8.1 */
case ENCODE (940, 23310, 60732):
case HB_CODEPOINT_ENCODE3 (940, 23310, 60732):
/* tahoma.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
case ENCODE (964, 23836, 60072):
case HB_CODEPOINT_ENCODE3 (964, 23836, 60072):
/* tahomabd.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
case ENCODE (976, 23832, 61456):
case HB_CODEPOINT_ENCODE3 (976, 23832, 61456):
/* sha1sum:e55fa2dfe957a9f7ec26be516a0e30b0c925f846 tahoma.ttf from Windows 10 */
case ENCODE (994, 24474, 60336):
case HB_CODEPOINT_ENCODE3 (994, 24474, 60336):
/* sha1sum:7199385abb4c2cc81c83a151a7599b6368e92343 tahomabd.ttf from Windows 10 */
case ENCODE (1006, 24470, 61740):
case HB_CODEPOINT_ENCODE3 (1006, 24470, 61740):
/* tahoma.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
case ENCODE (1006, 24576, 61346):
case HB_CODEPOINT_ENCODE3 (1006, 24576, 61346):
/* tahomabd.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
case ENCODE (1018, 24572, 62828):
case HB_CODEPOINT_ENCODE3 (1018, 24572, 62828):
/* sha1sum:b9c84d820c49850d3d27ec498be93955b82772b5 tahoma.ttf from Windows 10 AU */
case ENCODE (1006, 24576, 61352):
case HB_CODEPOINT_ENCODE3 (1006, 24576, 61352):
/* sha1sum:2bdfaab28174bdadd2f3d4200a30a7ae31db79d2 tahomabd.ttf from Windows 10 AU */
case ENCODE (1018, 24572, 62834):
case HB_CODEPOINT_ENCODE3 (1018, 24572, 62834):
/* sha1sum:b0d36cf5a2fbe746a3dd277bffc6756a820807a7 Tahoma.ttf from Mac OS X 10.9 */
case ENCODE (832, 7324, 47162):
case HB_CODEPOINT_ENCODE3 (832, 7324, 47162):
/* sha1sum:12fc4538e84d461771b30c18b5eb6bd434e30fba Tahoma Bold.ttf from Mac OS X 10.9 */
case ENCODE (844, 7302, 45474):
case HB_CODEPOINT_ENCODE3 (844, 7302, 45474):
/* sha1sum:eb8afadd28e9cf963e886b23a30b44ab4fd83acc himalaya.ttf from Windows 7 */
case ENCODE (180, 13054, 7254):
case HB_CODEPOINT_ENCODE3 (180, 13054, 7254):
/* sha1sum:73da7f025b238a3f737aa1fde22577a6370f77b0 himalaya.ttf from Windows 8 */
case ENCODE (192, 12638, 7254):
case HB_CODEPOINT_ENCODE3 (192, 12638, 7254):
/* sha1sum:6e80fd1c0b059bbee49272401583160dc1e6a427 himalaya.ttf from Windows 8.1 */
case ENCODE (192, 12690, 7254):
case HB_CODEPOINT_ENCODE3 (192, 12690, 7254):
/* 8d9267aea9cd2c852ecfb9f12a6e834bfaeafe44 cantarell-fonts-0.0.21/otf/Cantarell-Regular.otf */
/* 983988ff7b47439ab79aeaf9a45bd4a2c5b9d371 cantarell-fonts-0.0.21/otf/Cantarell-Oblique.otf */
case ENCODE (188, 248, 3852):
case HB_CODEPOINT_ENCODE3 (188, 248, 3852):
/* 2c0c90c6f6087ffbfea76589c93113a9cbb0e75f cantarell-fonts-0.0.21/otf/Cantarell-Bold.otf */
/* 55461f5b853c6da88069ffcdf7f4dd3f8d7e3e6b cantarell-fonts-0.0.21/otf/Cantarell-Bold-Oblique.otf */
case ENCODE (188, 264, 3426):
case HB_CODEPOINT_ENCODE3 (188, 264, 3426):
/* d125afa82a77a6475ac0e74e7c207914af84b37a padauk-2.80/Padauk.ttf RHEL 7.2 */
case ENCODE (1058, 47032, 11818):
case HB_CODEPOINT_ENCODE3 (1058, 47032, 11818):
/* 0f7b80437227b90a577cc078c0216160ae61b031 padauk-2.80/Padauk-Bold.ttf RHEL 7.2*/
case ENCODE (1046, 47030, 12600):
case HB_CODEPOINT_ENCODE3 (1046, 47030, 12600):
/* d3dde9aa0a6b7f8f6a89ef1002e9aaa11b882290 padauk-2.80/Padauk.ttf Ubuntu 16.04 */
case ENCODE (1058, 71796, 16770):
case HB_CODEPOINT_ENCODE3 (1058, 71796, 16770):
/* 5f3c98ccccae8a953be2d122c1b3a77fd805093f padauk-2.80/Padauk-Bold.ttf Ubuntu 16.04 */
case ENCODE (1046, 71790, 17862):
case HB_CODEPOINT_ENCODE3 (1046, 71790, 17862):
/* 6c93b63b64e8b2c93f5e824e78caca555dc887c7 padauk-2.80/Padauk-book.ttf */
case ENCODE (1046, 71788, 17112):
case HB_CODEPOINT_ENCODE3 (1046, 71788, 17112):
/* d89b1664058359b8ec82e35d3531931125991fb9 padauk-2.80/Padauk-bookbold.ttf */
case ENCODE (1058, 71794, 17514):
case HB_CODEPOINT_ENCODE3 (1058, 71794, 17514):
/* 824cfd193aaf6234b2b4dc0cf3c6ef576c0d00ef padauk-3.0/Padauk-book.ttf */
case ENCODE (1330, 109904, 57938):
case HB_CODEPOINT_ENCODE3 (1330, 109904, 57938):
/* 91fcc10cf15e012d27571e075b3b4dfe31754a8a padauk-3.0/Padauk-bookbold.ttf */
case ENCODE (1330, 109904, 58972):
case HB_CODEPOINT_ENCODE3 (1330, 109904, 58972):
/* sha1sum: c26e41d567ed821bed997e937bc0c41435689e85 Padauk.ttf
* "Padauk Regular" "Version 2.5", see https://crbug.com/681813 */
case ENCODE (1004, 59092, 14836):
case HB_CODEPOINT_ENCODE3 (1004, 59092, 14836):
return true;
#undef ENCODE
}
return false;
}

5160
src/hb-ucd-table.hh Normal file

File diff suppressed because it is too large Load Diff

209
src/hb-ucd.cc Normal file
View File

@ -0,0 +1,209 @@
/*
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "hb.hh"
#include "hb-machinery.hh"
#include "hb-ucd-table.hh"
/* Unicode-funcs callback: looks up the combining class of `unicode` in the
 * generated _hb_ucd_ccc table.  `ufuncs` and `user_data` are unused. */
static hb_unicode_combining_class_t
hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
			hb_codepoint_t unicode,
			void *user_data HB_UNUSED)
{
  const auto ccc = _hb_ucd_ccc (unicode);
  return (hb_unicode_combining_class_t) ccc;
}
/* Unicode-funcs callback: looks up the general category of `unicode` in the
 * generated _hb_ucd_gc table.  `ufuncs` and `user_data` are unused. */
static hb_unicode_general_category_t
hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
			 hb_codepoint_t unicode,
			 void *user_data HB_UNUSED)
{
  const auto gc = _hb_ucd_gc (unicode);
  return (hb_unicode_general_category_t) gc;
}
/* Unicode-funcs callback: returns `unicode` plus the value that the generated
 * _hb_ucd_bmg table holds for it.  `ufuncs` and `user_data` are unused. */
static hb_codepoint_t
hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		  hb_codepoint_t unicode,
		  void *user_data HB_UNUSED)
{
  const auto delta = _hb_ucd_bmg (unicode);
  return unicode + delta;
}
/* Unicode-funcs callback: maps `unicode` to a script index via _hb_ucd_sc and
 * translates that index through _hb_ucd_sc_map.  `ufuncs` and `user_data` are
 * unused. */
static hb_script_t
hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
	       hb_codepoint_t unicode,
	       void *user_data HB_UNUSED)
{
  const auto script_index = _hb_ucd_sc (unicode);
  return _hb_ucd_sc_map[script_index];
}
/* Constants used by the algorithmic Hangul (de)composition helpers below.
 * The *BASE values are the starting code points of each range and the *COUNT
 * values are the range sizes, as used by the range checks in those helpers. */
/* First precomposed syllable. */
#define SBASE 0xAC00u
/* First leading (L) jamo. */
#define LBASE 0x1100u
/* First vowel (V) jamo. */
#define VBASE 0x1161u
/* Base for trailing (T) jamo; the helpers require b > TBASE, so TBASE itself
 * is never a valid trailing jamo. */
#define TBASE 0x11A7u
#define SCOUNT 11172u
#define LCOUNT 19u
#define VCOUNT 21u
#define TCOUNT 28u
/* Number of syllables per leading jamo. */
#define NCOUNT (VCOUNT * TCOUNT)
/* Algorithmically splits a precomposed Hangul syllable `ab` into two parts.
 *
 * Returns false, leaving *a and *b untouched, when `ab` lies outside
 * [SBASE, SBASE + SCOUNT).  Otherwise returns true with either
 *   - *a = the LV syllable and *b = the trailing jamo (LV,T case), or
 *   - *a = the leading jamo and *b = the vowel jamo (L,V case). */
static inline bool
_hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
{
  /* Unsigned subtraction: anything below SBASE wraps to a huge value and is
   * rejected by the same comparison as anything past the end. */
  const unsigned syllable = ab - SBASE;
  if (syllable >= SCOUNT)
    return false;

  const unsigned trailing = syllable % TCOUNT;
  if (trailing == 0)
  {
    /* L,V */
    *a = LBASE + (syllable / NCOUNT);
    *b = VBASE + (syllable % NCOUNT) / TCOUNT;
    return true;
  }

  /* LV,T */
  *a = SBASE + (syllable - trailing);
  *b = TBASE + trailing;
  return true;
}
/* Algorithmically composes a Hangul pair.
 *
 * Handles two cases: an LV syllable followed by a trailing jamo (LV,T), and a
 * leading jamo followed by a vowel jamo (L,V).  On success stores the composed
 * syllable in *ab and returns true; otherwise returns false and leaves *ab
 * untouched. */
static inline bool
_hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
{
  const bool a_is_lv = a >= SBASE && a < (SBASE + SCOUNT) && (a - SBASE) % TCOUNT == 0;
  const bool b_is_t = b > TBASE && b < (TBASE + TCOUNT);
  if (a_is_lv && b_is_t)
  {
    /* LV,T */
    *ab = a + (b - TBASE);
    return true;
  }

  const bool a_is_l = a >= LBASE && a < (LBASE + LCOUNT);
  const bool b_is_v = b >= VBASE && b < (VBASE + VCOUNT);
  if (a_is_l && b_is_v)
  {
    /* L,V */
    const unsigned l_index = a - LBASE;
    const unsigned v_index = b - VBASE;
    *ab = SBASE + l_index * NCOUNT + v_index * TCOUNT;
    return true;
  }

  return false;
}
/* bsearch comparator for the packed composition table.
 *
 * `_key` points to a 64-bit key and `_item` to a 64-bit table entry.  The low
 * 21 bits of the entry are masked off before comparing, so only the two upper
 * fields of the entry take part in the comparison. */
static int
_cmp_pair (const void *_key, const void *_item)
{
  const uint64_t pair_mask = HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
  const uint64_t key = * (const uint64_t *) _key;
  const uint64_t item = (* (const uint64_t *) _item) & pair_mask;

  if (key < item) return -1;
  if (key > item) return +1;
  return 0;
}
/* Unicode-funcs callback: canonical composition of the pair (a, b).
 *
 * Hangul is tried first via the algorithmic helper.  Otherwise the pair is
 * packed into a 64-bit key and binary-searched in _hb_ucd_dm2_map; the low
 * field of the matching entry is the composed code point.  An entry whose low
 * field is zero is treated as "no composition".
 *
 * Returns true and stores the result in *ab on success; returns false and
 * leaves *ab untouched otherwise.  `ufuncs` and `user_data` are unused. */
static hb_bool_t
hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
		void *user_data HB_UNUSED)
{
  if (_hb_ucd_compose_hangul (a, b, ab)) return true;

  /* HB_CODEPOINT_ENCODE3 gives each input a 21-bit field and does not mask.
   * Wider inputs would spill into the neighbouring field and could alias the
   * key of an unrelated, valid pair, so reject them before building the key. */
  if (unlikely (a > 0x1FFFFFu || b > 0x1FFFFFu)) return false;

  uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
  uint64_t *v = (uint64_t*) hb_bsearch (&k, _hb_ucd_dm2_map,
					ARRAY_LENGTH (_hb_ucd_dm2_map),
					sizeof (*_hb_ucd_dm2_map),
					_cmp_pair);
  if (likely (!v)) return false;

  hb_codepoint_t u = HB_CODEPOINT_DECODE3_3 (*v);
  if (unlikely (!u)) return false;

  *ab = u;
  return true;
}
/* Unicode-funcs callback: canonical decomposition of `ab`.
 *
 * Hangul is tried first via the algorithmic helper.  Otherwise _hb_ucd_dm
 * yields a 1-based index (0 meaning "no decomposition") into the concatenation
 * of the single-code-point table _hb_ucd_dm1_map and the packed pair table
 * _hb_ucd_dm2_map.  For a single-code-point decomposition *b is set to 0.
 *
 * Returns true when *a and *b were written.  `ufuncs` and `user_data` are
 * unused. */
static hb_bool_t
hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		  hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
		  void *user_data HB_UNUSED)
{
  if (_hb_ucd_decompose_hangul (ab, a, b)) return true;

  unsigned slot = _hb_ucd_dm (ab);
  if (likely (slot == 0)) return false;
  slot -= 1;

  const unsigned n_single = ARRAY_LENGTH (_hb_ucd_dm1_map);
  if (slot >= n_single)
  {
    /* Two-code-point decomposition, stored packed. */
    const uint64_t packed = _hb_ucd_dm2_map[slot - n_single];
    *a = HB_CODEPOINT_DECODE3_1 (packed);
    *b = HB_CODEPOINT_DECODE3_2 (packed);
    return true;
  }

  /* Single-code-point decomposition. */
  *a = _hb_ucd_dm1_map[slot];
  *b = 0;
  return true;
}
/* Forward declaration: create() below registers this with atexit() before the
 * function is defined. */
#if HB_USE_ATEXIT
static void free_static_ucd_funcs ();
#endif
/* Lazy loader holding the hb_unicode_funcs_t that is backed by the hb_ucd_*
 * callbacks defined in this file.  The single instance is static_ucd_funcs. */
static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
{
/* Builds the funcs object: installs all six callbacks (with null user data and
 * null destroy arguments), then marks the object immutable. */
static hb_unicode_funcs_t *create ()
{
/* NOTE(review): the nullptr argument is presumably the parent funcs -- confirm
 * against hb_unicode_funcs_create(). */
hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
hb_unicode_funcs_make_immutable (funcs);
/* When enabled, arrange for the instance to be released at process exit. */
#if HB_USE_ATEXIT
atexit (free_static_ucd_funcs);
#endif
return funcs;
}
} static_ucd_funcs;
#if HB_USE_ATEXIT
/* atexit() hook: releases the instance held by static_ucd_funcs. */
static
void free_static_ucd_funcs ()
{
static_ucd_funcs.free_instance ();
}
#endif
/* Library-internal entry point with C linkage: returns the unicode funcs
 * object held by the static_ucd_funcs lazy loader. */
extern "C" HB_INTERNAL
hb_unicode_funcs_t *
hb_ucd_get_unicode_funcs ();

hb_unicode_funcs_t *
hb_ucd_get_unicode_funcs ()
{
  hb_unicode_funcs_t *funcs = static_ucd_funcs.get_unconst ();
  return funcs;
}

View File

@ -1,272 +0,0 @@
/*
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "hb.hh"
#include "hb-machinery.hh"
#include "ucdn.h"
/* Translation table from UCDN script values to hb_script_t.  It is indexed by
 * the value returned from ucdn_get_script() (see hb_ucdn_script), so the order
 * of the entries must match UCDN's own script numbering.
 * NOTE(review): that numbering lives in ucdn.h, which is not fully visible
 * here -- confirm before reordering or inserting entries. */
static const hb_script_t ucdn_script_translate[] =
{
HB_SCRIPT_COMMON,
HB_SCRIPT_LATIN,
HB_SCRIPT_GREEK,
HB_SCRIPT_CYRILLIC,
HB_SCRIPT_ARMENIAN,
HB_SCRIPT_HEBREW,
HB_SCRIPT_ARABIC,
HB_SCRIPT_SYRIAC,
HB_SCRIPT_THAANA,
HB_SCRIPT_DEVANAGARI,
HB_SCRIPT_BENGALI,
HB_SCRIPT_GURMUKHI,
HB_SCRIPT_GUJARATI,
HB_SCRIPT_ORIYA,
HB_SCRIPT_TAMIL,
HB_SCRIPT_TELUGU,
HB_SCRIPT_KANNADA,
HB_SCRIPT_MALAYALAM,
HB_SCRIPT_SINHALA,
HB_SCRIPT_THAI,
HB_SCRIPT_LAO,
HB_SCRIPT_TIBETAN,
HB_SCRIPT_MYANMAR,
HB_SCRIPT_GEORGIAN,
HB_SCRIPT_HANGUL,
HB_SCRIPT_ETHIOPIC,
HB_SCRIPT_CHEROKEE,
HB_SCRIPT_CANADIAN_SYLLABICS,
HB_SCRIPT_OGHAM,
HB_SCRIPT_RUNIC,
HB_SCRIPT_KHMER,
HB_SCRIPT_MONGOLIAN,
HB_SCRIPT_HIRAGANA,
HB_SCRIPT_KATAKANA,
HB_SCRIPT_BOPOMOFO,
HB_SCRIPT_HAN,
HB_SCRIPT_YI,
HB_SCRIPT_OLD_ITALIC,
HB_SCRIPT_GOTHIC,
HB_SCRIPT_DESERET,
HB_SCRIPT_INHERITED,
HB_SCRIPT_TAGALOG,
HB_SCRIPT_HANUNOO,
HB_SCRIPT_BUHID,
HB_SCRIPT_TAGBANWA,
HB_SCRIPT_LIMBU,
HB_SCRIPT_TAI_LE,
HB_SCRIPT_LINEAR_B,
HB_SCRIPT_UGARITIC,
HB_SCRIPT_SHAVIAN,
HB_SCRIPT_OSMANYA,
HB_SCRIPT_CYPRIOT,
HB_SCRIPT_BRAILLE,
HB_SCRIPT_BUGINESE,
HB_SCRIPT_COPTIC,
HB_SCRIPT_NEW_TAI_LUE,
HB_SCRIPT_GLAGOLITIC,
HB_SCRIPT_TIFINAGH,
HB_SCRIPT_SYLOTI_NAGRI,
HB_SCRIPT_OLD_PERSIAN,
HB_SCRIPT_KHAROSHTHI,
HB_SCRIPT_BALINESE,
HB_SCRIPT_CUNEIFORM,
HB_SCRIPT_PHOENICIAN,
HB_SCRIPT_PHAGS_PA,
HB_SCRIPT_NKO,
HB_SCRIPT_SUNDANESE,
HB_SCRIPT_LEPCHA,
HB_SCRIPT_OL_CHIKI,
HB_SCRIPT_VAI,
HB_SCRIPT_SAURASHTRA,
HB_SCRIPT_KAYAH_LI,
HB_SCRIPT_REJANG,
HB_SCRIPT_LYCIAN,
HB_SCRIPT_CARIAN,
HB_SCRIPT_LYDIAN,
HB_SCRIPT_CHAM,
HB_SCRIPT_TAI_THAM,
HB_SCRIPT_TAI_VIET,
HB_SCRIPT_AVESTAN,
HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
HB_SCRIPT_SAMARITAN,
HB_SCRIPT_LISU,
HB_SCRIPT_BAMUM,
HB_SCRIPT_JAVANESE,
HB_SCRIPT_MEETEI_MAYEK,
HB_SCRIPT_IMPERIAL_ARAMAIC,
HB_SCRIPT_OLD_SOUTH_ARABIAN,
HB_SCRIPT_INSCRIPTIONAL_PARTHIAN,
HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
HB_SCRIPT_OLD_TURKIC,
HB_SCRIPT_KAITHI,
HB_SCRIPT_BATAK,
HB_SCRIPT_BRAHMI,
HB_SCRIPT_MANDAIC,
HB_SCRIPT_CHAKMA,
HB_SCRIPT_MEROITIC_CURSIVE,
HB_SCRIPT_MEROITIC_HIEROGLYPHS,
HB_SCRIPT_MIAO,
HB_SCRIPT_SHARADA,
HB_SCRIPT_SORA_SOMPENG,
HB_SCRIPT_TAKRI,
HB_SCRIPT_UNKNOWN,
HB_SCRIPT_BASSA_VAH,
HB_SCRIPT_CAUCASIAN_ALBANIAN,
HB_SCRIPT_DUPLOYAN,
HB_SCRIPT_ELBASAN,
HB_SCRIPT_GRANTHA,
HB_SCRIPT_KHOJKI,
HB_SCRIPT_KHUDAWADI,
HB_SCRIPT_LINEAR_A,
HB_SCRIPT_MAHAJANI,
HB_SCRIPT_MANICHAEAN,
HB_SCRIPT_MENDE_KIKAKUI,
HB_SCRIPT_MODI,
HB_SCRIPT_MRO,
HB_SCRIPT_NABATAEAN,
HB_SCRIPT_OLD_NORTH_ARABIAN,
HB_SCRIPT_OLD_PERMIC,
HB_SCRIPT_PAHAWH_HMONG,
HB_SCRIPT_PALMYRENE,
HB_SCRIPT_PAU_CIN_HAU,
HB_SCRIPT_PSALTER_PAHLAVI,
HB_SCRIPT_SIDDHAM,
HB_SCRIPT_TIRHUTA,
HB_SCRIPT_WARANG_CITI,
HB_SCRIPT_AHOM,
HB_SCRIPT_ANATOLIAN_HIEROGLYPHS,
HB_SCRIPT_HATRAN,
HB_SCRIPT_MULTANI,
HB_SCRIPT_OLD_HUNGARIAN,
HB_SCRIPT_SIGNWRITING,
HB_SCRIPT_ADLAM,
HB_SCRIPT_BHAIKSUKI,
HB_SCRIPT_MARCHEN,
HB_SCRIPT_NEWA,
HB_SCRIPT_OSAGE,
HB_SCRIPT_TANGUT,
HB_SCRIPT_MASARAM_GONDI,
HB_SCRIPT_NUSHU,
HB_SCRIPT_SOYOMBO,
HB_SCRIPT_ZANABAZAR_SQUARE,
HB_SCRIPT_DOGRA,
HB_SCRIPT_GUNJALA_GONDI,
HB_SCRIPT_HANIFI_ROHINGYA,
HB_SCRIPT_MAKASAR,
HB_SCRIPT_MEDEFAIDRIN,
HB_SCRIPT_OLD_SOGDIAN,
HB_SCRIPT_SOGDIAN,
HB_SCRIPT_ELYMAIC,
HB_SCRIPT_NANDINAGARI,
HB_SCRIPT_NYIAKENG_PUACHUE_HMONG,
HB_SCRIPT_WANCHO,
};
/* Unicode-funcs callback: combining class of `unicode`, as reported by UCDN.
 * `ufuncs` and `user_data` are unused. */
static hb_unicode_combining_class_t
hb_ucdn_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
			 hb_codepoint_t unicode,
			 void *user_data HB_UNUSED)
{
  const int ccc = ucdn_get_combining_class (unicode);
  return (hb_unicode_combining_class_t) ccc;
}
/* Unicode-funcs callback: general category of `unicode`, as reported by UCDN.
 * `ufuncs` and `user_data` are unused. */
static hb_unicode_general_category_t
hb_ucdn_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
			  hb_codepoint_t unicode,
			  void *user_data HB_UNUSED)
{
  const int gc = ucdn_get_general_category (unicode);
  return (hb_unicode_general_category_t) gc;
}
/* Unicode-funcs callback: forwards to ucdn_mirror() and returns its result.
 * `ufuncs` and `user_data` are unused. */
static hb_codepoint_t
hb_ucdn_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		   hb_codepoint_t unicode,
		   void *user_data HB_UNUSED)
{
  const uint32_t mirrored = ucdn_mirror (unicode);
  return mirrored;
}
/* Unicode-funcs callback: fetches UCDN's script value for `unicode` and
 * translates it to hb_script_t through ucdn_script_translate.  `ufuncs` and
 * `user_data` are unused. */
static hb_script_t
hb_ucdn_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		hb_codepoint_t unicode,
		void *user_data HB_UNUSED)
{
  const int ucdn_script = ucdn_get_script (unicode);
  return ucdn_script_translate[ucdn_script];
}
/* Unicode-funcs callback: forwards to ucdn_compose(), which takes the output
 * pointer first.  Returns its result unchanged.  `ufuncs` and `user_data` are
 * unused. */
static hb_bool_t
hb_ucdn_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		 hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
		 void *user_data HB_UNUSED)
{
  const int composed = ucdn_compose (ab, a, b);
  return composed;
}
/* Unicode-funcs callback: forwards to ucdn_decompose() and returns its result
 * unchanged.  `ufuncs` and `user_data` are unused. */
static hb_bool_t
hb_ucdn_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		   hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
		   void *user_data HB_UNUSED)
{
  const int decomposed = ucdn_decompose (ab, a, b);
  return decomposed;
}
/* Forward declaration: create() below registers this with atexit() before the
 * function is defined. */
#if HB_USE_ATEXIT
static void free_static_ucdn_funcs ();
#endif
/* Lazy loader holding the hb_unicode_funcs_t that is backed by the hb_ucdn_*
 * callbacks defined in this file.  The single instance is static_ucdn_funcs. */
static struct hb_ucdn_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucdn_unicode_funcs_lazy_loader_t>
{
/* Builds the funcs object: installs all six callbacks (with null user data and
 * null destroy arguments), then marks the object immutable. */
static hb_unicode_funcs_t *create ()
{
hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
hb_unicode_funcs_set_combining_class_func (funcs, hb_ucdn_combining_class, nullptr, nullptr);
hb_unicode_funcs_set_general_category_func (funcs, hb_ucdn_general_category, nullptr, nullptr);
hb_unicode_funcs_set_mirroring_func (funcs, hb_ucdn_mirroring, nullptr, nullptr);
hb_unicode_funcs_set_script_func (funcs, hb_ucdn_script, nullptr, nullptr);
hb_unicode_funcs_set_compose_func (funcs, hb_ucdn_compose, nullptr, nullptr);
hb_unicode_funcs_set_decompose_func (funcs, hb_ucdn_decompose, nullptr, nullptr);
hb_unicode_funcs_make_immutable (funcs);
/* When enabled, arrange for the instance to be released at process exit. */
#if HB_USE_ATEXIT
atexit (free_static_ucdn_funcs);
#endif
return funcs;
}
} static_ucdn_funcs;
#if HB_USE_ATEXIT
/* atexit() hook: releases the instance held by static_ucdn_funcs. */
static
void free_static_ucdn_funcs ()
{
static_ucdn_funcs.free_instance ();
}
#endif
/* Library-internal entry point with C linkage: returns the unicode funcs
 * object held by the static_ucdn_funcs lazy loader. */
extern "C" HB_INTERNAL
hb_unicode_funcs_t *
hb_ucdn_get_unicode_funcs ();

hb_unicode_funcs_t *
hb_ucdn_get_unicode_funcs ()
{
  hb_unicode_funcs_t *funcs = static_ucdn_funcs.get_unconst ();
  return funcs;
}

View File

@ -1,13 +0,0 @@
The contents of this directory are licensed under the following terms:
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

View File

@ -1,16 +0,0 @@
## Process this file with automake to produce Makefile.in
noinst_LTLIBRARIES = libhb-ucdn.la
include Makefile.sources
libhb_ucdn_la_SOURCES = $(LIBHB_UCDN_sources)
libhb_ucdn_la_CPPFLAGS = \
-I$(top_srcdir) \
-I$(top_srcdir)/src \
-I$(top_builddir)/src
libhb_ucdn_la_LIBADD =
EXTRA_DIST = README COPYING
-include $(top_srcdir)/git.mk

View File

@ -1,7 +0,0 @@
NULL =
LIBHB_UCDN_sources = \
ucdn.h \
ucdn.c \
ucdn_db.h \
$(NULL)

View File

@ -1,40 +0,0 @@
Contents of this directory are derived from UCDN:
https://github.com/grigorig/ucdn
The original README follows:
UCDN - Unicode Database and Normalization
UCDN is a Unicode support library. Currently, it provides access
to basic character properties contained in the Unicode Character
Database and low-level normalization functions (pairwise canonical
composition/decomposition and compatibility decomposition). More
functionality might be provided in the future, such as additional
properties, string normalization and encoding conversion.
UCDN uses standard C89 with no particular dependencies or requirements
except for stdint.h, and can be easily integrated into existing
projects. However, it can also be used as a standalone library,
and a CMake build script is provided for this. The first motivation
behind UCDN development was to provide a standalone set of Unicode
functions for the HarfBuzz OpenType shaping library. For this purpose,
a HarfBuzz-specific wrapper is shipped along with it (hb-ucdn.h).
UCDN is published under the ISC license, please see the license header
in the C source code for more information. The makeunicodedata.py script
required for parsing Unicode database files is licensed under the
PSF license, please see PYTHON-LICENSE for more information.
UCDN was written by Grigori Goronzy <greg@kinoho.net>.
How to Use
Include ucdn.c, ucdn.h and ucdn_db.h in your project. Now, just use the
functions as documented in ucdn.h.
In some cases, it might be necessary to regenerate the Unicode
database file. The script makeunicodedata.py (Python 3.x required)
fetches the appropriate files and dumps the compressed database into
ucdn_db.h.

View File

@ -1,361 +0,0 @@
/*
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "ucdn.h"
/* Per-code-point property record; one byte per property.  Records are looked
 * up through get_ucd_record(). */
typedef struct {
unsigned char category;
unsigned char combining;
unsigned char bidi_class;
unsigned char east_asian_width;
unsigned char script;
unsigned char linebreak_class;
} UCDRecord;
/* Bidi mirroring pair, searched by `from`.  Both members are 16 bits wide, so
 * only values up to 0xFFFF can be represented. */
typedef struct {
unsigned short from, to;
} MirrorPair;
/* Paired-bracket entry, searched by `from`; `type` is the bracket type that
 * ucdn_paired_bracket_type() returns.  `from`/`to` are 16 bits wide. */
typedef struct {
unsigned short from, to;
unsigned char type;
} BracketPair;
/* Range used by get_comp_index(): codes from `start` through `start + count`
 * map to `index + (code - start)`. */
typedef struct {
unsigned int start;
short count, index;
} Reindex;
#include "ucdn_db.h"
/* constants required for Hangul (de)composition */
/* The *BASE values are the starting code points of each range and the *COUNT
 * values are the range sizes, as used by hangul_pair_decompose() and
 * hangul_pair_compose(). */
#define SBASE 0xAC00
#define LBASE 0x1100
#define VBASE 0x1161
/* The compose helper requires b > TBASE, so TBASE itself is never a valid
 * trailing jamo. */
#define TBASE 0x11A7
#define SCOUNT 11172
#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28
/* Number of syllables per leading jamo. */
#define NCOUNT (VCOUNT * TCOUNT)
static const UCDRecord *get_ucd_record(uint32_t code)
{
int index, offset;
if (code >= 0x110000)
index = 0;
else {
index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1;
offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1);
index = index1[index + offset] << SHIFT2;
offset = code & ((1<<SHIFT2) - 1);
index = index2[index + offset];
}
return &ucd_records[index];
}
/* Returns a pointer into decomp_data for `code` through the three-stage lookup
 * decomp_index0 -> decomp_index1 -> decomp_index2.  Codes at or above 0x110000
 * get entry 0. */
static const unsigned short *get_decomp_record(uint32_t code)
{
    const int mask1 = (1 << DECOMP_SHIFT1) - 1;
    const int mask2 = (1 << DECOMP_SHIFT2) - 1;
    int block, slot, entry;

    if (code >= 0x110000) {
        return &decomp_data[0];
    }

    block = decomp_index0[code >> (DECOMP_SHIFT1 + DECOMP_SHIFT2)]
        << DECOMP_SHIFT1;
    slot = (code >> DECOMP_SHIFT2) & mask1;

    block = decomp_index1[block + slot] << DECOMP_SHIFT2;
    slot = code & mask2;

    entry = decomp_index2[block + slot];
    return &decomp_data[entry];
}
static int compare_reindex(const void *a, const void *b)
{
Reindex *ra = (Reindex *)a;
Reindex *rb = (Reindex *)b;
if (ra->start < rb->start)
return -1;
else if (ra->start > (rb->start + rb->count))
return 1;
else
return 0;
}
static int get_comp_index(uint32_t code, const Reindex *idx, size_t len)
{
Reindex *res;
Reindex r = {0, 0, 0};
r.start = code;
res = (Reindex *) bsearch(&r, idx, len, sizeof(Reindex), compare_reindex);
if (res != NULL)
return res->index + (code - res->start);
else
return -1;
}
/* bsearch comparator ordering MirrorPair entries by their `from` member. */
static int compare_mp(const void *a, const void *b)
{
    const MirrorPair *lhs = (const MirrorPair *)a;
    const MirrorPair *rhs = (const MirrorPair *)b;
    const int delta = lhs->from - rhs->from;

    return delta;
}
/* bsearch comparator ordering BracketPair entries by their `from` member. */
static int compare_bp(const void *a, const void *b)
{
    const BracketPair *lhs = (const BracketPair *)a;
    const BracketPair *rhs = (const BracketPair *)b;
    const int delta = lhs->from - rhs->from;

    return delta;
}
/* Finds the paired-bracket entry whose `from` equals `code`.
 * Returns NULL when there is none. */
static BracketPair *search_bp(uint32_t code)
{
    BracketPair bp = {0,0,2};
    BracketPair *res;

    /* BracketPair.from is an unsigned short.  Assigning a larger code would
     * silently truncate it and could match an unrelated 16-bit entry, so
     * anything above 0xFFFF cannot be in the table. */
    if (code > 0xFFFF) {
        return NULL;
    }

    bp.from = (unsigned short) code;
    res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN,
                                  sizeof(BracketPair), compare_bp);
    return res;
}
/* Algorithmically splits a precomposed Hangul syllable.
 *
 * Returns 0 (outputs untouched) when `code` is not in
 * [SBASE, SBASE + SCOUNT); 3 for an LV,T split; 2 for an L,V split. */
static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
    int si = code - SBASE;
    int trailing;

    if (si < 0 || si >= SCOUNT) {
        return 0;
    }

    trailing = si % TCOUNT;
    if (trailing == 0) {
        /* L,V */
        *a = LBASE + (si / NCOUNT);
        *b = VBASE + (si % NCOUNT) / TCOUNT;
        return 2;
    }

    /* LV,T */
    *a = SBASE + (si - trailing);
    *b = TBASE + trailing;
    return 3;
}
/* Algorithmically composes a Hangul pair into *code.
 *
 * Returns 3 for an LV syllable plus trailing jamo, 2 for a leading jamo plus
 * vowel jamo, and 0 (with *code untouched) when neither case applies. */
static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b)
{
    const int a_is_lv = a >= SBASE && a < (SBASE + SCOUNT) &&
                        (a - SBASE) % TCOUNT == 0;
    const int b_is_t = b > TBASE && b < (TBASE + TCOUNT);
    const int a_is_l = a >= LBASE && a < (LBASE + LCOUNT);
    const int b_is_v = b >= VBASE && b < (VBASE + VCOUNT);

    if (a_is_lv && b_is_t) {
        /* LV,T */
        *code = a + (b - TBASE);
        return 3;
    }
    if (a_is_l && b_is_v) {
        /* L,V */
        int li = a - LBASE;
        int vi = b - VBASE;
        *code = SBASE + li * NCOUNT + vi * TCOUNT;
        return 2;
    }
    return 0;
}
/* Decodes one code point from the UTF-16 sequence at *code_ptr and advances
 * *code_ptr past the units consumed (one, or two for a surrogate pair).
 *
 * Only a high surrogate (0xD800..0xDBFF) starts a pair.  Any other unit,
 * including an unpaired low surrogate, is returned as is.  The second unit of
 * a pair is not validated. */
static uint32_t decode_utf16(const unsigned short **code_ptr)
{
    const unsigned short *code = *code_ptr;
    const uint32_t lead = code[0];

    /* High surrogates end at 0xDBFF; 0xDC00 is already a low surrogate and
     * must not be treated as the start of a pair. */
    if (lead < 0xd800 || lead > 0xdbff) {
        *code_ptr += 1;
        return lead;
    }

    *code_ptr += 2;
    return 0x10000 + ((uint32_t)code[1] - 0xdc00) +
        ((lead - 0xd800) << 10);
}
/* Returns the UNIDATA_VERSION string that the database header defines. */
const char *ucdn_get_unicode_version(void)
{
    const char *version = UNIDATA_VERSION;

    return version;
}
int ucdn_get_combining_class(uint32_t code)
{
return get_ucd_record(code)->combining;
}
int ucdn_get_east_asian_width(uint32_t code)
{
return get_ucd_record(code)->east_asian_width;
}
int ucdn_get_general_category(uint32_t code)
{
return get_ucd_record(code)->category;
}
int ucdn_get_bidi_class(uint32_t code)
{
return get_ucd_record(code)->bidi_class;
}
/* Returns non-zero when ucdn_mirror() maps `code` to a different code point. */
int ucdn_get_mirrored(uint32_t code)
{
    const uint32_t mirrored = ucdn_mirror(code);

    return mirrored != code;
}
int ucdn_get_script(uint32_t code)
{
return get_ucd_record(code)->script;
}
int ucdn_get_linebreak_class(uint32_t code)
{
return get_ucd_record(code)->linebreak_class;
}
/* Returns the line-break class of `code` after folding certain classes:
 *   AI, SG, XX -> AL
 *   SA         -> CM when the general category is MC or MN, otherwise AL
 *   CJ         -> NS
 *   CB         -> B2
 *   NL         -> BK
 * Every other class is returned unchanged. */
int ucdn_get_resolved_linebreak_class(uint32_t code)
{
    const UCDRecord *record = get_ucd_record(code);
    int resolved = record->linebreak_class;

    switch (resolved)
    {
    case UCDN_LINEBREAK_CLASS_AI:
    case UCDN_LINEBREAK_CLASS_SG:
    case UCDN_LINEBREAK_CLASS_XX:
        resolved = UCDN_LINEBREAK_CLASS_AL;
        break;
    case UCDN_LINEBREAK_CLASS_SA:
        if (record->category == UCDN_GENERAL_CATEGORY_MC ||
            record->category == UCDN_GENERAL_CATEGORY_MN) {
            resolved = UCDN_LINEBREAK_CLASS_CM;
        } else {
            resolved = UCDN_LINEBREAK_CLASS_AL;
        }
        break;
    case UCDN_LINEBREAK_CLASS_CJ:
        resolved = UCDN_LINEBREAK_CLASS_NS;
        break;
    case UCDN_LINEBREAK_CLASS_CB:
        resolved = UCDN_LINEBREAK_CLASS_B2;
        break;
    case UCDN_LINEBREAK_CLASS_NL:
        resolved = UCDN_LINEBREAK_CLASS_BK;
        break;
    default:
        break;
    }
    return resolved;
}
/* Returns the mirrored counterpart of `code` from the mirror_pairs table, or
 * `code` itself when the table has no entry for it. */
uint32_t ucdn_mirror(uint32_t code)
{
    MirrorPair mp = {0};
    MirrorPair *res;

    /* MirrorPair.from is an unsigned short.  Assigning a larger code would
     * silently truncate it and could match an unrelated 16-bit entry, so
     * anything above 0xFFFF has no mirror in this table. */
    if (code > 0xFFFF) {
        return code;
    }

    mp.from = (unsigned short) code;
    res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN,
                                 sizeof(MirrorPair), compare_mp);
    if (res == NULL) {
        return code;
    }
    return res->to;
}
/* Returns the bracket paired with `code`, or `code` itself when search_bp()
 * finds no entry. */
uint32_t ucdn_paired_bracket(uint32_t code)
{
    const BracketPair *pair = search_bp(code);

    if (pair != NULL) {
        return pair->to;
    }
    return code;
}
/* Returns the bracket type stored for `code`, or
 * UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE when search_bp() finds no entry. */
int ucdn_paired_bracket_type(uint32_t code)
{
    const BracketPair *pair = search_bp(code);

    if (pair != NULL) {
        return pair->type;
    }
    return UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE;
}
/* Pairwise decomposition of `code` into *a and *b.
 *
 * Hangul is handled algorithmically.  Otherwise the decomposition record is
 * consulted: the high byte of its first unit is the length and the low byte
 * must be zero for the record to be used here.  When the record holds a single
 * code point, *b is set to 0.
 *
 * Returns 1 when *a and *b were written, 0 otherwise. */
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
    const unsigned short *rec;
    unsigned short header;
    int len;

    if (hangul_pair_decompose(code, a, b)) {
        return 1;
    }

    rec = get_decomp_record(code);
    header = rec[0];
    len = header >> 8;
    if ((header & 0xff) != 0 || len == 0) {
        return 0;
    }

    rec++;
    *a = decode_utf16(&rec);
    *b = (len > 1) ? decode_utf16(&rec) : 0;
    return 1;
}
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b)
{
int l, r, index, indexi, offset;
if (hangul_pair_compose(code, a, b))
return 1;
l = get_comp_index(a, nfc_first, sizeof(nfc_first) / sizeof(Reindex));
r = get_comp_index(b, nfc_last, sizeof(nfc_last) / sizeof(Reindex));
if (l < 0 || r < 0)
return 0;
indexi = l * TOTAL_LAST + r;
index = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1;
offset = (indexi >> COMP_SHIFT2) & ((1<<COMP_SHIFT1) - 1);
index = comp_index1[index + offset] << COMP_SHIFT2;
offset = indexi & ((1<<COMP_SHIFT2) - 1);
*code = comp_data[index + offset];
return *code != 0;
}
/* Writes the full decomposition stored for `code` into `decomposed` and
 * returns the number of code points written (0 when there is none).
 * The caller must supply a buffer large enough for the result; no bound is
 * checked here. */
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed)
{
    const unsigned short *rec = get_decomp_record(code);
    const int len = rec[0] >> 8;
    int written = 0;

    if (len == 0) {
        return 0;
    }

    rec++;
    while (written < len) {
        decomposed[written] = decode_utf16(&rec);
        written++;
    }
    return len;
}

View File

@ -1,472 +0,0 @@
/*
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UCDN_H
#define UCDN_H
/*
 * HB_BEGIN_VISIBILITY / HB_END_VISIBILITY: on GCC 4 or newer (MinGW
 * excluded) these push and pop hidden symbol visibility through _Pragma;
 * on every other compiler they expand to nothing.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define HB_BEGIN_VISIBILITY _Pragma ("GCC visibility push(hidden)")
# define HB_END_VISIBILITY _Pragma ("GCC visibility pop")
#else
# define HB_BEGIN_VISIBILITY
# define HB_END_VISIBILITY
#endif
/*
 * HB_BEGIN_HEADER / HB_END_HEADER bracket the declarations of this header.
 * Under C++ they additionally open and close an extern "C" block around the
 * visibility push/pop; under C they are just the visibility macros.
 */
#ifdef __cplusplus
# define HB_BEGIN_HEADER extern "C" { HB_BEGIN_VISIBILITY
# define HB_END_HEADER HB_END_VISIBILITY }
#else
# define HB_BEGIN_HEADER HB_BEGIN_VISIBILITY
# define HB_END_HEADER HB_END_VISIBILITY
#endif
HB_BEGIN_HEADER
/*
 * Bring the fixed-width integer types (uint32_t and friends) into scope.
 * The platforms listed first take them from <inttypes.h>, AIX from
 * <sys/inttypes.h>, MSVC older than 2010 gets local typedefs built on the
 * __intN extensions, and everything else includes <stdint.h>.
 */
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || \
defined (_sgi) || defined (__sun) || defined (sun) || \
defined (__digital__) || defined (__HP_cc)
# include <inttypes.h>
#elif defined (_AIX)
# include <sys/inttypes.h>
#elif defined (_MSC_VER) && _MSC_VER < 1600
/* VS 2010 (_MSC_VER 1600) has stdint.h */
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
# include <stdint.h>
#endif
/*
 * East Asian width values, as returned by ucdn_get_east_asian_width()
 * (see UAX#11).
 */
#define UCDN_EAST_ASIAN_F 0
#define UCDN_EAST_ASIAN_H 1
#define UCDN_EAST_ASIAN_W 2
#define UCDN_EAST_ASIAN_NA 3
#define UCDN_EAST_ASIAN_A 4
#define UCDN_EAST_ASIAN_N 5
/*
 * Script identifiers, as returned by ucdn_get_script() (see UAX#24).
 * The values are consecutive integers from 0 (COMMON) to 152 (WANCHO).
 */
#define UCDN_SCRIPT_COMMON 0
#define UCDN_SCRIPT_LATIN 1
#define UCDN_SCRIPT_GREEK 2
#define UCDN_SCRIPT_CYRILLIC 3
#define UCDN_SCRIPT_ARMENIAN 4
#define UCDN_SCRIPT_HEBREW 5
#define UCDN_SCRIPT_ARABIC 6
#define UCDN_SCRIPT_SYRIAC 7
#define UCDN_SCRIPT_THAANA 8
#define UCDN_SCRIPT_DEVANAGARI 9
#define UCDN_SCRIPT_BENGALI 10
#define UCDN_SCRIPT_GURMUKHI 11
#define UCDN_SCRIPT_GUJARATI 12
#define UCDN_SCRIPT_ORIYA 13
#define UCDN_SCRIPT_TAMIL 14
#define UCDN_SCRIPT_TELUGU 15
#define UCDN_SCRIPT_KANNADA 16
#define UCDN_SCRIPT_MALAYALAM 17
#define UCDN_SCRIPT_SINHALA 18
#define UCDN_SCRIPT_THAI 19
#define UCDN_SCRIPT_LAO 20
#define UCDN_SCRIPT_TIBETAN 21
#define UCDN_SCRIPT_MYANMAR 22
#define UCDN_SCRIPT_GEORGIAN 23
#define UCDN_SCRIPT_HANGUL 24
#define UCDN_SCRIPT_ETHIOPIC 25
#define UCDN_SCRIPT_CHEROKEE 26
#define UCDN_SCRIPT_CANADIAN_ABORIGINAL 27
#define UCDN_SCRIPT_OGHAM 28
#define UCDN_SCRIPT_RUNIC 29
#define UCDN_SCRIPT_KHMER 30
#define UCDN_SCRIPT_MONGOLIAN 31
#define UCDN_SCRIPT_HIRAGANA 32
#define UCDN_SCRIPT_KATAKANA 33
#define UCDN_SCRIPT_BOPOMOFO 34
#define UCDN_SCRIPT_HAN 35
#define UCDN_SCRIPT_YI 36
#define UCDN_SCRIPT_OLD_ITALIC 37
#define UCDN_SCRIPT_GOTHIC 38
#define UCDN_SCRIPT_DESERET 39
#define UCDN_SCRIPT_INHERITED 40
#define UCDN_SCRIPT_TAGALOG 41
#define UCDN_SCRIPT_HANUNOO 42
#define UCDN_SCRIPT_BUHID 43
#define UCDN_SCRIPT_TAGBANWA 44
#define UCDN_SCRIPT_LIMBU 45
#define UCDN_SCRIPT_TAI_LE 46
#define UCDN_SCRIPT_LINEAR_B 47
#define UCDN_SCRIPT_UGARITIC 48
#define UCDN_SCRIPT_SHAVIAN 49
#define UCDN_SCRIPT_OSMANYA 50
#define UCDN_SCRIPT_CYPRIOT 51
#define UCDN_SCRIPT_BRAILLE 52
#define UCDN_SCRIPT_BUGINESE 53
#define UCDN_SCRIPT_COPTIC 54
#define UCDN_SCRIPT_NEW_TAI_LUE 55
#define UCDN_SCRIPT_GLAGOLITIC 56
#define UCDN_SCRIPT_TIFINAGH 57
#define UCDN_SCRIPT_SYLOTI_NAGRI 58
#define UCDN_SCRIPT_OLD_PERSIAN 59
#define UCDN_SCRIPT_KHAROSHTHI 60
#define UCDN_SCRIPT_BALINESE 61
#define UCDN_SCRIPT_CUNEIFORM 62
#define UCDN_SCRIPT_PHOENICIAN 63
#define UCDN_SCRIPT_PHAGS_PA 64
#define UCDN_SCRIPT_NKO 65
#define UCDN_SCRIPT_SUNDANESE 66
#define UCDN_SCRIPT_LEPCHA 67
#define UCDN_SCRIPT_OL_CHIKI 68
#define UCDN_SCRIPT_VAI 69
#define UCDN_SCRIPT_SAURASHTRA 70
#define UCDN_SCRIPT_KAYAH_LI 71
#define UCDN_SCRIPT_REJANG 72
#define UCDN_SCRIPT_LYCIAN 73
#define UCDN_SCRIPT_CARIAN 74
#define UCDN_SCRIPT_LYDIAN 75
#define UCDN_SCRIPT_CHAM 76
#define UCDN_SCRIPT_TAI_THAM 77
#define UCDN_SCRIPT_TAI_VIET 78
#define UCDN_SCRIPT_AVESTAN 79
#define UCDN_SCRIPT_EGYPTIAN_HIEROGLYPHS 80
#define UCDN_SCRIPT_SAMARITAN 81
#define UCDN_SCRIPT_LISU 82
#define UCDN_SCRIPT_BAMUM 83
#define UCDN_SCRIPT_JAVANESE 84
#define UCDN_SCRIPT_MEETEI_MAYEK 85
#define UCDN_SCRIPT_IMPERIAL_ARAMAIC 86
#define UCDN_SCRIPT_OLD_SOUTH_ARABIAN 87
#define UCDN_SCRIPT_INSCRIPTIONAL_PARTHIAN 88
#define UCDN_SCRIPT_INSCRIPTIONAL_PAHLAVI 89
#define UCDN_SCRIPT_OLD_TURKIC 90
#define UCDN_SCRIPT_KAITHI 91
#define UCDN_SCRIPT_BATAK 92
#define UCDN_SCRIPT_BRAHMI 93
#define UCDN_SCRIPT_MANDAIC 94
#define UCDN_SCRIPT_CHAKMA 95
#define UCDN_SCRIPT_MEROITIC_CURSIVE 96
#define UCDN_SCRIPT_MEROITIC_HIEROGLYPHS 97
#define UCDN_SCRIPT_MIAO 98
#define UCDN_SCRIPT_SHARADA 99
#define UCDN_SCRIPT_SORA_SOMPENG 100
#define UCDN_SCRIPT_TAKRI 101
#define UCDN_SCRIPT_UNKNOWN 102
#define UCDN_SCRIPT_BASSA_VAH 103
#define UCDN_SCRIPT_CAUCASIAN_ALBANIAN 104
#define UCDN_SCRIPT_DUPLOYAN 105
#define UCDN_SCRIPT_ELBASAN 106
#define UCDN_SCRIPT_GRANTHA 107
#define UCDN_SCRIPT_KHOJKI 108
#define UCDN_SCRIPT_KHUDAWADI 109
#define UCDN_SCRIPT_LINEAR_A 110
#define UCDN_SCRIPT_MAHAJANI 111
#define UCDN_SCRIPT_MANICHAEAN 112
#define UCDN_SCRIPT_MENDE_KIKAKUI 113
#define UCDN_SCRIPT_MODI 114
#define UCDN_SCRIPT_MRO 115
#define UCDN_SCRIPT_NABATAEAN 116
#define UCDN_SCRIPT_OLD_NORTH_ARABIAN 117
#define UCDN_SCRIPT_OLD_PERMIC 118
#define UCDN_SCRIPT_PAHAWH_HMONG 119
#define UCDN_SCRIPT_PALMYRENE 120
#define UCDN_SCRIPT_PAU_CIN_HAU 121
#define UCDN_SCRIPT_PSALTER_PAHLAVI 122
#define UCDN_SCRIPT_SIDDHAM 123
#define UCDN_SCRIPT_TIRHUTA 124
#define UCDN_SCRIPT_WARANG_CITI 125
#define UCDN_SCRIPT_AHOM 126
#define UCDN_SCRIPT_ANATOLIAN_HIEROGLYPHS 127
#define UCDN_SCRIPT_HATRAN 128
#define UCDN_SCRIPT_MULTANI 129
#define UCDN_SCRIPT_OLD_HUNGARIAN 130
#define UCDN_SCRIPT_SIGNWRITING 131
#define UCDN_SCRIPT_ADLAM 132
#define UCDN_SCRIPT_BHAIKSUKI 133
#define UCDN_SCRIPT_MARCHEN 134
#define UCDN_SCRIPT_NEWA 135
#define UCDN_SCRIPT_OSAGE 136
#define UCDN_SCRIPT_TANGUT 137
#define UCDN_SCRIPT_MASARAM_GONDI 138
#define UCDN_SCRIPT_NUSHU 139
#define UCDN_SCRIPT_SOYOMBO 140
#define UCDN_SCRIPT_ZANABAZAR_SQUARE 141
#define UCDN_SCRIPT_DOGRA 142
#define UCDN_SCRIPT_GUNJALA_GONDI 143
#define UCDN_SCRIPT_HANIFI_ROHINGYA 144
#define UCDN_SCRIPT_MAKASAR 145
#define UCDN_SCRIPT_MEDEFAIDRIN 146
#define UCDN_SCRIPT_OLD_SOGDIAN 147
#define UCDN_SCRIPT_SOGDIAN 148
#define UCDN_SCRIPT_ELYMAIC 149
#define UCDN_SCRIPT_NANDINAGARI 150
#define UCDN_SCRIPT_NYIAKENG_PUACHUE_HMONG 151
#define UCDN_SCRIPT_WANCHO 152
/*
 * Line breaking classes, as returned by ucdn_get_linebreak_class() and
 * ucdn_get_resolved_linebreak_class() (see UAX#14).
 */
#define UCDN_LINEBREAK_CLASS_OP 0
#define UCDN_LINEBREAK_CLASS_CL 1
#define UCDN_LINEBREAK_CLASS_CP 2
#define UCDN_LINEBREAK_CLASS_QU 3
#define UCDN_LINEBREAK_CLASS_GL 4
#define UCDN_LINEBREAK_CLASS_NS 5
#define UCDN_LINEBREAK_CLASS_EX 6
#define UCDN_LINEBREAK_CLASS_SY 7
#define UCDN_LINEBREAK_CLASS_IS 8
#define UCDN_LINEBREAK_CLASS_PR 9
#define UCDN_LINEBREAK_CLASS_PO 10
#define UCDN_LINEBREAK_CLASS_NU 11
#define UCDN_LINEBREAK_CLASS_AL 12
#define UCDN_LINEBREAK_CLASS_HL 13
#define UCDN_LINEBREAK_CLASS_ID 14
#define UCDN_LINEBREAK_CLASS_IN 15
#define UCDN_LINEBREAK_CLASS_HY 16
#define UCDN_LINEBREAK_CLASS_BA 17
#define UCDN_LINEBREAK_CLASS_BB 18
#define UCDN_LINEBREAK_CLASS_B2 19
#define UCDN_LINEBREAK_CLASS_ZW 20
#define UCDN_LINEBREAK_CLASS_CM 21
#define UCDN_LINEBREAK_CLASS_WJ 22
#define UCDN_LINEBREAK_CLASS_H2 23
#define UCDN_LINEBREAK_CLASS_H3 24
#define UCDN_LINEBREAK_CLASS_JL 25
#define UCDN_LINEBREAK_CLASS_JV 26
#define UCDN_LINEBREAK_CLASS_JT 27
#define UCDN_LINEBREAK_CLASS_RI 28
#define UCDN_LINEBREAK_CLASS_AI 29
#define UCDN_LINEBREAK_CLASS_BK 30
#define UCDN_LINEBREAK_CLASS_CB 31
#define UCDN_LINEBREAK_CLASS_CJ 32
#define UCDN_LINEBREAK_CLASS_CR 33
#define UCDN_LINEBREAK_CLASS_LF 34
#define UCDN_LINEBREAK_CLASS_NL 35
#define UCDN_LINEBREAK_CLASS_SA 36
#define UCDN_LINEBREAK_CLASS_SG 37
#define UCDN_LINEBREAK_CLASS_SP 38
#define UCDN_LINEBREAK_CLASS_XX 39
#define UCDN_LINEBREAK_CLASS_ZWJ 40
#define UCDN_LINEBREAK_CLASS_EB 41
#define UCDN_LINEBREAK_CLASS_EM 42
/*
 * General category values, as returned by ucdn_get_general_category()
 * (see UAX#44).
 */
#define UCDN_GENERAL_CATEGORY_CC 0
#define UCDN_GENERAL_CATEGORY_CF 1
#define UCDN_GENERAL_CATEGORY_CN 2
#define UCDN_GENERAL_CATEGORY_CO 3
#define UCDN_GENERAL_CATEGORY_CS 4
#define UCDN_GENERAL_CATEGORY_LL 5
#define UCDN_GENERAL_CATEGORY_LM 6
#define UCDN_GENERAL_CATEGORY_LO 7
#define UCDN_GENERAL_CATEGORY_LT 8
#define UCDN_GENERAL_CATEGORY_LU 9
#define UCDN_GENERAL_CATEGORY_MC 10
#define UCDN_GENERAL_CATEGORY_ME 11
#define UCDN_GENERAL_CATEGORY_MN 12
#define UCDN_GENERAL_CATEGORY_ND 13
#define UCDN_GENERAL_CATEGORY_NL 14
#define UCDN_GENERAL_CATEGORY_NO 15
#define UCDN_GENERAL_CATEGORY_PC 16
#define UCDN_GENERAL_CATEGORY_PD 17
#define UCDN_GENERAL_CATEGORY_PE 18
#define UCDN_GENERAL_CATEGORY_PF 19
#define UCDN_GENERAL_CATEGORY_PI 20
#define UCDN_GENERAL_CATEGORY_PO 21
#define UCDN_GENERAL_CATEGORY_PS 22
#define UCDN_GENERAL_CATEGORY_SC 23
#define UCDN_GENERAL_CATEGORY_SK 24
#define UCDN_GENERAL_CATEGORY_SM 25
#define UCDN_GENERAL_CATEGORY_SO 26
#define UCDN_GENERAL_CATEGORY_ZL 27
#define UCDN_GENERAL_CATEGORY_ZP 28
#define UCDN_GENERAL_CATEGORY_ZS 29
/*
 * Bidirectional class values, as returned by ucdn_get_bidi_class()
 * (see UAX#44).
 */
#define UCDN_BIDI_CLASS_L 0
#define UCDN_BIDI_CLASS_LRE 1
#define UCDN_BIDI_CLASS_LRO 2
#define UCDN_BIDI_CLASS_R 3
#define UCDN_BIDI_CLASS_AL 4
#define UCDN_BIDI_CLASS_RLE 5
#define UCDN_BIDI_CLASS_RLO 6
#define UCDN_BIDI_CLASS_PDF 7
#define UCDN_BIDI_CLASS_EN 8
#define UCDN_BIDI_CLASS_ES 9
#define UCDN_BIDI_CLASS_ET 10
#define UCDN_BIDI_CLASS_AN 11
#define UCDN_BIDI_CLASS_CS 12
#define UCDN_BIDI_CLASS_NSM 13
#define UCDN_BIDI_CLASS_BN 14
#define UCDN_BIDI_CLASS_B 15
#define UCDN_BIDI_CLASS_S 16
#define UCDN_BIDI_CLASS_WS 17
#define UCDN_BIDI_CLASS_ON 18
#define UCDN_BIDI_CLASS_LRI 19
#define UCDN_BIDI_CLASS_RLI 20
#define UCDN_BIDI_CLASS_FSI 21
#define UCDN_BIDI_CLASS_PDI 22
/*
 * Paired bracket types, as returned by ucdn_paired_bracket_type()
 * (see UAX#9).
 */
#define UCDN_BIDI_PAIRED_BRACKET_TYPE_OPEN 0
#define UCDN_BIDI_PAIRED_BRACKET_TYPE_CLOSE 1
#define UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE 2
/**
* Return version of the Unicode database.
*
* @return Unicode database version
*/
const char *ucdn_get_unicode_version(void);
/**
* Get combining class of a codepoint.
*
* @param code Unicode codepoint
* @return combining class value, as defined in UAX#44
*/
int ucdn_get_combining_class(uint32_t code);
/**
* Get east-asian width of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_EAST_ASIAN_* and as defined in UAX#11.
*/
int ucdn_get_east_asian_width(uint32_t code);
/**
* Get general category of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_GENERAL_CATEGORY_* and as defined in
* UAX#44.
*/
int ucdn_get_general_category(uint32_t code);
/**
* Get bidirectional class of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_BIDI_CLASS_* and as defined in UAX#44.
*/
int ucdn_get_bidi_class(uint32_t code);
/**
* Get script of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_SCRIPT_* and as defined in UAX#24.
*/
int ucdn_get_script(uint32_t code);
/**
* Get unresolved linebreak class of a codepoint. This does not take
* rule LB1 of UAX#14 into account. See ucdn_get_resolved_linebreak_class()
* for resolved linebreak classes.
*
* @param code Unicode codepoint
* @return value according to UCDN_LINEBREAK_CLASS_* and as defined in
* UAX#14.
*/
int ucdn_get_linebreak_class(uint32_t code);
/**
* Get resolved linebreak class of a codepoint. This resolves characters
* in the AI, SG, XX, SA and CJ classes according to rule LB1 of UAX#14.
* SA resolves to CM for characters of general category Mc or Mn and to
* AL otherwise; CJ resolves to NS.
* In addition the CB class is resolved as the equivalent B2 class and
* the NL class is resolved as the equivalent BK class. Classes other
* than those listed above are returned unchanged.
*
* @param code Unicode codepoint
* @return value according to UCDN_LINEBREAK_CLASS_* and as defined in
* UAX#14.
*/
int ucdn_get_resolved_linebreak_class(uint32_t code);
/**
* Check if codepoint can be mirrored.
*
* @param code Unicode codepoint
* @return 1 if mirrored character exists, otherwise 0
*/
int ucdn_get_mirrored(uint32_t code);
/**
* Mirror a codepoint.
*
* @param code Unicode codepoint
* @return mirrored codepoint or the original codepoint if no
* mirrored character exists
*/
uint32_t ucdn_mirror(uint32_t code);
/**
* Get paired bracket for a codepoint.
*
* @param code Unicode codepoint
* @return paired bracket codepoint or the original codepoint if no
* paired bracket character exists
*/
uint32_t ucdn_paired_bracket(uint32_t code);
/**
* Get paired bracket type for a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_BIDI_PAIRED_BRACKET_TYPE_* and as defined
* in UAX#9; UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE when no bracket pair record
* is found for the codepoint.
*/
int ucdn_paired_bracket_type(uint32_t code);
/**
* Pairwise canonical decomposition of a codepoint. This includes
* Hangul Jamo decomposition (see chapter 3.12 of the Unicode core
* specification).
*
* Hangul is decomposed into L and V jamos for LV forms, and an
* LV precomposed syllable and a T jamo for LVT forms.
*
* @param code Unicode codepoint
* @param a filled with first codepoint of decomposition
* @param b filled with second codepoint of decomposition, or 0
* @return 1 if a decomposition was found and stored in a and b,
* otherwise 0
*/
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b);
/**
* Compatibility decomposition of a codepoint.
*
* @param code Unicode codepoint
* @param decomposed filled with decomposition, must be able to hold 18
* characters
* @return length of decomposition or 0 in case none exists
*/
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed);
/**
* Pairwise canonical composition of two codepoints. This includes
* Hangul Jamo composition (see chapter 3.12 of the Unicode core
* specification).
*
* Hangul composition expects either L and V jamos, or an LV
* precomposed syllable and a T jamo. This is exactly the inverse
* of pairwise Hangul decomposition.
*
* @param code filled with composition; may be overwritten with 0 when
* the composition table has no entry for the pair
* @param a first codepoint
* @param b second codepoint
* @return 1 if a composition was found and stored in code, otherwise 0
*/
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b);
/* Closes the region opened by HB_BEGIN_HEADER, then the UCDN_H include guard. */
HB_END_HEADER
#endif

File diff suppressed because it is too large Load Diff

View File

@ -127,15 +127,15 @@ hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED
#endif
extern "C" hb_unicode_funcs_t *hb_ucd_get_unicode_funcs ();
extern "C" hb_unicode_funcs_t *hb_glib_get_unicode_funcs ();
extern "C" hb_unicode_funcs_t *hb_icu_get_unicode_funcs ();
extern "C" hb_unicode_funcs_t *hb_ucdn_get_unicode_funcs ();
hb_unicode_funcs_t *
hb_unicode_funcs_get_default ()
{
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_UCDN)
return hb_ucdn_get_unicode_funcs ();
#if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
return hb_ucd_get_unicode_funcs ();
#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
return hb_glib_get_unicode_funcs ();
#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
@ -148,7 +148,7 @@ hb_unicode_funcs_get_default ()
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
#error "Could not find any Unicode functions implementation, you have to provide your own"
#error "Consider building hb-ucdn.c. If you absolutely want to build without any, check the code."
#error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code."
#endif
/**