[ucdn] Replace UCDN with a new UCD implementation
UCDN was ~120kb of data. The new implementation is 69kb in default builds, and 49kb if built with HB_OPTIMIZE_SIZE or __OPTIMIZE_SIZE__. The latter is automatically enabled if built with -Os or -Oz. There's room to shave off another 10kb or 20kb. That will follow later. Fixes https://github.com/harfbuzz/harfbuzz/issues/1652
This commit is contained in:
parent
12c59f6c40
commit
65392b734e
|
@ -87,17 +87,6 @@ HBSOURCES += $(HB_CORETEXT_sources)
|
|||
HBHEADERS += $(HB_CORETEXT_headers)
|
||||
endif
|
||||
|
||||
if HAVE_UCDN
|
||||
SUBDIRS += hb-ucdn
|
||||
HBCFLAGS += -I$(srcdir)/hb-ucdn
|
||||
HBLIBS += hb-ucdn/libhb-ucdn.la
|
||||
HBSOURCES += $(HB_UCDN_sources)
|
||||
hb-ucdn/libhb-ucdn.la: ucdn
|
||||
ucdn:
|
||||
@$(MAKE) $(AM_MAKEFLAGS) -C hb-ucdn
|
||||
endif
|
||||
DIST_SUBDIRS += hb-ucdn
|
||||
|
||||
|
||||
BUILT_SOURCES += \
|
||||
hb-version.h
|
||||
|
|
|
@ -145,6 +145,8 @@ HB_BASE_sources = \
|
|||
hb-shaper.hh \
|
||||
hb-static.cc \
|
||||
hb-string-array.hh \
|
||||
hb-ucd-table.hh \
|
||||
hb-ucd.cc \
|
||||
hb-unicode-emoji-table.hh \
|
||||
hb-unicode.cc \
|
||||
hb-unicode.hh \
|
||||
|
@ -224,9 +226,6 @@ HB_DIRECTWRITE_headers = hb-directwrite.h
|
|||
HB_UNISCRIBE_sources = hb-uniscribe.cc
|
||||
HB_UNISCRIBE_headers = hb-uniscribe.h
|
||||
|
||||
# Additional supplemental sources
|
||||
HB_UCDN_sources = hb-ucdn.cc
|
||||
|
||||
# Sources for libharfbuzz-gobject and libharfbuzz-icu
|
||||
HB_ICU_sources = hb-icu.cc
|
||||
HB_ICU_headers = hb-icu.h
|
||||
|
|
|
@ -24,12 +24,16 @@ sc = [u['sc'] for u in ucd]
|
|||
|
||||
dm = {i:tuple(int(v, 16) for v in u['dm'].split()) for i,u in enumerate(ucd)
|
||||
if u['dm'] != '#' and u['dt'] == 'can' and not (0xAC00 <= i < 0xAC00+11172)}
|
||||
ce = {i for i,u in enumerate(ucd) if u['Comp_Ex'] == 'Y'}
|
||||
|
||||
assert not any(v for v in dm.values() if len(v) not in (1,2))
|
||||
dm1 = sorted(set(v for v in dm.values() if len(v) == 1))
|
||||
dm1_array = ['0x%04Xu' % v for v in dm1]
|
||||
dm1_order = {v:i+1 for i,v in enumerate(dm1)}
|
||||
dm2 = sorted((v, i) for i,v in dm.items() if len(v) == 2)
|
||||
dm2 = [("_HB_UCD_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % (v+(i,)), v) for v,i in dm2]
|
||||
dm2 = [("HB_CODEPOINT_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" %
|
||||
(v+(i if i not in ce and not ccc[i] else 0,)), v)
|
||||
for v,i in dm2]
|
||||
dm2_array = [s for s,v in dm2]
|
||||
l = 1 + len(dm1_array)
|
||||
dm2_order = {v[1]:i+l for i,v in enumerate(dm2)}
|
||||
|
@ -63,7 +67,6 @@ DEFAULT = 1
|
|||
COMPACT = 3
|
||||
|
||||
|
||||
|
||||
print("/* == Start of generated table == */")
|
||||
print("/*")
|
||||
print(" * The following table is generated by running:")
|
||||
|
@ -78,7 +81,7 @@ print("#define HB_UCD_TABLE_HH")
|
|||
print()
|
||||
|
||||
print()
|
||||
print('#include <stdint.h>')
|
||||
print('#include "hb.hh"')
|
||||
print()
|
||||
|
||||
code = packTab.Code('_hb_ucd')
|
||||
|
@ -90,7 +93,7 @@ code.print_c(linkage='static inline')
|
|||
for compression in (DEFAULT, COMPACT):
|
||||
print()
|
||||
if compression == DEFAULT:
|
||||
print('#ifdef HB_OPTIMIZE_SIZE')
|
||||
print('#ifndef HB_OPTIMIZE_SIZE')
|
||||
else:
|
||||
print('#else')
|
||||
print()
|
||||
|
|
|
@ -34,6 +34,14 @@
|
|||
#include "hb-null.hh"
|
||||
|
||||
|
||||
/* Packs three unsigned values into one 64-bit number: x from bit 42 up,
 * y in bits 21..41 and z in bits 0..20.  The inputs are not masked, so a
 * value wider than 21 bits spills into its neighbour (or off the top) and
 * will not decode exactly. */
#define HB_CODEPOINT_ENCODE3(x,y,z) ((uint64_t) (z) | ((uint64_t) (y) << 21) | ((uint64_t) (x) << 42))
/* Extract the first, second and third packed value respectively. */
#define HB_CODEPOINT_DECODE3_1(v) ((hb_codepoint_t) ((v) >> 42))
#define HB_CODEPOINT_DECODE3_2(v) ((hb_codepoint_t) (((v) >> 21) & 0x1FFFFFu))
#define HB_CODEPOINT_DECODE3_3(v) ((hb_codepoint_t) ((v) & 0x1FFFFFu))
|
||||
|
||||
|
||||
struct
|
||||
{
|
||||
/* Note. This is dangerous in that if it's passed an rvalue, it returns rvalue-reference. */
|
||||
|
|
|
@ -158,84 +158,82 @@ OT::GDEF::is_blacklisted (hb_blob_t *blob,
|
|||
* https://bugzilla.mozilla.org/show_bug.cgi?id=1279693
|
||||
* https://bugzilla.mozilla.org/show_bug.cgi?id=1279875
|
||||
*/
|
||||
#define ENCODE(x,y,z) (((uint64_t) (x) << 48) | ((uint64_t) (y) << 24) | (uint64_t) (z))
|
||||
switch ENCODE(blob->length,
|
||||
switch HB_CODEPOINT_ENCODE3(blob->length,
|
||||
face->table.GSUB->table.get_length (),
|
||||
face->table.GPOS->table.get_length ())
|
||||
{
|
||||
/* sha1sum:c5ee92f0bca4bfb7d06c4d03e8cf9f9cf75d2e8a Windows 7? timesi.ttf */
|
||||
case ENCODE (442, 2874, 42038):
|
||||
case HB_CODEPOINT_ENCODE3 (442, 2874, 42038):
|
||||
/* sha1sum:37fc8c16a0894ab7b749e35579856c73c840867b Windows 7? timesbi.ttf */
|
||||
case ENCODE (430, 2874, 40662):
|
||||
case HB_CODEPOINT_ENCODE3 (430, 2874, 40662):
|
||||
/* sha1sum:19fc45110ea6cd3cdd0a5faca256a3797a069a80 Windows 7 timesi.ttf */
|
||||
case ENCODE (442, 2874, 39116):
|
||||
case HB_CODEPOINT_ENCODE3 (442, 2874, 39116):
|
||||
/* sha1sum:6d2d3c9ed5b7de87bc84eae0df95ee5232ecde26 Windows 7 timesbi.ttf */
|
||||
case ENCODE (430, 2874, 39374):
|
||||
case HB_CODEPOINT_ENCODE3 (430, 2874, 39374):
|
||||
/* sha1sum:8583225a8b49667c077b3525333f84af08c6bcd8 OS X 10.11.3 Times New Roman Italic.ttf */
|
||||
case ENCODE (490, 3046, 41638):
|
||||
case HB_CODEPOINT_ENCODE3 (490, 3046, 41638):
|
||||
/* sha1sum:ec0f5a8751845355b7c3271d11f9918a966cb8c9 OS X 10.11.3 Times New Roman Bold Italic.ttf */
|
||||
case ENCODE (478, 3046, 41902):
|
||||
case HB_CODEPOINT_ENCODE3 (478, 3046, 41902):
|
||||
/* sha1sum:96eda93f7d33e79962451c6c39a6b51ee893ce8c tahoma.ttf from Windows 8 */
|
||||
case ENCODE (898, 12554, 46470):
|
||||
case HB_CODEPOINT_ENCODE3 (898, 12554, 46470):
|
||||
/* sha1sum:20928dc06014e0cd120b6fc942d0c3b1a46ac2bc tahomabd.ttf from Windows 8 */
|
||||
case ENCODE (910, 12566, 47732):
|
||||
case HB_CODEPOINT_ENCODE3 (910, 12566, 47732):
|
||||
/* sha1sum:4f95b7e4878f60fa3a39ca269618dfde9721a79e tahoma.ttf from Windows 8.1 */
|
||||
case ENCODE (928, 23298, 59332):
|
||||
case HB_CODEPOINT_ENCODE3 (928, 23298, 59332):
|
||||
/* sha1sum:6d400781948517c3c0441ba42acb309584b73033 tahomabd.ttf from Windows 8.1 */
|
||||
case ENCODE (940, 23310, 60732):
|
||||
case HB_CODEPOINT_ENCODE3 (940, 23310, 60732):
|
||||
/* tahoma.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
|
||||
case ENCODE (964, 23836, 60072):
|
||||
case HB_CODEPOINT_ENCODE3 (964, 23836, 60072):
|
||||
/* tahomabd.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
|
||||
case ENCODE (976, 23832, 61456):
|
||||
case HB_CODEPOINT_ENCODE3 (976, 23832, 61456):
|
||||
/* sha1sum:e55fa2dfe957a9f7ec26be516a0e30b0c925f846 tahoma.ttf from Windows 10 */
|
||||
case ENCODE (994, 24474, 60336):
|
||||
case HB_CODEPOINT_ENCODE3 (994, 24474, 60336):
|
||||
/* sha1sum:7199385abb4c2cc81c83a151a7599b6368e92343 tahomabd.ttf from Windows 10 */
|
||||
case ENCODE (1006, 24470, 61740):
|
||||
case HB_CODEPOINT_ENCODE3 (1006, 24470, 61740):
|
||||
/* tahoma.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
|
||||
case ENCODE (1006, 24576, 61346):
|
||||
case HB_CODEPOINT_ENCODE3 (1006, 24576, 61346):
|
||||
/* tahomabd.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */
|
||||
case ENCODE (1018, 24572, 62828):
|
||||
case HB_CODEPOINT_ENCODE3 (1018, 24572, 62828):
|
||||
/* sha1sum:b9c84d820c49850d3d27ec498be93955b82772b5 tahoma.ttf from Windows 10 AU */
|
||||
case ENCODE (1006, 24576, 61352):
|
||||
case HB_CODEPOINT_ENCODE3 (1006, 24576, 61352):
|
||||
/* sha1sum:2bdfaab28174bdadd2f3d4200a30a7ae31db79d2 tahomabd.ttf from Windows 10 AU */
|
||||
case ENCODE (1018, 24572, 62834):
|
||||
case HB_CODEPOINT_ENCODE3 (1018, 24572, 62834):
|
||||
/* sha1sum:b0d36cf5a2fbe746a3dd277bffc6756a820807a7 Tahoma.ttf from Mac OS X 10.9 */
|
||||
case ENCODE (832, 7324, 47162):
|
||||
case HB_CODEPOINT_ENCODE3 (832, 7324, 47162):
|
||||
/* sha1sum:12fc4538e84d461771b30c18b5eb6bd434e30fba Tahoma Bold.ttf from Mac OS X 10.9 */
|
||||
case ENCODE (844, 7302, 45474):
|
||||
case HB_CODEPOINT_ENCODE3 (844, 7302, 45474):
|
||||
/* sha1sum:eb8afadd28e9cf963e886b23a30b44ab4fd83acc himalaya.ttf from Windows 7 */
|
||||
case ENCODE (180, 13054, 7254):
|
||||
case HB_CODEPOINT_ENCODE3 (180, 13054, 7254):
|
||||
/* sha1sum:73da7f025b238a3f737aa1fde22577a6370f77b0 himalaya.ttf from Windows 8 */
|
||||
case ENCODE (192, 12638, 7254):
|
||||
case HB_CODEPOINT_ENCODE3 (192, 12638, 7254):
|
||||
/* sha1sum:6e80fd1c0b059bbee49272401583160dc1e6a427 himalaya.ttf from Windows 8.1 */
|
||||
case ENCODE (192, 12690, 7254):
|
||||
case HB_CODEPOINT_ENCODE3 (192, 12690, 7254):
|
||||
/* 8d9267aea9cd2c852ecfb9f12a6e834bfaeafe44 cantarell-fonts-0.0.21/otf/Cantarell-Regular.otf */
|
||||
/* 983988ff7b47439ab79aeaf9a45bd4a2c5b9d371 cantarell-fonts-0.0.21/otf/Cantarell-Oblique.otf */
|
||||
case ENCODE (188, 248, 3852):
|
||||
case HB_CODEPOINT_ENCODE3 (188, 248, 3852):
|
||||
/* 2c0c90c6f6087ffbfea76589c93113a9cbb0e75f cantarell-fonts-0.0.21/otf/Cantarell-Bold.otf */
|
||||
/* 55461f5b853c6da88069ffcdf7f4dd3f8d7e3e6b cantarell-fonts-0.0.21/otf/Cantarell-Bold-Oblique.otf */
|
||||
case ENCODE (188, 264, 3426):
|
||||
case HB_CODEPOINT_ENCODE3 (188, 264, 3426):
|
||||
/* d125afa82a77a6475ac0e74e7c207914af84b37a padauk-2.80/Padauk.ttf RHEL 7.2 */
|
||||
case ENCODE (1058, 47032, 11818):
|
||||
case HB_CODEPOINT_ENCODE3 (1058, 47032, 11818):
|
||||
/* 0f7b80437227b90a577cc078c0216160ae61b031 padauk-2.80/Padauk-Bold.ttf RHEL 7.2*/
|
||||
case ENCODE (1046, 47030, 12600):
|
||||
case HB_CODEPOINT_ENCODE3 (1046, 47030, 12600):
|
||||
/* d3dde9aa0a6b7f8f6a89ef1002e9aaa11b882290 padauk-2.80/Padauk.ttf Ubuntu 16.04 */
|
||||
case ENCODE (1058, 71796, 16770):
|
||||
case HB_CODEPOINT_ENCODE3 (1058, 71796, 16770):
|
||||
/* 5f3c98ccccae8a953be2d122c1b3a77fd805093f padauk-2.80/Padauk-Bold.ttf Ubuntu 16.04 */
|
||||
case ENCODE (1046, 71790, 17862):
|
||||
case HB_CODEPOINT_ENCODE3 (1046, 71790, 17862):
|
||||
/* 6c93b63b64e8b2c93f5e824e78caca555dc887c7 padauk-2.80/Padauk-book.ttf */
|
||||
case ENCODE (1046, 71788, 17112):
|
||||
case HB_CODEPOINT_ENCODE3 (1046, 71788, 17112):
|
||||
/* d89b1664058359b8ec82e35d3531931125991fb9 padauk-2.80/Padauk-bookbold.ttf */
|
||||
case ENCODE (1058, 71794, 17514):
|
||||
case HB_CODEPOINT_ENCODE3 (1058, 71794, 17514):
|
||||
/* 824cfd193aaf6234b2b4dc0cf3c6ef576c0d00ef padauk-3.0/Padauk-book.ttf */
|
||||
case ENCODE (1330, 109904, 57938):
|
||||
case HB_CODEPOINT_ENCODE3 (1330, 109904, 57938):
|
||||
/* 91fcc10cf15e012d27571e075b3b4dfe31754a8a padauk-3.0/Padauk-bookbold.ttf */
|
||||
case ENCODE (1330, 109904, 58972):
|
||||
case HB_CODEPOINT_ENCODE3 (1330, 109904, 58972):
|
||||
/* sha1sum: c26e41d567ed821bed997e937bc0c41435689e85 Padauk.ttf
|
||||
* "Padauk Regular" "Version 2.5", see https://crbug.com/681813 */
|
||||
case ENCODE (1004, 59092, 14836):
|
||||
case HB_CODEPOINT_ENCODE3 (1004, 59092, 14836):
|
||||
return true;
|
||||
#undef ENCODE
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hb.hh"
|
||||
#include "hb-machinery.hh"
|
||||
|
||||
#include "hb-ucd-table.hh"
|
||||
|
||||
/* hb_unicode_funcs_t callback: returns the value of _hb_ucd_ccc() for
 * @unicode, converted to hb_unicode_combining_class_t.  @ufuncs and
 * @user_data are unused. */
static hb_unicode_combining_class_t
hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
			hb_codepoint_t unicode,
			void *user_data HB_UNUSED)
{
  return static_cast<hb_unicode_combining_class_t> (_hb_ucd_ccc (unicode));
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: returns the value of _hb_ucd_gc() for
 * @unicode, converted to hb_unicode_general_category_t.  @ufuncs and
 * @user_data are unused. */
static hb_unicode_general_category_t
hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
			 hb_codepoint_t unicode,
			 void *user_data HB_UNUSED)
{
  return static_cast<hb_unicode_general_category_t> (_hb_ucd_gc (unicode));
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: returns @unicode shifted by the
 * per-code-point delta that _hb_ucd_bmg() reports (presumably zero for
 * characters without a mirrored form -- confirm against the generated
 * table).  @ufuncs and @user_data are unused. */
static hb_codepoint_t
hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		  hb_codepoint_t unicode,
		  void *user_data HB_UNUSED)
{
  hb_codepoint_t mirrored = unicode;
  mirrored += _hb_ucd_bmg (unicode);
  return mirrored;
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: _hb_ucd_sc() yields an index for @unicode,
 * which _hb_ucd_sc_map translates to an hb_script_t.  @ufuncs and
 * @user_data are unused. */
static hb_script_t
hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
	       hb_codepoint_t unicode,
	       void *user_data HB_UNUSED)
{
  const auto script_index = _hb_ucd_sc (unicode);
  return _hb_ucd_sc_map[script_index];
}
|
||||
|
||||
|
||||
/* Constants for the arithmetic Hangul (de)composition helpers that follow.
 * SBASE/SCOUNT bound the precomposed syllable range; LBASE/LCOUNT and
 * VBASE/VCOUNT bound the leading and vowel jamo accepted for L,V
 * composition.  TBASE is excluded by the composer (it tests b > TBASE), so
 * trailing index 0 stands for "no trailing jamo".  NCOUNT is the number of
 * syllables sharing one leading jamo. */
#define SBASE 0xAC00u
|
||||
#define LBASE 0x1100u
|
||||
#define VBASE 0x1161u
|
||||
#define TBASE 0x11A7u
|
||||
#define SCOUNT 11172u
|
||||
#define LCOUNT 19u
|
||||
#define VCOUNT 21u
|
||||
#define TCOUNT 28u
|
||||
#define NCOUNT (VCOUNT * TCOUNT)
|
||||
|
||||
static inline bool
|
||||
_hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
|
||||
{
|
||||
unsigned si = ab - SBASE;
|
||||
|
||||
if (si >= SCOUNT)
|
||||
return false;
|
||||
|
||||
if (si % TCOUNT)
|
||||
{
|
||||
/* LV,T */
|
||||
*a = SBASE + (si / TCOUNT) * TCOUNT;
|
||||
*b = TBASE + (si % TCOUNT);
|
||||
return true;
|
||||
} else {
|
||||
/* L,V */
|
||||
*a = LBASE + (si / NCOUNT);
|
||||
*b = VBASE + (si % NCOUNT) / TCOUNT;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
_hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
|
||||
{
|
||||
if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
|
||||
!((a - SBASE) % TCOUNT))
|
||||
{
|
||||
/* LV,T */
|
||||
*ab = a + (b - TBASE);
|
||||
return true;
|
||||
}
|
||||
else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT))
|
||||
{
|
||||
/* L,V */
|
||||
int li = a - LBASE;
|
||||
int vi = b - VBASE;
|
||||
*ab = SBASE + li * NCOUNT + vi * TCOUNT;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static int
|
||||
_cmp_pair (const void *_key, const void *_item)
|
||||
{
|
||||
uint64_t& a = * (uint64_t*) _key;
|
||||
uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
|
||||
|
||||
return a < b ? -1 : a > b ? +1 : 0;
|
||||
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: composes @a and @b into *ab.
 * Hangul is handled arithmetically first; every other pair is binary-searched
 * in _hb_ucd_dm2_map.  A table entry whose third field is zero does not
 * compose (the table generator writes zero for composition-excluded code
 * points and for those with a non-zero combining class).  Returns true only
 * when *ab has been written. */
static hb_bool_t
hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
		void *user_data HB_UNUSED)
{
  if (_hb_ucd_compose_hangul (a, b, ab))
    return true;

  uint64_t key = HB_CODEPOINT_ENCODE3 (a, b, 0);
  uint64_t *entry = (uint64_t*) hb_bsearch (&key, _hb_ucd_dm2_map,
					    ARRAY_LENGTH (_hb_ucd_dm2_map),
					    sizeof (*_hb_ucd_dm2_map),
					    _cmp_pair);
  if (likely (!entry))
    return false;

  hb_codepoint_t composed = HB_CODEPOINT_DECODE3_3 (*entry);
  if (unlikely (!composed))
    return false;

  *ab = composed;
  return true;
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: decomposes @ab into *a and *b.
 * Hangul is handled arithmetically first.  Otherwise _hb_ucd_dm() returns 0
 * for "no decomposition", or a 1-based index that addresses _hb_ucd_dm1_map
 * (single code point, *b set to 0) followed by _hb_ucd_dm2_map (packed
 * pair).  Returns true only when *a and *b have been written. */
static hb_bool_t
hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
		  hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
		  void *user_data HB_UNUSED)
{
  if (_hb_ucd_decompose_hangul (ab, a, b))
    return true;

  unsigned index = _hb_ucd_dm (ab);
  if (likely (index == 0))
    return false;
  index -= 1;

  if (index >= ARRAY_LENGTH (_hb_ucd_dm1_map))
  {
    /* Past the singleton entries: the remainder indexes the pair table. */
    index -= ARRAY_LENGTH (_hb_ucd_dm1_map);
    const uint64_t packed = _hb_ucd_dm2_map[index];
    *a = HB_CODEPOINT_DECODE3_1 (packed);
    *b = HB_CODEPOINT_DECODE3_2 (packed);
  }
  else
  {
    *a = _hb_ucd_dm1_map[index];
    *b = 0;
  }
  return true;
}
|
||||
|
||||
|
||||
#if HB_USE_ATEXIT
|
||||
static void free_static_ucd_funcs ();
|
||||
#endif
|
||||
|
||||
static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
|
||||
{
|
||||
static hb_unicode_funcs_t *create ()
|
||||
{
|
||||
hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
|
||||
|
||||
hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
|
||||
|
||||
hb_unicode_funcs_make_immutable (funcs);
|
||||
|
||||
#if HB_USE_ATEXIT
|
||||
atexit (free_static_ucd_funcs);
|
||||
#endif
|
||||
|
||||
return funcs;
|
||||
}
|
||||
} static_ucd_funcs;
|
||||
|
||||
#if HB_USE_ATEXIT
/* atexit() handler registered by the loader's create(): frees the instance
 * held by static_ucd_funcs. */
static void
free_static_ucd_funcs ()
{
  static_ucd_funcs.free_instance ();
}
#endif
|
||||
|
||||
extern "C" HB_INTERNAL
|
||||
hb_unicode_funcs_t *
|
||||
hb_ucd_get_unicode_funcs ();
|
||||
|
||||
hb_unicode_funcs_t *
|
||||
hb_ucd_get_unicode_funcs ()
|
||||
{
|
||||
return static_ucd_funcs.get_unconst ();
|
||||
}
|
272
src/hb-ucdn.cc
272
src/hb-ucdn.cc
|
@ -1,272 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hb.hh"
|
||||
|
||||
#include "hb-machinery.hh"
|
||||
|
||||
#include "ucdn.h"
|
||||
|
||||
/* Maps the script id returned by ucdn_get_script() to the corresponding
 * hb_script_t.  The order of entries is significant: the array is indexed
 * directly by that id. */
static const hb_script_t ucdn_script_translate[] =
{
    HB_SCRIPT_COMMON, HB_SCRIPT_LATIN, HB_SCRIPT_GREEK, HB_SCRIPT_CYRILLIC,
    HB_SCRIPT_ARMENIAN, HB_SCRIPT_HEBREW, HB_SCRIPT_ARABIC, HB_SCRIPT_SYRIAC,
    HB_SCRIPT_THAANA, HB_SCRIPT_DEVANAGARI, HB_SCRIPT_BENGALI, HB_SCRIPT_GURMUKHI,
    HB_SCRIPT_GUJARATI, HB_SCRIPT_ORIYA, HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU,
    HB_SCRIPT_KANNADA, HB_SCRIPT_MALAYALAM, HB_SCRIPT_SINHALA, HB_SCRIPT_THAI,
    HB_SCRIPT_LAO, HB_SCRIPT_TIBETAN, HB_SCRIPT_MYANMAR, HB_SCRIPT_GEORGIAN,
    HB_SCRIPT_HANGUL, HB_SCRIPT_ETHIOPIC, HB_SCRIPT_CHEROKEE, HB_SCRIPT_CANADIAN_SYLLABICS,
    HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC, HB_SCRIPT_KHMER, HB_SCRIPT_MONGOLIAN,
    HB_SCRIPT_HIRAGANA, HB_SCRIPT_KATAKANA, HB_SCRIPT_BOPOMOFO, HB_SCRIPT_HAN,
    HB_SCRIPT_YI, HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_GOTHIC, HB_SCRIPT_DESERET,
    HB_SCRIPT_INHERITED, HB_SCRIPT_TAGALOG, HB_SCRIPT_HANUNOO, HB_SCRIPT_BUHID,
    HB_SCRIPT_TAGBANWA, HB_SCRIPT_LIMBU, HB_SCRIPT_TAI_LE, HB_SCRIPT_LINEAR_B,
    HB_SCRIPT_UGARITIC, HB_SCRIPT_SHAVIAN, HB_SCRIPT_OSMANYA, HB_SCRIPT_CYPRIOT,
    HB_SCRIPT_BRAILLE, HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC, HB_SCRIPT_NEW_TAI_LUE,
    HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_TIFINAGH, HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_OLD_PERSIAN,
    HB_SCRIPT_KHAROSHTHI, HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM, HB_SCRIPT_PHOENICIAN,
    HB_SCRIPT_PHAGS_PA, HB_SCRIPT_NKO, HB_SCRIPT_SUNDANESE, HB_SCRIPT_LEPCHA,
    HB_SCRIPT_OL_CHIKI, HB_SCRIPT_VAI, HB_SCRIPT_SAURASHTRA, HB_SCRIPT_KAYAH_LI,
    HB_SCRIPT_REJANG, HB_SCRIPT_LYCIAN, HB_SCRIPT_CARIAN, HB_SCRIPT_LYDIAN,
    HB_SCRIPT_CHAM, HB_SCRIPT_TAI_THAM, HB_SCRIPT_TAI_VIET, HB_SCRIPT_AVESTAN,
    HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, HB_SCRIPT_SAMARITAN, HB_SCRIPT_LISU, HB_SCRIPT_BAMUM,
    HB_SCRIPT_JAVANESE, HB_SCRIPT_MEETEI_MAYEK, HB_SCRIPT_IMPERIAL_ARAMAIC, HB_SCRIPT_OLD_SOUTH_ARABIAN,
    HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, HB_SCRIPT_OLD_TURKIC, HB_SCRIPT_KAITHI,
    HB_SCRIPT_BATAK, HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC, HB_SCRIPT_CHAKMA,
    HB_SCRIPT_MEROITIC_CURSIVE, HB_SCRIPT_MEROITIC_HIEROGLYPHS, HB_SCRIPT_MIAO, HB_SCRIPT_SHARADA,
    HB_SCRIPT_SORA_SOMPENG, HB_SCRIPT_TAKRI, HB_SCRIPT_UNKNOWN, HB_SCRIPT_BASSA_VAH,
    HB_SCRIPT_CAUCASIAN_ALBANIAN, HB_SCRIPT_DUPLOYAN, HB_SCRIPT_ELBASAN, HB_SCRIPT_GRANTHA,
    HB_SCRIPT_KHOJKI, HB_SCRIPT_KHUDAWADI, HB_SCRIPT_LINEAR_A, HB_SCRIPT_MAHAJANI,
    HB_SCRIPT_MANICHAEAN, HB_SCRIPT_MENDE_KIKAKUI, HB_SCRIPT_MODI, HB_SCRIPT_MRO,
    HB_SCRIPT_NABATAEAN, HB_SCRIPT_OLD_NORTH_ARABIAN, HB_SCRIPT_OLD_PERMIC, HB_SCRIPT_PAHAWH_HMONG,
    HB_SCRIPT_PALMYRENE, HB_SCRIPT_PAU_CIN_HAU, HB_SCRIPT_PSALTER_PAHLAVI, HB_SCRIPT_SIDDHAM,
    HB_SCRIPT_TIRHUTA, HB_SCRIPT_WARANG_CITI, HB_SCRIPT_AHOM, HB_SCRIPT_ANATOLIAN_HIEROGLYPHS,
    HB_SCRIPT_HATRAN, HB_SCRIPT_MULTANI, HB_SCRIPT_OLD_HUNGARIAN, HB_SCRIPT_SIGNWRITING,
    HB_SCRIPT_ADLAM, HB_SCRIPT_BHAIKSUKI, HB_SCRIPT_MARCHEN, HB_SCRIPT_NEWA,
    HB_SCRIPT_OSAGE, HB_SCRIPT_TANGUT, HB_SCRIPT_MASARAM_GONDI, HB_SCRIPT_NUSHU,
    HB_SCRIPT_SOYOMBO, HB_SCRIPT_ZANABAZAR_SQUARE, HB_SCRIPT_DOGRA, HB_SCRIPT_GUNJALA_GONDI,
    HB_SCRIPT_HANIFI_ROHINGYA, HB_SCRIPT_MAKASAR, HB_SCRIPT_MEDEFAIDRIN, HB_SCRIPT_OLD_SOGDIAN,
    HB_SCRIPT_SOGDIAN, HB_SCRIPT_ELYMAIC, HB_SCRIPT_NANDINAGARI, HB_SCRIPT_NYIAKENG_PUACHUE_HMONG,
    HB_SCRIPT_WANCHO,
};
|
||||
|
||||
/* hb_unicode_funcs_t callback: forwards to ucdn_get_combining_class() and
 * converts the result to hb_unicode_combining_class_t. */
static hb_unicode_combining_class_t
hb_ucdn_combining_class(hb_unicode_funcs_t *ufuncs HB_UNUSED,
			hb_codepoint_t unicode,
			void *user_data HB_UNUSED)
{
  return static_cast<hb_unicode_combining_class_t> (ucdn_get_combining_class (unicode));
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: forwards to ucdn_get_general_category() and
 * converts the result to hb_unicode_general_category_t. */
static hb_unicode_general_category_t
hb_ucdn_general_category(hb_unicode_funcs_t *ufuncs HB_UNUSED,
			 hb_codepoint_t unicode,
			 void *user_data HB_UNUSED)
{
  return static_cast<hb_unicode_general_category_t> (ucdn_get_general_category (unicode));
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: returns whatever ucdn_mirror() reports for
 * @unicode. */
static hb_codepoint_t
hb_ucdn_mirroring(hb_unicode_funcs_t *ufuncs HB_UNUSED,
		  hb_codepoint_t unicode,
		  void *user_data HB_UNUSED)
{
  hb_codepoint_t mirrored = ucdn_mirror (unicode);
  return mirrored;
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: translates the script id from
 * ucdn_get_script() to an hb_script_t through ucdn_script_translate. */
static hb_script_t
hb_ucdn_script(hb_unicode_funcs_t *ufuncs HB_UNUSED,
	       hb_codepoint_t unicode,
	       void *user_data HB_UNUSED)
{
  const auto ucdn_script_id = ucdn_get_script (unicode);
  return ucdn_script_translate[ucdn_script_id];
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: forwards to ucdn_compose().  Note the
 * argument order differs: UCDN takes the output pointer first. */
static hb_bool_t
hb_ucdn_compose(hb_unicode_funcs_t *ufuncs HB_UNUSED,
		hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
		void *user_data HB_UNUSED)
{
  hb_bool_t composed = ucdn_compose (ab, a, b);
  return composed;
}
|
||||
|
||||
/* hb_unicode_funcs_t callback: forwards to ucdn_decompose() with the same
 * argument order. */
static hb_bool_t
hb_ucdn_decompose(hb_unicode_funcs_t *ufuncs HB_UNUSED,
		  hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
		  void *user_data HB_UNUSED)
{
  hb_bool_t decomposed = ucdn_decompose (ab, a, b);
  return decomposed;
}
|
||||
|
||||
|
||||
#if HB_USE_ATEXIT
|
||||
static void free_static_ucdn_funcs ();
|
||||
#endif
|
||||
|
||||
static struct hb_ucdn_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucdn_unicode_funcs_lazy_loader_t>
|
||||
{
|
||||
static hb_unicode_funcs_t *create ()
|
||||
{
|
||||
hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
|
||||
|
||||
hb_unicode_funcs_set_combining_class_func (funcs, hb_ucdn_combining_class, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_general_category_func (funcs, hb_ucdn_general_category, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_mirroring_func (funcs, hb_ucdn_mirroring, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_script_func (funcs, hb_ucdn_script, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_compose_func (funcs, hb_ucdn_compose, nullptr, nullptr);
|
||||
hb_unicode_funcs_set_decompose_func (funcs, hb_ucdn_decompose, nullptr, nullptr);
|
||||
|
||||
hb_unicode_funcs_make_immutable (funcs);
|
||||
|
||||
#if HB_USE_ATEXIT
|
||||
atexit (free_static_ucdn_funcs);
|
||||
#endif
|
||||
|
||||
return funcs;
|
||||
}
|
||||
} static_ucdn_funcs;
|
||||
|
||||
#if HB_USE_ATEXIT
/* atexit() handler registered by the loader's create(): frees the instance
 * held by static_ucdn_funcs. */
static void
free_static_ucdn_funcs ()
{
  static_ucdn_funcs.free_instance ();
}
#endif
|
||||
|
||||
extern "C" HB_INTERNAL
|
||||
hb_unicode_funcs_t *
|
||||
hb_ucdn_get_unicode_funcs ();
|
||||
|
||||
hb_unicode_funcs_t *
|
||||
hb_ucdn_get_unicode_funcs ()
|
||||
{
|
||||
return static_ucdn_funcs.get_unconst ();
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
The contents of this directory are licensed under the following terms:
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any
|
||||
purpose with or without fee is hereby granted, provided that the above
|
||||
copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
@ -1,16 +0,0 @@
|
|||
## Process this file with automake to produce Makefile.in
|
||||
|
||||
noinst_LTLIBRARIES = libhb-ucdn.la
|
||||
|
||||
include Makefile.sources
|
||||
|
||||
libhb_ucdn_la_SOURCES = $(LIBHB_UCDN_sources)
|
||||
libhb_ucdn_la_CPPFLAGS = \
|
||||
-I$(top_srcdir) \
|
||||
-I$(top_srcdir)/src \
|
||||
-I$(top_builddir)/src
|
||||
libhb_ucdn_la_LIBADD =
|
||||
|
||||
EXTRA_DIST = README COPYING
|
||||
|
||||
-include $(top_srcdir)/git.mk
|
|
@ -1,7 +0,0 @@
|
|||
NULL =
|
||||
|
||||
LIBHB_UCDN_sources = \
|
||||
ucdn.h \
|
||||
ucdn.c \
|
||||
ucdn_db.h \
|
||||
$(NULL)
|
|
@ -1,40 +0,0 @@
|
|||
Contents of this directory are derived from UCDN:
|
||||
|
||||
https://github.com/grigorig/ucdn
|
||||
|
||||
The original README follows:
|
||||
|
||||
|
||||
UCDN - Unicode Database and Normalization
|
||||
|
||||
UCDN is a Unicode support library. Currently, it provides access
|
||||
to basic character properties contained in the Unicode Character
|
||||
Database and low-level normalization functions (pairwise canonical
|
||||
composition/decomposition and compatibility decomposition). More
|
||||
functionality might be provided in the future, such as additional
|
||||
properties, string normalization and encoding conversion.
|
||||
|
||||
UCDN uses standard C89 with no particular dependencies or requirements
|
||||
except for stdint.h, and can be easily integrated into existing
|
||||
projects. However, it can also be used as a standalone library,
|
||||
and a CMake build script is provided for this. The first motivation
|
||||
behind UCDN development was to provide a standalone set of Unicode
|
||||
functions for the HarfBuzz OpenType shaping library. For this purpose,
|
||||
a HarfBuzz-specific wrapper is shipped along with it (hb-ucdn.h).
|
||||
|
||||
UCDN is published under the ISC license, please see the license header
|
||||
in the C source code for more information. The makeunicodata.py script
|
||||
required for parsing Unicode database files is licensed under the
|
||||
PSF license, please see PYTHON-LICENSE for more information.
|
||||
|
||||
UCDN was written by Grigori Goronzy <greg@kinoho.net>.
|
||||
|
||||
How to Use
|
||||
|
||||
Include ucdn.c, ucdn.h and ucdn_db.h in your project. Now, just use the
|
||||
functions as documented in ucdn.h.
|
||||
|
||||
In some cases, it might be necessary to regenerate the Unicode
|
||||
database file. The script makeunicodedata.py (Python 3.x required)
|
||||
fetches the appropriate files and dumps the compressed database into
|
||||
ucdn_db.h.
|
|
@ -1,361 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include "ucdn.h"
|
||||
|
||||
/* One property record, as returned by get_ucd_record().  Every field is a
 * single byte; NOTE(review): the field order presumably has to match the
 * initializers in ucdn_db.h -- confirm before reordering. */
typedef struct {
|
||||
unsigned char category;
|
||||
unsigned char combining;
|
||||
unsigned char bidi_class;
|
||||
unsigned char east_asian_width;
|
||||
unsigned char script;
|
||||
unsigned char linebreak_class;
|
||||
} UCDRecord;
|
||||
|
||||
/* A from -> to pair, compared on `from` by compare_mp().  Both members are
 * unsigned short, so presumably only BMP code points are representable. */
typedef struct {
|
||||
unsigned short from, to;
|
||||
} MirrorPair;
|
||||
|
||||
/* An entry of the bracket_pairs table searched by search_bp(): a from -> to
 * pair (unsigned short each) plus a one-byte type tag.  Entries are compared
 * on `from` by compare_bp(). */
typedef struct {
|
||||
unsigned short from, to;
|
||||
unsigned char type;
|
||||
} BracketPair;
|
||||
|
||||
/* A run of code points used by get_comp_index(): a code inside the run
 * starting at `start` maps to `index + (code - start)`.  `count` is the
 * extent that compare_reindex() adds to `start` when matching. */
typedef struct {
|
||||
unsigned int start;
|
||||
short count, index;
|
||||
} Reindex;
|
||||
|
||||
#include "ucdn_db.h"
|
||||
|
||||
/* Constants required for arithmetic Hangul (de)composition: the *BASE values
 * are the first code point of each range, the *COUNT values the range sizes,
 * and NCOUNT is VCOUNT * TCOUNT. */
|
||||
#define SBASE 0xAC00
|
||||
#define LBASE 0x1100
|
||||
#define VBASE 0x1161
|
||||
#define TBASE 0x11A7
|
||||
#define SCOUNT 11172
|
||||
#define LCOUNT 19
|
||||
#define VCOUNT 21
|
||||
#define TCOUNT 28
|
||||
#define NCOUNT (VCOUNT * TCOUNT)
|
||||
|
||||
static const UCDRecord *get_ucd_record(uint32_t code)
|
||||
{
|
||||
int index, offset;
|
||||
|
||||
if (code >= 0x110000)
|
||||
index = 0;
|
||||
else {
|
||||
index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1;
|
||||
offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1);
|
||||
index = index1[index + offset] << SHIFT2;
|
||||
offset = code & ((1<<SHIFT2) - 1);
|
||||
index = index2[index + offset];
|
||||
}
|
||||
|
||||
return &ucd_records[index];
|
||||
}
|
||||
|
||||
static const unsigned short *get_decomp_record(uint32_t code)
|
||||
{
|
||||
int index, offset;
|
||||
|
||||
if (code >= 0x110000)
|
||||
index = 0;
|
||||
else {
|
||||
index = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)]
|
||||
<< DECOMP_SHIFT1;
|
||||
offset = (code >> DECOMP_SHIFT2) & ((1<<DECOMP_SHIFT1) - 1);
|
||||
index = decomp_index1[index + offset] << DECOMP_SHIFT2;
|
||||
offset = code & ((1<<DECOMP_SHIFT2) - 1);
|
||||
index = decomp_index2[index + offset];
|
||||
}
|
||||
|
||||
return &decomp_data[index];
|
||||
}
|
||||
|
||||
static int compare_reindex(const void *a, const void *b)
|
||||
{
|
||||
Reindex *ra = (Reindex *)a;
|
||||
Reindex *rb = (Reindex *)b;
|
||||
|
||||
if (ra->start < rb->start)
|
||||
return -1;
|
||||
else if (ra->start > (rb->start + rb->count))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_comp_index(uint32_t code, const Reindex *idx, size_t len)
|
||||
{
|
||||
Reindex *res;
|
||||
Reindex r = {0, 0, 0};
|
||||
r.start = code;
|
||||
res = (Reindex *) bsearch(&r, idx, len, sizeof(Reindex), compare_reindex);
|
||||
|
||||
if (res != NULL)
|
||||
return res->index + (code - res->start);
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int compare_mp(const void *a, const void *b)
|
||||
{
|
||||
MirrorPair *mpa = (MirrorPair *)a;
|
||||
MirrorPair *mpb = (MirrorPair *)b;
|
||||
return mpa->from - mpb->from;
|
||||
}
|
||||
|
||||
static int compare_bp(const void *a, const void *b)
|
||||
{
|
||||
BracketPair *bpa = (BracketPair *)a;
|
||||
BracketPair *bpb = (BracketPair *)b;
|
||||
return bpa->from - bpb->from;
|
||||
}
|
||||
|
||||
static BracketPair *search_bp(uint32_t code)
|
||||
{
|
||||
BracketPair bp = {0,0,2};
|
||||
BracketPair *res;
|
||||
|
||||
bp.from = code;
|
||||
res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN,
|
||||
sizeof(BracketPair), compare_bp);
|
||||
return res;
|
||||
}
|
||||
|
||||
static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b)
|
||||
{
|
||||
int si = code - SBASE;
|
||||
|
||||
if (si < 0 || si >= SCOUNT)
|
||||
return 0;
|
||||
|
||||
if (si % TCOUNT) {
|
||||
/* LV,T */
|
||||
*a = SBASE + (si / TCOUNT) * TCOUNT;
|
||||
*b = TBASE + (si % TCOUNT);
|
||||
return 3;
|
||||
} else {
|
||||
/* L,V */
|
||||
*a = LBASE + (si / NCOUNT);
|
||||
*b = VBASE + (si % NCOUNT) / TCOUNT;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b)
|
||||
{
|
||||
if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
|
||||
!((a - SBASE) % TCOUNT)) {
|
||||
/* LV,T */
|
||||
*code = a + (b - TBASE);
|
||||
return 3;
|
||||
} else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) {
|
||||
/* L,V */
|
||||
int li = a - LBASE;
|
||||
int vi = b - VBASE;
|
||||
*code = SBASE + li * NCOUNT + vi * TCOUNT;
|
||||
return 2;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Decode one codepoint from a UTF-16 sequence and advance the cursor.
 *
 * @param code_ptr in/out cursor into the UTF-16 data; advanced by one unit
 *                 for a single-unit codepoint and by two for a surrogate
 *                 pair
 * @return the decoded codepoint
 *
 * The second unit of a pair is not validated.
 */
static uint32_t decode_utf16(const unsigned short **code_ptr)
{
    const unsigned short *code = *code_ptr;

    /* Only a high surrogate (0xD800..0xDBFF) starts a two-unit sequence;
     * 0xDC00 is a low surrogate and must not be treated as a lead unit. */
    if (code[0] < 0xd800 || code[0] > 0xdbff) {
        *code_ptr += 1;
        return (uint32_t)code[0];
    }

    *code_ptr += 2;
    return 0x10000 + ((uint32_t)code[1] - 0xdc00) +
        (((uint32_t)code[0] - 0xd800) << 10);
}
|
||||
|
||||
const char *ucdn_get_unicode_version(void)
|
||||
{
|
||||
return UNIDATA_VERSION;
|
||||
}
|
||||
|
||||
int ucdn_get_combining_class(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->combining;
|
||||
}
|
||||
|
||||
int ucdn_get_east_asian_width(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->east_asian_width;
|
||||
}
|
||||
|
||||
int ucdn_get_general_category(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->category;
|
||||
}
|
||||
|
||||
int ucdn_get_bidi_class(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->bidi_class;
|
||||
}
|
||||
|
||||
/* Return 1 when ucdn_mirror() maps the codepoint to a different one, else 0. */
int ucdn_get_mirrored(uint32_t code)
{
    uint32_t mirrored = ucdn_mirror(code);

    return mirrored != code;
}
|
||||
|
||||
int ucdn_get_script(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->script;
|
||||
}
|
||||
|
||||
int ucdn_get_linebreak_class(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->linebreak_class;
|
||||
}
|
||||
|
||||
int ucdn_get_resolved_linebreak_class(uint32_t code)
|
||||
{
|
||||
const UCDRecord *record = get_ucd_record(code);
|
||||
|
||||
switch (record->linebreak_class)
|
||||
{
|
||||
case UCDN_LINEBREAK_CLASS_AI:
|
||||
case UCDN_LINEBREAK_CLASS_SG:
|
||||
case UCDN_LINEBREAK_CLASS_XX:
|
||||
return UCDN_LINEBREAK_CLASS_AL;
|
||||
|
||||
case UCDN_LINEBREAK_CLASS_SA:
|
||||
if (record->category == UCDN_GENERAL_CATEGORY_MC ||
|
||||
record->category == UCDN_GENERAL_CATEGORY_MN)
|
||||
return UCDN_LINEBREAK_CLASS_CM;
|
||||
return UCDN_LINEBREAK_CLASS_AL;
|
||||
|
||||
case UCDN_LINEBREAK_CLASS_CJ:
|
||||
return UCDN_LINEBREAK_CLASS_NS;
|
||||
|
||||
case UCDN_LINEBREAK_CLASS_CB:
|
||||
return UCDN_LINEBREAK_CLASS_B2;
|
||||
|
||||
case UCDN_LINEBREAK_CLASS_NL:
|
||||
return UCDN_LINEBREAK_CLASS_BK;
|
||||
|
||||
default:
|
||||
return record->linebreak_class;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t ucdn_mirror(uint32_t code)
|
||||
{
|
||||
MirrorPair mp = {0};
|
||||
MirrorPair *res;
|
||||
|
||||
mp.from = code;
|
||||
res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN,
|
||||
sizeof(MirrorPair), compare_mp);
|
||||
|
||||
if (res == NULL)
|
||||
return code;
|
||||
else
|
||||
return res->to;
|
||||
}
|
||||
|
||||
uint32_t ucdn_paired_bracket(uint32_t code)
|
||||
{
|
||||
BracketPair *res = search_bp(code);
|
||||
if (res == NULL)
|
||||
return code;
|
||||
else
|
||||
return res->to;
|
||||
}
|
||||
|
||||
int ucdn_paired_bracket_type(uint32_t code)
|
||||
{
|
||||
BracketPair *res = search_bp(code);
|
||||
if (res == NULL)
|
||||
return UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE;
|
||||
else
|
||||
return res->type;
|
||||
}
|
||||
|
||||
/*
 * Pairwise decomposition of a codepoint.
 *
 * Hangul syllables are handled by hangul_pair_decompose(). Otherwise the
 * decomposition record is consulted: the high byte of its first element is
 * the decomposition length, and the record is used only when the low byte
 * is zero (NOTE(review): the low byte presumably tags non-canonical
 * decompositions - confirm against the table generator).
 *
 * @param code Unicode codepoint
 * @param a    receives the first codepoint of the decomposition
 * @param b    receives the second codepoint, or 0 when the decomposition
 *             has a single codepoint
 * @return 1 when a and b were written, 0 when no decomposition was produced
 */
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
    const unsigned short *cursor;
    int count;

    if (hangul_pair_decompose(code, a, b))
        return 1;

    cursor = get_decomp_record(code);
    count = cursor[0] >> 8;

    if (count == 0 || (cursor[0] & 0xff) != 0)
        return 0;

    /* step over the header element to the UTF-16 payload */
    cursor++;
    *a = decode_utf16(&cursor);
    *b = (count > 1) ? decode_utf16(&cursor) : 0;

    return 1;
}
|
||||
|
||||
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b)
|
||||
{
|
||||
int l, r, index, indexi, offset;
|
||||
|
||||
if (hangul_pair_compose(code, a, b))
|
||||
return 1;
|
||||
|
||||
l = get_comp_index(a, nfc_first, sizeof(nfc_first) / sizeof(Reindex));
|
||||
r = get_comp_index(b, nfc_last, sizeof(nfc_last) / sizeof(Reindex));
|
||||
|
||||
if (l < 0 || r < 0)
|
||||
return 0;
|
||||
|
||||
indexi = l * TOTAL_LAST + r;
|
||||
index = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1;
|
||||
offset = (indexi >> COMP_SHIFT2) & ((1<<COMP_SHIFT1) - 1);
|
||||
index = comp_index1[index + offset] << COMP_SHIFT2;
|
||||
offset = indexi & ((1<<COMP_SHIFT2) - 1);
|
||||
*code = comp_data[index + offset];
|
||||
|
||||
return *code != 0;
|
||||
}
|
||||
|
||||
/*
 * Full decomposition of a codepoint as stored in its decomposition record.
 * Unlike ucdn_decompose(), the low byte of the record header is ignored.
 *
 * @param code       Unicode codepoint
 * @param decomposed output buffer; receives one codepoint per decoded
 *                   element, so it must have room for the record's length
 * @return number of codepoints written, or 0 when the record length is 0
 */
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed)
{
    const unsigned short *cursor = get_decomp_record(code);
    int count = cursor[0] >> 8;
    uint32_t *out = decomposed;
    int remaining;

    if (count == 0)
        return 0;

    /* step over the header element to the UTF-16 payload */
    cursor++;
    for (remaining = count; remaining > 0; remaining--)
        *out++ = decode_utf16(&cursor);

    return count;
}
|
|
@ -1,472 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UCDN_H
|
||||
#define UCDN_H
|
||||
|
||||
|
||||
|
||||
#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
||||
# define HB_BEGIN_VISIBILITY _Pragma ("GCC visibility push(hidden)")
|
||||
# define HB_END_VISIBILITY _Pragma ("GCC visibility pop")
|
||||
#else
|
||||
# define HB_BEGIN_VISIBILITY
|
||||
# define HB_END_VISIBILITY
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
# define HB_BEGIN_HEADER extern "C" { HB_BEGIN_VISIBILITY
|
||||
# define HB_END_HEADER HB_END_VISIBILITY }
|
||||
#else
|
||||
# define HB_BEGIN_HEADER HB_BEGIN_VISIBILITY
|
||||
# define HB_END_HEADER HB_END_VISIBILITY
|
||||
#endif
|
||||
|
||||
HB_BEGIN_HEADER
|
||||
|
||||
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || \
|
||||
defined (_sgi) || defined (__sun) || defined (sun) || \
|
||||
defined (__digital__) || defined (__HP_cc)
|
||||
# include <inttypes.h>
|
||||
#elif defined (_AIX)
|
||||
# include <sys/inttypes.h>
|
||||
#elif defined (_MSC_VER) && _MSC_VER < 1600
|
||||
/* VS 2010 (_MSC_VER 1600) has stdint.h */
|
||||
typedef __int8 int8_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef __int16 int16_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef __int32 int32_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#else
|
||||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define UCDN_EAST_ASIAN_F 0
|
||||
#define UCDN_EAST_ASIAN_H 1
|
||||
#define UCDN_EAST_ASIAN_W 2
|
||||
#define UCDN_EAST_ASIAN_NA 3
|
||||
#define UCDN_EAST_ASIAN_A 4
|
||||
#define UCDN_EAST_ASIAN_N 5
|
||||
|
||||
#define UCDN_SCRIPT_COMMON 0
|
||||
#define UCDN_SCRIPT_LATIN 1
|
||||
#define UCDN_SCRIPT_GREEK 2
|
||||
#define UCDN_SCRIPT_CYRILLIC 3
|
||||
#define UCDN_SCRIPT_ARMENIAN 4
|
||||
#define UCDN_SCRIPT_HEBREW 5
|
||||
#define UCDN_SCRIPT_ARABIC 6
|
||||
#define UCDN_SCRIPT_SYRIAC 7
|
||||
#define UCDN_SCRIPT_THAANA 8
|
||||
#define UCDN_SCRIPT_DEVANAGARI 9
|
||||
#define UCDN_SCRIPT_BENGALI 10
|
||||
#define UCDN_SCRIPT_GURMUKHI 11
|
||||
#define UCDN_SCRIPT_GUJARATI 12
|
||||
#define UCDN_SCRIPT_ORIYA 13
|
||||
#define UCDN_SCRIPT_TAMIL 14
|
||||
#define UCDN_SCRIPT_TELUGU 15
|
||||
#define UCDN_SCRIPT_KANNADA 16
|
||||
#define UCDN_SCRIPT_MALAYALAM 17
|
||||
#define UCDN_SCRIPT_SINHALA 18
|
||||
#define UCDN_SCRIPT_THAI 19
|
||||
#define UCDN_SCRIPT_LAO 20
|
||||
#define UCDN_SCRIPT_TIBETAN 21
|
||||
#define UCDN_SCRIPT_MYANMAR 22
|
||||
#define UCDN_SCRIPT_GEORGIAN 23
|
||||
#define UCDN_SCRIPT_HANGUL 24
|
||||
#define UCDN_SCRIPT_ETHIOPIC 25
|
||||
#define UCDN_SCRIPT_CHEROKEE 26
|
||||
#define UCDN_SCRIPT_CANADIAN_ABORIGINAL 27
|
||||
#define UCDN_SCRIPT_OGHAM 28
|
||||
#define UCDN_SCRIPT_RUNIC 29
|
||||
#define UCDN_SCRIPT_KHMER 30
|
||||
#define UCDN_SCRIPT_MONGOLIAN 31
|
||||
#define UCDN_SCRIPT_HIRAGANA 32
|
||||
#define UCDN_SCRIPT_KATAKANA 33
|
||||
#define UCDN_SCRIPT_BOPOMOFO 34
|
||||
#define UCDN_SCRIPT_HAN 35
|
||||
#define UCDN_SCRIPT_YI 36
|
||||
#define UCDN_SCRIPT_OLD_ITALIC 37
|
||||
#define UCDN_SCRIPT_GOTHIC 38
|
||||
#define UCDN_SCRIPT_DESERET 39
|
||||
#define UCDN_SCRIPT_INHERITED 40
|
||||
#define UCDN_SCRIPT_TAGALOG 41
|
||||
#define UCDN_SCRIPT_HANUNOO 42
|
||||
#define UCDN_SCRIPT_BUHID 43
|
||||
#define UCDN_SCRIPT_TAGBANWA 44
|
||||
#define UCDN_SCRIPT_LIMBU 45
|
||||
#define UCDN_SCRIPT_TAI_LE 46
|
||||
#define UCDN_SCRIPT_LINEAR_B 47
|
||||
#define UCDN_SCRIPT_UGARITIC 48
|
||||
#define UCDN_SCRIPT_SHAVIAN 49
|
||||
#define UCDN_SCRIPT_OSMANYA 50
|
||||
#define UCDN_SCRIPT_CYPRIOT 51
|
||||
#define UCDN_SCRIPT_BRAILLE 52
|
||||
#define UCDN_SCRIPT_BUGINESE 53
|
||||
#define UCDN_SCRIPT_COPTIC 54
|
||||
#define UCDN_SCRIPT_NEW_TAI_LUE 55
|
||||
#define UCDN_SCRIPT_GLAGOLITIC 56
|
||||
#define UCDN_SCRIPT_TIFINAGH 57
|
||||
#define UCDN_SCRIPT_SYLOTI_NAGRI 58
|
||||
#define UCDN_SCRIPT_OLD_PERSIAN 59
|
||||
#define UCDN_SCRIPT_KHAROSHTHI 60
|
||||
#define UCDN_SCRIPT_BALINESE 61
|
||||
#define UCDN_SCRIPT_CUNEIFORM 62
|
||||
#define UCDN_SCRIPT_PHOENICIAN 63
|
||||
#define UCDN_SCRIPT_PHAGS_PA 64
|
||||
#define UCDN_SCRIPT_NKO 65
|
||||
#define UCDN_SCRIPT_SUNDANESE 66
|
||||
#define UCDN_SCRIPT_LEPCHA 67
|
||||
#define UCDN_SCRIPT_OL_CHIKI 68
|
||||
#define UCDN_SCRIPT_VAI 69
|
||||
#define UCDN_SCRIPT_SAURASHTRA 70
|
||||
#define UCDN_SCRIPT_KAYAH_LI 71
|
||||
#define UCDN_SCRIPT_REJANG 72
|
||||
#define UCDN_SCRIPT_LYCIAN 73
|
||||
#define UCDN_SCRIPT_CARIAN 74
|
||||
#define UCDN_SCRIPT_LYDIAN 75
|
||||
#define UCDN_SCRIPT_CHAM 76
|
||||
#define UCDN_SCRIPT_TAI_THAM 77
|
||||
#define UCDN_SCRIPT_TAI_VIET 78
|
||||
#define UCDN_SCRIPT_AVESTAN 79
|
||||
#define UCDN_SCRIPT_EGYPTIAN_HIEROGLYPHS 80
|
||||
#define UCDN_SCRIPT_SAMARITAN 81
|
||||
#define UCDN_SCRIPT_LISU 82
|
||||
#define UCDN_SCRIPT_BAMUM 83
|
||||
#define UCDN_SCRIPT_JAVANESE 84
|
||||
#define UCDN_SCRIPT_MEETEI_MAYEK 85
|
||||
#define UCDN_SCRIPT_IMPERIAL_ARAMAIC 86
|
||||
#define UCDN_SCRIPT_OLD_SOUTH_ARABIAN 87
|
||||
#define UCDN_SCRIPT_INSCRIPTIONAL_PARTHIAN 88
|
||||
#define UCDN_SCRIPT_INSCRIPTIONAL_PAHLAVI 89
|
||||
#define UCDN_SCRIPT_OLD_TURKIC 90
|
||||
#define UCDN_SCRIPT_KAITHI 91
|
||||
#define UCDN_SCRIPT_BATAK 92
|
||||
#define UCDN_SCRIPT_BRAHMI 93
|
||||
#define UCDN_SCRIPT_MANDAIC 94
|
||||
#define UCDN_SCRIPT_CHAKMA 95
|
||||
#define UCDN_SCRIPT_MEROITIC_CURSIVE 96
|
||||
#define UCDN_SCRIPT_MEROITIC_HIEROGLYPHS 97
|
||||
#define UCDN_SCRIPT_MIAO 98
|
||||
#define UCDN_SCRIPT_SHARADA 99
|
||||
#define UCDN_SCRIPT_SORA_SOMPENG 100
|
||||
#define UCDN_SCRIPT_TAKRI 101
|
||||
#define UCDN_SCRIPT_UNKNOWN 102
|
||||
#define UCDN_SCRIPT_BASSA_VAH 103
|
||||
#define UCDN_SCRIPT_CAUCASIAN_ALBANIAN 104
|
||||
#define UCDN_SCRIPT_DUPLOYAN 105
|
||||
#define UCDN_SCRIPT_ELBASAN 106
|
||||
#define UCDN_SCRIPT_GRANTHA 107
|
||||
#define UCDN_SCRIPT_KHOJKI 108
|
||||
#define UCDN_SCRIPT_KHUDAWADI 109
|
||||
#define UCDN_SCRIPT_LINEAR_A 110
|
||||
#define UCDN_SCRIPT_MAHAJANI 111
|
||||
#define UCDN_SCRIPT_MANICHAEAN 112
|
||||
#define UCDN_SCRIPT_MENDE_KIKAKUI 113
|
||||
#define UCDN_SCRIPT_MODI 114
|
||||
#define UCDN_SCRIPT_MRO 115
|
||||
#define UCDN_SCRIPT_NABATAEAN 116
|
||||
#define UCDN_SCRIPT_OLD_NORTH_ARABIAN 117
|
||||
#define UCDN_SCRIPT_OLD_PERMIC 118
|
||||
#define UCDN_SCRIPT_PAHAWH_HMONG 119
|
||||
#define UCDN_SCRIPT_PALMYRENE 120
|
||||
#define UCDN_SCRIPT_PAU_CIN_HAU 121
|
||||
#define UCDN_SCRIPT_PSALTER_PAHLAVI 122
|
||||
#define UCDN_SCRIPT_SIDDHAM 123
|
||||
#define UCDN_SCRIPT_TIRHUTA 124
|
||||
#define UCDN_SCRIPT_WARANG_CITI 125
|
||||
#define UCDN_SCRIPT_AHOM 126
|
||||
#define UCDN_SCRIPT_ANATOLIAN_HIEROGLYPHS 127
|
||||
#define UCDN_SCRIPT_HATRAN 128
|
||||
#define UCDN_SCRIPT_MULTANI 129
|
||||
#define UCDN_SCRIPT_OLD_HUNGARIAN 130
|
||||
#define UCDN_SCRIPT_SIGNWRITING 131
|
||||
#define UCDN_SCRIPT_ADLAM 132
|
||||
#define UCDN_SCRIPT_BHAIKSUKI 133
|
||||
#define UCDN_SCRIPT_MARCHEN 134
|
||||
#define UCDN_SCRIPT_NEWA 135
|
||||
#define UCDN_SCRIPT_OSAGE 136
|
||||
#define UCDN_SCRIPT_TANGUT 137
|
||||
#define UCDN_SCRIPT_MASARAM_GONDI 138
|
||||
#define UCDN_SCRIPT_NUSHU 139
|
||||
#define UCDN_SCRIPT_SOYOMBO 140
|
||||
#define UCDN_SCRIPT_ZANABAZAR_SQUARE 141
|
||||
#define UCDN_SCRIPT_DOGRA 142
|
||||
#define UCDN_SCRIPT_GUNJALA_GONDI 143
|
||||
#define UCDN_SCRIPT_HANIFI_ROHINGYA 144
|
||||
#define UCDN_SCRIPT_MAKASAR 145
|
||||
#define UCDN_SCRIPT_MEDEFAIDRIN 146
|
||||
#define UCDN_SCRIPT_OLD_SOGDIAN 147
|
||||
#define UCDN_SCRIPT_SOGDIAN 148
|
||||
#define UCDN_SCRIPT_ELYMAIC 149
|
||||
#define UCDN_SCRIPT_NANDINAGARI 150
|
||||
#define UCDN_SCRIPT_NYIAKENG_PUACHUE_HMONG 151
|
||||
#define UCDN_SCRIPT_WANCHO 152
|
||||
|
||||
#define UCDN_LINEBREAK_CLASS_OP 0
|
||||
#define UCDN_LINEBREAK_CLASS_CL 1
|
||||
#define UCDN_LINEBREAK_CLASS_CP 2
|
||||
#define UCDN_LINEBREAK_CLASS_QU 3
|
||||
#define UCDN_LINEBREAK_CLASS_GL 4
|
||||
#define UCDN_LINEBREAK_CLASS_NS 5
|
||||
#define UCDN_LINEBREAK_CLASS_EX 6
|
||||
#define UCDN_LINEBREAK_CLASS_SY 7
|
||||
#define UCDN_LINEBREAK_CLASS_IS 8
|
||||
#define UCDN_LINEBREAK_CLASS_PR 9
|
||||
#define UCDN_LINEBREAK_CLASS_PO 10
|
||||
#define UCDN_LINEBREAK_CLASS_NU 11
|
||||
#define UCDN_LINEBREAK_CLASS_AL 12
|
||||
#define UCDN_LINEBREAK_CLASS_HL 13
|
||||
#define UCDN_LINEBREAK_CLASS_ID 14
|
||||
#define UCDN_LINEBREAK_CLASS_IN 15
|
||||
#define UCDN_LINEBREAK_CLASS_HY 16
|
||||
#define UCDN_LINEBREAK_CLASS_BA 17
|
||||
#define UCDN_LINEBREAK_CLASS_BB 18
|
||||
#define UCDN_LINEBREAK_CLASS_B2 19
|
||||
#define UCDN_LINEBREAK_CLASS_ZW 20
|
||||
#define UCDN_LINEBREAK_CLASS_CM 21
|
||||
#define UCDN_LINEBREAK_CLASS_WJ 22
|
||||
#define UCDN_LINEBREAK_CLASS_H2 23
|
||||
#define UCDN_LINEBREAK_CLASS_H3 24
|
||||
#define UCDN_LINEBREAK_CLASS_JL 25
|
||||
#define UCDN_LINEBREAK_CLASS_JV 26
|
||||
#define UCDN_LINEBREAK_CLASS_JT 27
|
||||
#define UCDN_LINEBREAK_CLASS_RI 28
|
||||
#define UCDN_LINEBREAK_CLASS_AI 29
|
||||
#define UCDN_LINEBREAK_CLASS_BK 30
|
||||
#define UCDN_LINEBREAK_CLASS_CB 31
|
||||
#define UCDN_LINEBREAK_CLASS_CJ 32
|
||||
#define UCDN_LINEBREAK_CLASS_CR 33
|
||||
#define UCDN_LINEBREAK_CLASS_LF 34
|
||||
#define UCDN_LINEBREAK_CLASS_NL 35
|
||||
#define UCDN_LINEBREAK_CLASS_SA 36
|
||||
#define UCDN_LINEBREAK_CLASS_SG 37
|
||||
#define UCDN_LINEBREAK_CLASS_SP 38
|
||||
#define UCDN_LINEBREAK_CLASS_XX 39
|
||||
#define UCDN_LINEBREAK_CLASS_ZWJ 40
|
||||
#define UCDN_LINEBREAK_CLASS_EB 41
|
||||
#define UCDN_LINEBREAK_CLASS_EM 42
|
||||
|
||||
#define UCDN_GENERAL_CATEGORY_CC 0
|
||||
#define UCDN_GENERAL_CATEGORY_CF 1
|
||||
#define UCDN_GENERAL_CATEGORY_CN 2
|
||||
#define UCDN_GENERAL_CATEGORY_CO 3
|
||||
#define UCDN_GENERAL_CATEGORY_CS 4
|
||||
#define UCDN_GENERAL_CATEGORY_LL 5
|
||||
#define UCDN_GENERAL_CATEGORY_LM 6
|
||||
#define UCDN_GENERAL_CATEGORY_LO 7
|
||||
#define UCDN_GENERAL_CATEGORY_LT 8
|
||||
#define UCDN_GENERAL_CATEGORY_LU 9
|
||||
#define UCDN_GENERAL_CATEGORY_MC 10
|
||||
#define UCDN_GENERAL_CATEGORY_ME 11
|
||||
#define UCDN_GENERAL_CATEGORY_MN 12
|
||||
#define UCDN_GENERAL_CATEGORY_ND 13
|
||||
#define UCDN_GENERAL_CATEGORY_NL 14
|
||||
#define UCDN_GENERAL_CATEGORY_NO 15
|
||||
#define UCDN_GENERAL_CATEGORY_PC 16
|
||||
#define UCDN_GENERAL_CATEGORY_PD 17
|
||||
#define UCDN_GENERAL_CATEGORY_PE 18
|
||||
#define UCDN_GENERAL_CATEGORY_PF 19
|
||||
#define UCDN_GENERAL_CATEGORY_PI 20
|
||||
#define UCDN_GENERAL_CATEGORY_PO 21
|
||||
#define UCDN_GENERAL_CATEGORY_PS 22
|
||||
#define UCDN_GENERAL_CATEGORY_SC 23
|
||||
#define UCDN_GENERAL_CATEGORY_SK 24
|
||||
#define UCDN_GENERAL_CATEGORY_SM 25
|
||||
#define UCDN_GENERAL_CATEGORY_SO 26
|
||||
#define UCDN_GENERAL_CATEGORY_ZL 27
|
||||
#define UCDN_GENERAL_CATEGORY_ZP 28
|
||||
#define UCDN_GENERAL_CATEGORY_ZS 29
|
||||
|
||||
#define UCDN_BIDI_CLASS_L 0
|
||||
#define UCDN_BIDI_CLASS_LRE 1
|
||||
#define UCDN_BIDI_CLASS_LRO 2
|
||||
#define UCDN_BIDI_CLASS_R 3
|
||||
#define UCDN_BIDI_CLASS_AL 4
|
||||
#define UCDN_BIDI_CLASS_RLE 5
|
||||
#define UCDN_BIDI_CLASS_RLO 6
|
||||
#define UCDN_BIDI_CLASS_PDF 7
|
||||
#define UCDN_BIDI_CLASS_EN 8
|
||||
#define UCDN_BIDI_CLASS_ES 9
|
||||
#define UCDN_BIDI_CLASS_ET 10
|
||||
#define UCDN_BIDI_CLASS_AN 11
|
||||
#define UCDN_BIDI_CLASS_CS 12
|
||||
#define UCDN_BIDI_CLASS_NSM 13
|
||||
#define UCDN_BIDI_CLASS_BN 14
|
||||
#define UCDN_BIDI_CLASS_B 15
|
||||
#define UCDN_BIDI_CLASS_S 16
|
||||
#define UCDN_BIDI_CLASS_WS 17
|
||||
#define UCDN_BIDI_CLASS_ON 18
|
||||
#define UCDN_BIDI_CLASS_LRI 19
|
||||
#define UCDN_BIDI_CLASS_RLI 20
|
||||
#define UCDN_BIDI_CLASS_FSI 21
|
||||
#define UCDN_BIDI_CLASS_PDI 22
|
||||
|
||||
#define UCDN_BIDI_PAIRED_BRACKET_TYPE_OPEN 0
|
||||
#define UCDN_BIDI_PAIRED_BRACKET_TYPE_CLOSE 1
|
||||
#define UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE 2
|
||||
|
||||
/**
|
||||
* Return version of the Unicode database.
|
||||
*
|
||||
* @return Unicode database version
|
||||
*/
|
||||
const char *ucdn_get_unicode_version(void);
|
||||
|
||||
/**
|
||||
* Get combining class of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return combining class value, as defined in UAX#44
|
||||
*/
|
||||
int ucdn_get_combining_class(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get east-asian width of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_EAST_ASIAN_* and as defined in UAX#11.
|
||||
*/
|
||||
int ucdn_get_east_asian_width(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get general category of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_GENERAL_CATEGORY_* and as defined in
|
||||
* UAX#44.
|
||||
*/
|
||||
int ucdn_get_general_category(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get bidirectional class of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_BIDI_CLASS_* and as defined in UAX#44.
|
||||
*/
|
||||
int ucdn_get_bidi_class(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get script of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_SCRIPT_* and as defined in UAX#24.
|
||||
*/
|
||||
int ucdn_get_script(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get unresolved linebreak class of a codepoint. This does not take
|
||||
* rule LB1 of UAX#14 into account. See ucdn_get_resolved_linebreak_class()
|
||||
* for resolved linebreak classes.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_LINEBREAK_CLASS_* and as defined in UAX#14.
|
||||
*/
|
||||
int ucdn_get_linebreak_class(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get resolved linebreak class of a codepoint. This resolves characters
|
||||
* in the AI, SG, XX, SA and CJ classes according to rule LB1 of UAX#14.
|
||||
* In addition the CB class is resolved as the equivalent B2 class and
|
||||
* the NL class is resolved as the equivalent BK class.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_LINEBREAK_CLASS_* and as defined in UAX#14.
|
||||
*/
|
||||
int ucdn_get_resolved_linebreak_class(uint32_t code);
|
||||
|
||||
/**
|
||||
* Check if codepoint can be mirrored.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return 1 if mirrored character exists, otherwise 0
|
||||
*/
|
||||
int ucdn_get_mirrored(uint32_t code);
|
||||
|
||||
/**
|
||||
* Mirror a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return mirrored codepoint or the original codepoint if no
|
||||
* mirrored character exists
|
||||
*/
|
||||
uint32_t ucdn_mirror(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get paired bracket for a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return paired bracket codepoint or the original codepoint if no
|
||||
* paired bracket character exists
|
||||
*/
|
||||
uint32_t ucdn_paired_bracket(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get paired bracket type for a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_BIDI_PAIRED_BRACKET_TYPE_* and as defined
|
||||
* in UAX#9.
|
||||
*
|
||||
*/
|
||||
int ucdn_paired_bracket_type(uint32_t code);
|
||||
|
||||
/**
|
||||
* Pairwise canonical decomposition of a codepoint. This includes
|
||||
* Hangul Jamo decomposition (see chapter 3.12 of the Unicode core
|
||||
* specification).
|
||||
*
|
||||
* Hangul is decomposed into L and V jamos for LV forms, and an
|
||||
* LV precomposed syllable and a T jamo for LVT forms.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @param a filled with first codepoint of decomposition
|
||||
* @param b filled with second codepoint of decomposition, or 0
|
||||
* @return 1 if a decomposition was written to a and b, otherwise 0
|
||||
*/
|
||||
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b);
|
||||
|
||||
/**
|
||||
* Compatibility decomposition of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @param decomposed filled with decomposition, must be able to hold 18
|
||||
* characters
|
||||
* @return length of decomposition or 0 in case none exists
|
||||
*/
|
||||
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed);
|
||||
|
||||
/**
|
||||
* Pairwise canonical composition of two codepoints. This includes
|
||||
* Hangul Jamo composition (see chapter 3.12 of the Unicode core
|
||||
* specification).
|
||||
*
|
||||
* Hangul composition expects either L and V jamos, or an LV
|
||||
* precomposed syllable and a T jamo. This is exactly the inverse
|
||||
* of pairwise Hangul decomposition.
|
||||
*
|
||||
* @param code filled with composition
|
||||
* @param a first codepoint
|
||||
* @param b second codepoint
|
||||
* @return 1 if a composition was written to code, otherwise 0
|
||||
*/
|
||||
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b);
|
||||
|
||||
HB_END_HEADER
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -127,15 +127,15 @@ hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED
|
|||
#endif
|
||||
|
||||
|
||||
extern "C" hb_unicode_funcs_t *hb_ucd_get_unicode_funcs ();
|
||||
extern "C" hb_unicode_funcs_t *hb_glib_get_unicode_funcs ();
|
||||
extern "C" hb_unicode_funcs_t *hb_icu_get_unicode_funcs ();
|
||||
extern "C" hb_unicode_funcs_t *hb_ucdn_get_unicode_funcs ();
|
||||
|
||||
hb_unicode_funcs_t *
|
||||
hb_unicode_funcs_get_default ()
|
||||
{
|
||||
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_UCDN)
|
||||
return hb_ucdn_get_unicode_funcs ();
|
||||
#if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
|
||||
return hb_ucd_get_unicode_funcs ();
|
||||
#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
|
||||
return hb_glib_get_unicode_funcs ();
|
||||
#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
|
||||
|
@ -148,7 +148,7 @@ hb_unicode_funcs_get_default ()
|
|||
|
||||
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
|
||||
#error "Could not find any Unicode functions implementation, you have to provide your own"
|
||||
#error "Consider building hb-ucdn.c. If you absolutely want to build without any, check the code."
|
||||
#error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code."
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue