Import UCDN into source tree

https://github.com/grigorig/ucdn
This commit is contained in:
Behdad Esfahbod 2012-10-02 16:03:18 -04:00
parent 0e292eb2a2
commit 3f33f0d1f2
9 changed files with 5498 additions and 1 deletions

View File

@ -125,6 +125,14 @@ AM_CONDITIONAL(HAVE_HB_OLD, $have_hb_old)
dnl =========================================================================== dnl ===========================================================================
have_ucdn=true
if $have_ucdn; then
AC_DEFINE(HAVE_UCDN, 1, [Have UCDN Unicode functions])
fi
AM_CONDITIONAL(HAVE_UCDN, $have_ucdn)
dnl ===========================================================================
PKG_CHECK_MODULES(GLIB, glib-2.0 >= 2.16, have_glib=true, have_glib=false) PKG_CHECK_MODULES(GLIB, glib-2.0 >= 2.16, have_glib=true, have_glib=false)
if $have_glib; then if $have_glib; then
AC_DEFINE(HAVE_GLIB, 1, [Have glib2 library]) AC_DEFINE(HAVE_GLIB, 1, [Have glib2 library])
@ -245,8 +253,9 @@ Makefile
harfbuzz.pc harfbuzz.pc
src/Makefile src/Makefile
src/hb-version.h src/hb-version.h
src/hb-old/Makefile
src/hb-icu-le/Makefile src/hb-icu-le/Makefile
src/hb-old/Makefile
src/hb-ucdn/Makefile
util/Makefile util/Makefile
test/Makefile test/Makefile
test/api/Makefile test/api/Makefile

View File

@ -178,6 +178,13 @@ HBSOURCES += hb-icu-le.cc
endif endif
DIST_SUBDIRS += hb-icu-le DIST_SUBDIRS += hb-icu-le
if HAVE_UCDN
SUBDIRS += hb-ucdn
HBCFLAGS += -I$(srcdir)/hb-ucdn
HBLIBS += hb-ucdn/libhb-ucdn.la
HBSOURCES += hb-ucdn.cc
endif
DIST_SUBDIRS += hb-ucdn
# Put the library together # Put the library together

207
src/hb-ucdn.cc Normal file
View File

@ -0,0 +1,207 @@
/*
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "hb-private.hh"
#include "hb-unicode-private.hh"
HB_BEGIN_DECLS
#include "ucdn.h"
HB_END_DECLS
/* Maps the integer returned by ucdn_get_script() to the corresponding
 * hb_script_t.  The table is indexed directly by the UCDN script value, so
 * the entries must stay in exactly the same order as the UCDN_SCRIPT_*
 * constants in ucdn.h (UCDN_SCRIPT_COMMON == 0 ... UCDN_SCRIPT_UNKNOWN == 102).
 * When the UCDN database is regenerated with new scripts, extend this table
 * in lock-step. */
static const hb_script_t ucdn_script_translate[] =
{
HB_SCRIPT_COMMON,
HB_SCRIPT_LATIN,
HB_SCRIPT_GREEK,
HB_SCRIPT_CYRILLIC,
HB_SCRIPT_ARMENIAN,
HB_SCRIPT_HEBREW,
HB_SCRIPT_ARABIC,
HB_SCRIPT_SYRIAC,
HB_SCRIPT_THAANA,
HB_SCRIPT_DEVANAGARI,
HB_SCRIPT_BENGALI,
HB_SCRIPT_GURMUKHI,
HB_SCRIPT_GUJARATI,
HB_SCRIPT_ORIYA,
HB_SCRIPT_TAMIL,
HB_SCRIPT_TELUGU,
HB_SCRIPT_KANNADA,
HB_SCRIPT_MALAYALAM,
HB_SCRIPT_SINHALA,
HB_SCRIPT_THAI,
HB_SCRIPT_LAO,
HB_SCRIPT_TIBETAN,
HB_SCRIPT_MYANMAR,
HB_SCRIPT_GEORGIAN,
HB_SCRIPT_HANGUL,
HB_SCRIPT_ETHIOPIC,
HB_SCRIPT_CHEROKEE,
HB_SCRIPT_CANADIAN_ABORIGINAL,
HB_SCRIPT_OGHAM,
HB_SCRIPT_RUNIC,
HB_SCRIPT_KHMER,
HB_SCRIPT_MONGOLIAN,
HB_SCRIPT_HIRAGANA,
HB_SCRIPT_KATAKANA,
HB_SCRIPT_BOPOMOFO,
HB_SCRIPT_HAN,
HB_SCRIPT_YI,
HB_SCRIPT_OLD_ITALIC,
HB_SCRIPT_GOTHIC,
HB_SCRIPT_DESERET,
HB_SCRIPT_INHERITED,
HB_SCRIPT_TAGALOG,
HB_SCRIPT_HANUNOO,
HB_SCRIPT_BUHID,
HB_SCRIPT_TAGBANWA,
HB_SCRIPT_LIMBU,
HB_SCRIPT_TAI_LE,
HB_SCRIPT_LINEAR_B,
HB_SCRIPT_UGARITIC,
HB_SCRIPT_SHAVIAN,
HB_SCRIPT_OSMANYA,
HB_SCRIPT_CYPRIOT,
HB_SCRIPT_BRAILLE,
HB_SCRIPT_BUGINESE,
HB_SCRIPT_COPTIC,
HB_SCRIPT_NEW_TAI_LUE,
HB_SCRIPT_GLAGOLITIC,
HB_SCRIPT_TIFINAGH,
HB_SCRIPT_SYLOTI_NAGRI,
HB_SCRIPT_OLD_PERSIAN,
HB_SCRIPT_KHAROSHTHI,
HB_SCRIPT_BALINESE,
HB_SCRIPT_CUNEIFORM,
HB_SCRIPT_PHOENICIAN,
HB_SCRIPT_PHAGS_PA,
HB_SCRIPT_NKO,
HB_SCRIPT_SUNDANESE,
HB_SCRIPT_LEPCHA,
HB_SCRIPT_OL_CHIKI,
HB_SCRIPT_VAI,
HB_SCRIPT_SAURASHTRA,
HB_SCRIPT_KAYAH_LI,
HB_SCRIPT_REJANG,
HB_SCRIPT_LYCIAN,
HB_SCRIPT_CARIAN,
HB_SCRIPT_LYDIAN,
HB_SCRIPT_CHAM,
HB_SCRIPT_TAI_THAM,
HB_SCRIPT_TAI_VIET,
HB_SCRIPT_AVESTAN,
HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
HB_SCRIPT_SAMARITAN,
HB_SCRIPT_LISU,
HB_SCRIPT_BAMUM,
HB_SCRIPT_JAVANESE,
HB_SCRIPT_MEETEI_MAYEK,
HB_SCRIPT_IMPERIAL_ARAMAIC,
HB_SCRIPT_OLD_SOUTH_ARABIAN,
HB_SCRIPT_INSCRIPTIONAL_PARTHIAN,
HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
HB_SCRIPT_OLD_TURKIC,
HB_SCRIPT_KAITHI,
HB_SCRIPT_BATAK,
HB_SCRIPT_BRAHMI,
HB_SCRIPT_MANDAIC,
HB_SCRIPT_CHAKMA,
HB_SCRIPT_MEROITIC_CURSIVE,
HB_SCRIPT_MEROITIC_HIEROGLYPHS,
HB_SCRIPT_MIAO,
HB_SCRIPT_SHARADA,
HB_SCRIPT_SORA_SOMPENG,
HB_SCRIPT_TAKRI,
HB_SCRIPT_UNKNOWN,
};
/* combining_class callback: asks UCDN for the codepoint's combining class
 * and converts the integer result to the HarfBuzz enum type.  ufuncs and
 * user_data are not used. */
static hb_unicode_combining_class_t
hb_ucdn_combining_class (hb_unicode_funcs_t *ufuncs,
                         hb_codepoint_t unicode,
                         void *user_data)
{
  int ccc = ucdn_get_combining_class (unicode);

  return (hb_unicode_combining_class_t) ccc;
}
/* eastasian_width callback: reports 2 for Fullwidth (F) and Wide (W)
 * codepoints and 1 for every other East Asian Width value.  ufuncs and
 * user_data are not used. */
static unsigned int
hb_ucdn_eastasian_width (hb_unicode_funcs_t *ufuncs,
                         hb_codepoint_t unicode,
                         void *user_data)
{
  switch (ucdn_get_east_asian_width (unicode))
  {
    case UCDN_EAST_ASIAN_F:
    case UCDN_EAST_ASIAN_W:
      return 2;

    default:
      return 1;
  }
}
/* general_category callback: the UCDN general-category value is cast
 * straight to hb_unicode_general_category_t, so the two numberings are
 * assumed to agree.  ufuncs and user_data are not used. */
static hb_unicode_general_category_t
hb_ucdn_general_category (hb_unicode_funcs_t *ufuncs,
                          hb_codepoint_t unicode,
                          void *user_data)
{
  int gc = ucdn_get_general_category (unicode);

  return (hb_unicode_general_category_t) gc;
}
/* mirroring callback: returns whatever ucdn_mirror() yields for the
 * codepoint (the mirrored codepoint, or the input itself when there is
 * none).  ufuncs and user_data are not used. */
static hb_codepoint_t
hb_ucdn_mirroring (hb_unicode_funcs_t *ufuncs,
                   hb_codepoint_t unicode,
                   void *user_data)
{
  hb_codepoint_t mirrored = ucdn_mirror (unicode);

  return mirrored;
}
/* script callback: translates the UCDN script value of a codepoint into
 * the matching hb_script_t via ucdn_script_translate.
 *
 * The UCDN database can be regenerated independently of the translation
 * table, so the value is range-checked before it is used as an index;
 * anything the table does not cover is reported as HB_SCRIPT_UNKNOWN
 * instead of reading past the end of the array.  ufuncs and user_data are
 * not used. */
static hb_script_t
hb_ucdn_script (hb_unicode_funcs_t *ufuncs,
                hb_codepoint_t unicode,
                void *user_data)
{
  const unsigned int table_len =
    sizeof (ucdn_script_translate) / sizeof (ucdn_script_translate[0]);
  int script = ucdn_get_script (unicode);

  if (script < 0 || (unsigned int) script >= table_len)
    return HB_SCRIPT_UNKNOWN;

  return ucdn_script_translate[script];
}
/* compose callback: pairwise composition of a and b through UCDN.  Note
 * that ucdn_compose() takes the output pointer first.  Returns non-zero
 * when *ab was filled with a composition.  ufuncs and user_data are not
 * used. */
static hb_bool_t
hb_ucdn_compose (hb_unicode_funcs_t *ufuncs,
                 hb_codepoint_t a,
                 hb_codepoint_t b,
                 hb_codepoint_t *ab,
                 void *user_data)
{
  int composed = ucdn_compose (ab, a, b);

  return composed;
}
/* decompose callback: pairwise canonical decomposition of ab through
 * UCDN.  Returns non-zero when *a and *b were filled.  ufuncs and
 * user_data are not used. */
static hb_bool_t
hb_ucdn_decompose (hb_unicode_funcs_t *ufuncs,
                   hb_codepoint_t ab,
                   hb_codepoint_t *a,
                   hb_codepoint_t *b,
                   void *user_data)
{
  int decomposed = ucdn_decompose (ab, a, b);

  return decomposed;
}
/* decompose_compatibility callback: writes the compatibility decomposition
 * of u into decomposed and returns its length as reported by UCDN (0 when
 * there is none).  ufuncs and user_data are not used. */
static unsigned int
hb_ucdn_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
                                 hb_codepoint_t u,
                                 hb_codepoint_t *decomposed,
                                 void *user_data)
{
  int len = ucdn_compat_decompose (u, decomposed);

  return len;
}
/* Returns the UCDN-backed unicode funcs object.
 *
 * The object is a function-local static const instance, initialized with
 * no parent and marked immutable; the same address is returned on every
 * call, with const cast away to satisfy the return type. */
extern "C" HB_INTERNAL
hb_unicode_funcs_t *
hb_ucdn_get_unicode_funcs (void)
{
static const hb_unicode_funcs_t _hb_ucdn_unicode_funcs = {
HB_OBJECT_HEADER_STATIC,
NULL, /* parent */
true, /* immutable */
{
/* Expand the callback list so that each entry `name` becomes the
 * hb_ucdn_<name> function defined above, in the list's own order. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_ucdn_##name,
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
}
};
return const_cast<hb_unicode_funcs_t *> (&_hb_ucdn_unicode_funcs);
}

18
src/hb-ucdn/Makefile.am Normal file
View File

@ -0,0 +1,18 @@
## Process this file with automake to produce Makefile.in

# Non-installed libtool convenience library holding the bundled UCDN sources.
noinst_LTLIBRARIES = libhb-ucdn.la

libhb_ucdn_la_SOURCES = \
	ucdn.h \
	ucdn.c \
	unicodedata_db.h

# Per-target variables must use the full canonicalized target name,
# including the "_la" part; "libhb_ucdn_CPPFLAGS" matches no target and
# its flags would never reach the compiler.
libhb_ucdn_la_CPPFLAGS = \
	-I$(top_srcdir) \
	-I$(top_srcdir)/src \
	-I$(top_builddir)/src
libhb_ucdn_la_LIBADD =

EXTRA_DIST = README

-include $(top_srcdir)/git.mk

33
src/hb-ucdn/README Normal file
View File

@ -0,0 +1,33 @@
UCDN - Unicode Database and Normalization
UCDN is a Unicode support library. Currently, it provides access
to basic character properties contained in the Unicode Character
Database and low-level normalization functions (pairwise canonical
composition/decomposition and compatibility decomposition). More
functionality might be provided in the future, such as additional
properties, string normalization and encoding conversion.
UCDN uses standard C89 with no particular dependencies or requirements
except for stdint.h, and can be easily integrated into existing
projects. However, it can also be used as a standalone library,
and a CMake build script is provided for this. The first motivation
behind UCDN development was to provide a standalone set of Unicode
functions for the HarfBuzz OpenType shaping library. For this purpose,
a HarfBuzz-specific wrapper is shipped along with it (hb-ucdn.h).
UCDN is published under the ISC license, please see the license header
in the C source code for more information. The makeunicodedata.py script
required for parsing Unicode database files is licensed under the
PSF license, please see PYTHON-LICENSE for more information.
UCDN was written by Grigori Goronzy <greg@kinoho.net>.
How to Use
Include ucdn.c, ucdn.h and unicodedata_db.h in your project. Now,
just use the functions as documented in ucdn.h.
In some cases, it might be necessary to regenerate the Unicode
database file. The script makeunicodedata.py (Python 3.x required)
fetches the appropriate files and dumps the compressed database into
unicodedata_db.h.

282
src/hb-ucdn/ucdn.c Normal file
View File

@ -0,0 +1,282 @@
/*
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "ucdn.h"
/* Per-codepoint property record, as returned by get_ucd_record().  The
 * field order must match the positional initializers in the generated
 * unicodedata_db.h. */
typedef struct {
const unsigned char category; /* read by ucdn_get_general_category() */
const unsigned char combining; /* read by ucdn_get_combining_class() */
const unsigned char bidi_class; /* read by ucdn_get_bidi_class() */
const unsigned char mirrored; /* read by ucdn_get_mirrored(); 0 short-circuits ucdn_mirror() */
const unsigned char east_asian_width; /* read by ucdn_get_east_asian_width() */
const unsigned char normalization_check; /* not read by any function in this file */
const unsigned char script; /* read by ucdn_get_script() */
} UCDRecord;
/* One entry of the mirror_pairs table: `from` is the search key used by
 * ucdn_mirror(), `to` is the codepoint it returns.  Both are 16 bits wide. */
typedef struct {
unsigned short from, to;
} MirrorPair;
/* One range in the nfc_first / nfc_last tables scanned by get_comp_index():
 * codepoints in [start, start + count] map to index + (code - start).
 * An entry with start == 0 terminates the table. */
typedef struct {
int start;
short count, index;
} Reindex;
#include "unicodedata_db.h"
/* constants required for Hangul (de)composition */
#define SBASE 0xAC00
#define LBASE 0x1100
#define VBASE 0x1161
#define TBASE 0x11A7
#define SCOUNT 11172
#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28
#define NCOUNT (VCOUNT * TCOUNT)
static UCDRecord *get_ucd_record(uint32_t code)
{
int index, offset;
if (code >= 0x110000)
index = 0;
else {
index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1;
offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1);
index = index1[index + offset] << SHIFT2;
offset = code & ((1<<SHIFT2) - 1);
index = index2[index + offset];
}
return &ucd_records[index];
}
/*
 * Find the decomposition record for a codepoint by walking the three-stage
 * decomp_index0 -> decomp_index1 -> decomp_index2 tables.  The result
 * points into decomp_data; codepoints at or above 0x110000 resolve to
 * decomp_data[0].
 */
static unsigned short *get_decomp_record(uint32_t code)
{
    int pos = 0;

    if (code < 0x110000) {
        int hi = decomp_index0[code >> (DECOMP_SHIFT1 + DECOMP_SHIFT2)]
            << DECOMP_SHIFT1;
        int mid = (code >> DECOMP_SHIFT2) & ((1 << DECOMP_SHIFT1) - 1);
        int lo = code & ((1 << DECOMP_SHIFT2) - 1);
        int page = decomp_index1[hi + mid] << DECOMP_SHIFT2;

        pos = decomp_index2[page + lo];
    }

    return &decomp_data[pos];
}
/*
 * Translate a codepoint into its compact composition index using a table
 * of ranges.  The table is scanned in order until an entry with
 * start == 0; the scan stops early once code lies below the current
 * range's start.  Returns -1 when no range contains the codepoint.
 */
static int get_comp_index(uint32_t code, Reindex *idx)
{
    Reindex *entry;

    for (entry = idx; entry->start; entry++) {
        if (code < entry->start)
            break;
        if (code <= entry->start + entry->count)
            return entry->index + (code - entry->start);
    }

    return -1;
}
/*
 * bsearch() comparator for MirrorPair entries: orders by the `from` field.
 * Both keys are 16-bit values widened to int, so the subtraction cannot
 * overflow.
 */
static int compare_mp(const void *a, const void *b)
{
    int key_a = ((const MirrorPair *)a)->from;
    int key_b = ((const MirrorPair *)b)->from;

    return key_a - key_b;
}
/*
 * Pairwise decomposition of a precomposed Hangul syllable.
 *
 * Returns 0 (leaving *a and *b untouched) when code is not in the syllable
 * block.  Otherwise fills *a / *b and returns 3 for an LVT syllable
 * (split into its LV syllable and T jamo) or 2 for an LV syllable (split
 * into L and V jamos).
 */
static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
    int si = code - SBASE;
    int t_index;

    if (si < 0 || si >= SCOUNT)
        return 0;

    t_index = si % TCOUNT;
    if (t_index == 0) {
        /* L,V */
        *a = LBASE + (si / NCOUNT);
        *b = VBASE + (si % NCOUNT) / TCOUNT;
        return 2;
    }

    /* LV,T */
    *a = SBASE + (si / TCOUNT) * TCOUNT;
    *b = TBASE + t_index;
    return 3;
}
static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b)
{
if (b < VBASE || b >= (TBASE + TCOUNT))
return 0;
if ((a < LBASE || a >= (LBASE + LCOUNT))
&& (a < SBASE || a >= (SBASE + SCOUNT)))
return 0;
if (a >= SBASE) {
/* LV,T */
*code = a + (b - TBASE);
return 3;
} else {
/* L,V */
int li = a - LBASE;
int vi = b - VBASE;
*code = SBASE + li * NCOUNT + vi * TCOUNT;
return 2;
}
}
/*
 * Decode one codepoint from a UTF-16 sequence and advance *code_ptr past
 * the code units consumed (1 for a plain unit, 2 for a surrogate pair).
 *
 * A unit starts a surrogate pair only if it is a high surrogate
 * (0xD800..0xDBFF), i.e. its top six bits equal 110110.  Testing with the
 * mask 0xd800 instead would also match 0xF800..0xFFFF and low surrogates,
 * and mis-decode those units as pairs.
 */
static uint32_t decode_utf16(unsigned short **code_ptr)
{
    unsigned short *code = *code_ptr;

    if ((code[0] & 0xfc00) != 0xd800) {
        *code_ptr += 1;
        return (uint32_t)code[0];
    }

    *code_ptr += 2;
    return 0x10000 + ((uint32_t)code[1] - 0xdc00) +
        (((uint32_t)code[0] - 0xd800) << 10);
}
/* Returns the UNIDATA_VERSION string supplied by the generated database. */
const char *ucdn_get_unicode_version(void)
{
    const char *version = UNIDATA_VERSION;

    return version;
}
int ucdn_get_combining_class(uint32_t code)
{
return get_ucd_record(code)->combining;
}
int ucdn_get_east_asian_width(uint32_t code)
{
return get_ucd_record(code)->east_asian_width;
}
int ucdn_get_general_category(uint32_t code)
{
return get_ucd_record(code)->category;
}
int ucdn_get_bidi_class(uint32_t code)
{
return get_ucd_record(code)->bidi_class;
}
int ucdn_get_mirrored(uint32_t code)
{
return get_ucd_record(code)->mirrored;
}
int ucdn_get_script(uint32_t code)
{
return get_ucd_record(code)->script;
}
/*
 * Return the mirrored counterpart of a codepoint, or the codepoint itself
 * when its record is not flagged as mirrored or no pair is listed in
 * mirror_pairs.
 *
 * MirrorPair keys are 16 bits wide, so a codepoint above 0xFFFF can never
 * be in the table.  Such codepoints are returned unchanged up front;
 * storing them into the key would silently truncate them and could match
 * an unrelated BMP entry.
 */
uint32_t ucdn_mirror(uint32_t code)
{
    MirrorPair key = {0};
    MirrorPair *hit;

    if (get_ucd_record(code)->mirrored == 0)
        return code;

    if (code > 0xFFFF)
        return code;

    key.from = (unsigned short)code;
    hit = bsearch(&key, mirror_pairs, BIDI_MIRROR_LEN, sizeof(MirrorPair),
            compare_mp);

    return hit != NULL ? hit->to : code;
}
/*
 * Pairwise canonical decomposition.
 *
 * Hangul syllables are handled arithmetically first.  Otherwise the
 * decomposition record is consulted: the high byte of its first unit is
 * the length, and a non-zero low byte makes this function report failure
 * (presumably it tags non-canonical mappings -- confirm against the
 * database generator).  On success *a receives the first codepoint and *b
 * the second, or 0 for a single-codepoint decomposition.
 *
 * Returns 1 on success, 0 when there is nothing to decompose.
 */
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
    unsigned short *rec;
    int len, tag;

    if (hangul_pair_decompose(code, a, b))
        return 1;

    rec = get_decomp_record(code);
    tag = rec[0] & 0xff;
    len = rec[0] >> 8;
    if (tag != 0 || len == 0)
        return 0;

    rec++;
    *a = decode_utf16(&rec);
    *b = (len > 1) ? decode_utf16(&rec) : 0;

    return 1;
}
/*
 * Pairwise canonical composition.
 *
 * Hangul is handled arithmetically first.  Otherwise a and b are mapped
 * to compact indices through nfc_first / nfc_last, combined into a single
 * cell number, and looked up through the comp_index0 -> comp_index1 ->
 * comp_data tables.  *code receives the table value; a value of 0 means
 * no composition exists.
 *
 * Returns 1 when *code holds a composition, 0 otherwise.
 */
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b)
{
    int first, last, cell, block, slot;

    if (hangul_pair_compose(code, a, b))
        return 1;

    first = get_comp_index(a, nfc_first);
    last = get_comp_index(b, nfc_last);
    if (!(first >= 0 && last >= 0))
        return 0;

    cell = first * TOTAL_LAST + last;
    block = comp_index0[cell >> (COMP_SHIFT1 + COMP_SHIFT2)] << COMP_SHIFT1;
    block = comp_index1[block + ((cell >> COMP_SHIFT2) & ((1 << COMP_SHIFT1) - 1))]
        << COMP_SHIFT2;
    slot = block + (cell & ((1 << COMP_SHIFT2) - 1));

    *code = comp_data[slot];
    return *code != 0;
}
/*
 * Compatibility decomposition.
 *
 * The high byte of the record's first unit is the number of codepoints in
 * the decomposition; that many codepoints are decoded from the following
 * UTF-16 units into decomposed[].  Returns that count (0 when the
 * codepoint has no decomposition, in which case nothing is written).
 */
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed)
{
    unsigned short *cursor = get_decomp_record(code);
    int count = cursor[0] >> 8;
    int n;

    cursor++;
    for (n = 0; n < count; n++)
        decomposed[n] = decode_utf16(&cursor);

    return count;
}

290
src/hb-ucdn/ucdn.h Normal file
View File

@ -0,0 +1,290 @@
/*
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UCDN_H
#define UCDN_H
#include <stdint.h>
/* East Asian Width values returned by ucdn_get_east_asian_width(). */
#define UCDN_EAST_ASIAN_F 0
#define UCDN_EAST_ASIAN_H 1
#define UCDN_EAST_ASIAN_W 2
#define UCDN_EAST_ASIAN_NA 3
#define UCDN_EAST_ASIAN_A 4
#define UCDN_EAST_ASIAN_N 5
/* Script values returned by ucdn_get_script().  The numbering is contiguous
 * from 0; callers that translate these through a lookup table indexed by
 * the value depend on it staying that way. */
#define UCDN_SCRIPT_COMMON 0
#define UCDN_SCRIPT_LATIN 1
#define UCDN_SCRIPT_GREEK 2
#define UCDN_SCRIPT_CYRILLIC 3
#define UCDN_SCRIPT_ARMENIAN 4
#define UCDN_SCRIPT_HEBREW 5
#define UCDN_SCRIPT_ARABIC 6
#define UCDN_SCRIPT_SYRIAC 7
#define UCDN_SCRIPT_THAANA 8
#define UCDN_SCRIPT_DEVANAGARI 9
#define UCDN_SCRIPT_BENGALI 10
#define UCDN_SCRIPT_GURMUKHI 11
#define UCDN_SCRIPT_GUJARATI 12
#define UCDN_SCRIPT_ORIYA 13
#define UCDN_SCRIPT_TAMIL 14
#define UCDN_SCRIPT_TELUGU 15
#define UCDN_SCRIPT_KANNADA 16
#define UCDN_SCRIPT_MALAYALAM 17
#define UCDN_SCRIPT_SINHALA 18
#define UCDN_SCRIPT_THAI 19
#define UCDN_SCRIPT_LAO 20
#define UCDN_SCRIPT_TIBETAN 21
#define UCDN_SCRIPT_MYANMAR 22
#define UCDN_SCRIPT_GEORGIAN 23
#define UCDN_SCRIPT_HANGUL 24
#define UCDN_SCRIPT_ETHIOPIC 25
#define UCDN_SCRIPT_CHEROKEE 26
#define UCDN_SCRIPT_CANADIAN_ABORIGINAL 27
#define UCDN_SCRIPT_OGHAM 28
#define UCDN_SCRIPT_RUNIC 29
#define UCDN_SCRIPT_KHMER 30
#define UCDN_SCRIPT_MONGOLIAN 31
#define UCDN_SCRIPT_HIRAGANA 32
#define UCDN_SCRIPT_KATAKANA 33
#define UCDN_SCRIPT_BOPOMOFO 34
#define UCDN_SCRIPT_HAN 35
#define UCDN_SCRIPT_YI 36
#define UCDN_SCRIPT_OLD_ITALIC 37
#define UCDN_SCRIPT_GOTHIC 38
#define UCDN_SCRIPT_DESERET 39
#define UCDN_SCRIPT_INHERITED 40
#define UCDN_SCRIPT_TAGALOG 41
#define UCDN_SCRIPT_HANUNOO 42
#define UCDN_SCRIPT_BUHID 43
#define UCDN_SCRIPT_TAGBANWA 44
#define UCDN_SCRIPT_LIMBU 45
#define UCDN_SCRIPT_TAI_LE 46
#define UCDN_SCRIPT_LINEAR_B 47
#define UCDN_SCRIPT_UGARITIC 48
#define UCDN_SCRIPT_SHAVIAN 49
#define UCDN_SCRIPT_OSMANYA 50
#define UCDN_SCRIPT_CYPRIOT 51
#define UCDN_SCRIPT_BRAILLE 52
#define UCDN_SCRIPT_BUGINESE 53
#define UCDN_SCRIPT_COPTIC 54
#define UCDN_SCRIPT_NEW_TAI_LUE 55
#define UCDN_SCRIPT_GLAGOLITIC 56
#define UCDN_SCRIPT_TIFINAGH 57
#define UCDN_SCRIPT_SYLOTI_NAGRI 58
#define UCDN_SCRIPT_OLD_PERSIAN 59
#define UCDN_SCRIPT_KHAROSHTHI 60
#define UCDN_SCRIPT_BALINESE 61
#define UCDN_SCRIPT_CUNEIFORM 62
#define UCDN_SCRIPT_PHOENICIAN 63
#define UCDN_SCRIPT_PHAGS_PA 64
#define UCDN_SCRIPT_NKO 65
#define UCDN_SCRIPT_SUNDANESE 66
#define UCDN_SCRIPT_LEPCHA 67
#define UCDN_SCRIPT_OL_CHIKI 68
#define UCDN_SCRIPT_VAI 69
#define UCDN_SCRIPT_SAURASHTRA 70
#define UCDN_SCRIPT_KAYAH_LI 71
#define UCDN_SCRIPT_REJANG 72
#define UCDN_SCRIPT_LYCIAN 73
#define UCDN_SCRIPT_CARIAN 74
#define UCDN_SCRIPT_LYDIAN 75
#define UCDN_SCRIPT_CHAM 76
#define UCDN_SCRIPT_TAI_THAM 77
#define UCDN_SCRIPT_TAI_VIET 78
#define UCDN_SCRIPT_AVESTAN 79
#define UCDN_SCRIPT_EGYPTIAN_HIEROGLYPHS 80
#define UCDN_SCRIPT_SAMARITAN 81
#define UCDN_SCRIPT_LISU 82
#define UCDN_SCRIPT_BAMUM 83
#define UCDN_SCRIPT_JAVANESE 84
#define UCDN_SCRIPT_MEETEI_MAYEK 85
#define UCDN_SCRIPT_IMPERIAL_ARAMAIC 86
#define UCDN_SCRIPT_OLD_SOUTH_ARABIAN 87
#define UCDN_SCRIPT_INSCRIPTIONAL_PARTHIAN 88
#define UCDN_SCRIPT_INSCRIPTIONAL_PAHLAVI 89
#define UCDN_SCRIPT_OLD_TURKIC 90
#define UCDN_SCRIPT_KAITHI 91
#define UCDN_SCRIPT_BATAK 92
#define UCDN_SCRIPT_BRAHMI 93
#define UCDN_SCRIPT_MANDAIC 94
#define UCDN_SCRIPT_CHAKMA 95
#define UCDN_SCRIPT_MEROITIC_CURSIVE 96
#define UCDN_SCRIPT_MEROITIC_HIEROGLYPHS 97
#define UCDN_SCRIPT_MIAO 98
#define UCDN_SCRIPT_SHARADA 99
#define UCDN_SCRIPT_SORA_SOMPENG 100
#define UCDN_SCRIPT_TAKRI 101
#define UCDN_SCRIPT_UNKNOWN 102
/* General category values returned by ucdn_get_general_category(). */
#define UCDN_GENERAL_CATEGORY_CC 0
#define UCDN_GENERAL_CATEGORY_CF 1
#define UCDN_GENERAL_CATEGORY_CN 2
#define UCDN_GENERAL_CATEGORY_CO 3
#define UCDN_GENERAL_CATEGORY_CS 4
#define UCDN_GENERAL_CATEGORY_LL 5
#define UCDN_GENERAL_CATEGORY_LM 6
#define UCDN_GENERAL_CATEGORY_LO 7
#define UCDN_GENERAL_CATEGORY_LT 8
#define UCDN_GENERAL_CATEGORY_LU 9
#define UCDN_GENERAL_CATEGORY_MC 10
#define UCDN_GENERAL_CATEGORY_ME 11
#define UCDN_GENERAL_CATEGORY_MN 12
#define UCDN_GENERAL_CATEGORY_ND 13
#define UCDN_GENERAL_CATEGORY_NL 14
#define UCDN_GENERAL_CATEGORY_NO 15
#define UCDN_GENERAL_CATEGORY_PC 16
#define UCDN_GENERAL_CATEGORY_PD 17
#define UCDN_GENERAL_CATEGORY_PE 18
#define UCDN_GENERAL_CATEGORY_PF 19
#define UCDN_GENERAL_CATEGORY_PI 20
#define UCDN_GENERAL_CATEGORY_PO 21
#define UCDN_GENERAL_CATEGORY_PS 22
#define UCDN_GENERAL_CATEGORY_SC 23
#define UCDN_GENERAL_CATEGORY_SK 24
#define UCDN_GENERAL_CATEGORY_SM 25
#define UCDN_GENERAL_CATEGORY_SO 26
#define UCDN_GENERAL_CATEGORY_ZL 27
#define UCDN_GENERAL_CATEGORY_ZP 28
#define UCDN_GENERAL_CATEGORY_ZS 29
/* Bidirectional class values returned by ucdn_get_bidi_class(). */
#define UCDN_BIDI_CLASS_L 0
#define UCDN_BIDI_CLASS_LRE 1
#define UCDN_BIDI_CLASS_LRO 2
#define UCDN_BIDI_CLASS_R 3
#define UCDN_BIDI_CLASS_AL 4
#define UCDN_BIDI_CLASS_RLE 5
#define UCDN_BIDI_CLASS_RLO 6
#define UCDN_BIDI_CLASS_PDF 7
#define UCDN_BIDI_CLASS_EN 8
#define UCDN_BIDI_CLASS_ES 9
#define UCDN_BIDI_CLASS_ET 10
#define UCDN_BIDI_CLASS_AN 11
#define UCDN_BIDI_CLASS_CS 12
#define UCDN_BIDI_CLASS_NSM 13
#define UCDN_BIDI_CLASS_BN 14
#define UCDN_BIDI_CLASS_B 15
#define UCDN_BIDI_CLASS_S 16
#define UCDN_BIDI_CLASS_WS 17
#define UCDN_BIDI_CLASS_ON 18
/**
* Return version of the Unicode database.
*
* @return Unicode database version
*/
const char *ucdn_get_unicode_version(void);
/**
* Get combining class of a codepoint.
*
* @param code Unicode codepoint
* @return combining class value, as defined in UAX#44
*/
int ucdn_get_combining_class(uint32_t code);
/**
* Get east-asian width of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_EAST_ASIAN_* and as defined in UAX#11.
*/
int ucdn_get_east_asian_width(uint32_t code);
/**
* Get general category of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_GENERAL_CATEGORY_* and as defined in
* UAX#44.
*/
int ucdn_get_general_category(uint32_t code);
/**
* Get bidirectional class of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_BIDI_CLASS_* and as defined in UAX#44.
*/
int ucdn_get_bidi_class(uint32_t code);
/**
* Get script of a codepoint.
*
* @param code Unicode codepoint
* @return value according to UCDN_SCRIPT_* and as defined in UAX#24.
*/
int ucdn_get_script(uint32_t code);
/**
* Check if codepoint can be mirrored.
*
* @param code Unicode codepoint
* @return 1 if mirrored character exists, otherwise 0
*/
int ucdn_get_mirrored(uint32_t code);
/**
* Mirror a codepoint.
*
* @param code Unicode codepoint
* @return mirrored codepoint or the original codepoint if no
* mirrored character exists
*/
uint32_t ucdn_mirror(uint32_t code);
/**
* Pairwise canonical decomposition of a codepoint. This includes
* Hangul Jamo decomposition (see chapter 3.12 of the Unicode core
* specification).
*
* Hangul is decomposed into L and V jamos for LV forms, and an
* LV precomposed syllable and a T jamo for LVT forms.
*
* @param code Unicode codepoint
* @param a filled with first codepoint of decomposition
* @param b filled with second codepoint of decomposition, or 0
* @return success
*/
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b);
/**
* Compatibility decomposition of a codepoint.
*
* @param code Unicode codepoint
* @param decomposed filled with decomposition, must be able to hold 18
* characters
* @return length of decomposition or 0 in case none exists
*/
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed);
/**
* Pairwise canonical composition of two codepoints. This includes
* Hangul Jamo composition (see chapter 3.12 of the Unicode core
* specification).
*
* Hangul composition expects either L and V jamos, or an LV
* precomposed syllable and a T jamo. This is exactly the inverse
* of pairwise Hangul decomposition.
*
* @param code filled with composition
* @param a first codepoint
* @param b second codepoint
* @return success
*/
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b);
#endif

4648
src/hb-ucdn/unicodedata_db.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -112,6 +112,7 @@ hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED
#define HB_UNICODE_FUNCS_IMPLEMENT_SET \ #define HB_UNICODE_FUNCS_IMPLEMENT_SET \
HB_UNICODE_FUNCS_IMPLEMENT (glib) \ HB_UNICODE_FUNCS_IMPLEMENT (glib) \
HB_UNICODE_FUNCS_IMPLEMENT (icu) \ HB_UNICODE_FUNCS_IMPLEMENT (icu) \
HB_UNICODE_FUNCS_IMPLEMENT (ucdn) \
HB_UNICODE_FUNCS_IMPLEMENT (nil) \ HB_UNICODE_FUNCS_IMPLEMENT (nil) \
/* ^--- Add new callbacks before nil */ /* ^--- Add new callbacks before nil */
@ -134,6 +135,8 @@ hb_unicode_funcs_get_default (void)
HB_UNICODE_FUNCS_IMPLEMENT(glib) HB_UNICODE_FUNCS_IMPLEMENT(glib)
#elif defined(HAVE_ICU) #elif defined(HAVE_ICU)
HB_UNICODE_FUNCS_IMPLEMENT(icu) HB_UNICODE_FUNCS_IMPLEMENT(icu)
#elif defined(HAVE_UCDN)
HB_UNICODE_FUNCS_IMPLEMENT(ucdn)
#else #else
#define HB_UNICODE_FUNCS_NIL 1 #define HB_UNICODE_FUNCS_NIL 1
HB_UNICODE_FUNCS_IMPLEMENT(nil) HB_UNICODE_FUNCS_IMPLEMENT(nil)