parent
0e292eb2a2
commit
3f33f0d1f2
11
configure.ac
11
configure.ac
|
@ -125,6 +125,14 @@ AM_CONDITIONAL(HAVE_HB_OLD, $have_hb_old)
|
|||
|
||||
dnl ===========================================================================
|
||||
|
||||
have_ucdn=true
|
||||
if $have_ucdn; then
|
||||
AC_DEFINE(HAVE_UCDN, 1, [Have UCDN Unicode functions])
|
||||
fi
|
||||
AM_CONDITIONAL(HAVE_UCDN, $have_ucdn)
|
||||
|
||||
dnl ===========================================================================
|
||||
|
||||
PKG_CHECK_MODULES(GLIB, glib-2.0 >= 2.16, have_glib=true, have_glib=false)
|
||||
if $have_glib; then
|
||||
AC_DEFINE(HAVE_GLIB, 1, [Have glib2 library])
|
||||
|
@ -245,8 +253,9 @@ Makefile
|
|||
harfbuzz.pc
|
||||
src/Makefile
|
||||
src/hb-version.h
|
||||
src/hb-old/Makefile
|
||||
src/hb-icu-le/Makefile
|
||||
src/hb-old/Makefile
|
||||
src/hb-ucdn/Makefile
|
||||
util/Makefile
|
||||
test/Makefile
|
||||
test/api/Makefile
|
||||
|
|
|
@ -178,6 +178,13 @@ HBSOURCES += hb-icu-le.cc
|
|||
endif
|
||||
DIST_SUBDIRS += hb-icu-le
|
||||
|
||||
if HAVE_UCDN
|
||||
SUBDIRS += hb-ucdn
|
||||
HBCFLAGS += -I$(srcdir)/hb-ucdn
|
||||
HBLIBS += hb-ucdn/libhb-ucdn.la
|
||||
HBSOURCES += hb-ucdn.cc
|
||||
endif
|
||||
DIST_SUBDIRS += hb-ucdn
|
||||
|
||||
|
||||
# Put the library together
|
||||
|
|
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hb-private.hh"
|
||||
|
||||
#include "hb-unicode-private.hh"
|
||||
|
||||
HB_BEGIN_DECLS
|
||||
#include "ucdn.h"
|
||||
HB_END_DECLS
|
||||
|
||||
/* Maps the value returned by ucdn_get_script() to the HarfBuzz script tag.
 * Entries are positional: slot N must hold the script whose UCDN_SCRIPT_*
 * constant equals N, so the order below must never be changed.  The number
 * in each leading comment is the index of the first entry on that line. */
static const hb_script_t ucdn_script_translate[] =
{
  /*   0 */ HB_SCRIPT_COMMON, HB_SCRIPT_LATIN, HB_SCRIPT_GREEK, HB_SCRIPT_CYRILLIC,
  /*   4 */ HB_SCRIPT_ARMENIAN, HB_SCRIPT_HEBREW, HB_SCRIPT_ARABIC, HB_SCRIPT_SYRIAC,
  /*   8 */ HB_SCRIPT_THAANA, HB_SCRIPT_DEVANAGARI, HB_SCRIPT_BENGALI, HB_SCRIPT_GURMUKHI,
  /*  12 */ HB_SCRIPT_GUJARATI, HB_SCRIPT_ORIYA, HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU,
  /*  16 */ HB_SCRIPT_KANNADA, HB_SCRIPT_MALAYALAM, HB_SCRIPT_SINHALA, HB_SCRIPT_THAI,
  /*  20 */ HB_SCRIPT_LAO, HB_SCRIPT_TIBETAN, HB_SCRIPT_MYANMAR, HB_SCRIPT_GEORGIAN,
  /*  24 */ HB_SCRIPT_HANGUL, HB_SCRIPT_ETHIOPIC, HB_SCRIPT_CHEROKEE, HB_SCRIPT_CANADIAN_ABORIGINAL,
  /*  28 */ HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC, HB_SCRIPT_KHMER, HB_SCRIPT_MONGOLIAN,
  /*  32 */ HB_SCRIPT_HIRAGANA, HB_SCRIPT_KATAKANA, HB_SCRIPT_BOPOMOFO, HB_SCRIPT_HAN,
  /*  36 */ HB_SCRIPT_YI, HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_GOTHIC, HB_SCRIPT_DESERET,
  /*  40 */ HB_SCRIPT_INHERITED, HB_SCRIPT_TAGALOG, HB_SCRIPT_HANUNOO, HB_SCRIPT_BUHID,
  /*  44 */ HB_SCRIPT_TAGBANWA, HB_SCRIPT_LIMBU, HB_SCRIPT_TAI_LE, HB_SCRIPT_LINEAR_B,
  /*  48 */ HB_SCRIPT_UGARITIC, HB_SCRIPT_SHAVIAN, HB_SCRIPT_OSMANYA, HB_SCRIPT_CYPRIOT,
  /*  52 */ HB_SCRIPT_BRAILLE, HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC, HB_SCRIPT_NEW_TAI_LUE,
  /*  56 */ HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_TIFINAGH, HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_OLD_PERSIAN,
  /*  60 */ HB_SCRIPT_KHAROSHTHI, HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM, HB_SCRIPT_PHOENICIAN,
  /*  64 */ HB_SCRIPT_PHAGS_PA, HB_SCRIPT_NKO, HB_SCRIPT_SUNDANESE, HB_SCRIPT_LEPCHA,
  /*  68 */ HB_SCRIPT_OL_CHIKI, HB_SCRIPT_VAI, HB_SCRIPT_SAURASHTRA, HB_SCRIPT_KAYAH_LI,
  /*  72 */ HB_SCRIPT_REJANG, HB_SCRIPT_LYCIAN, HB_SCRIPT_CARIAN, HB_SCRIPT_LYDIAN,
  /*  76 */ HB_SCRIPT_CHAM, HB_SCRIPT_TAI_THAM, HB_SCRIPT_TAI_VIET, HB_SCRIPT_AVESTAN,
  /*  80 */ HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, HB_SCRIPT_SAMARITAN, HB_SCRIPT_LISU, HB_SCRIPT_BAMUM,
  /*  84 */ HB_SCRIPT_JAVANESE, HB_SCRIPT_MEETEI_MAYEK, HB_SCRIPT_IMPERIAL_ARAMAIC, HB_SCRIPT_OLD_SOUTH_ARABIAN,
  /*  88 */ HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, HB_SCRIPT_OLD_TURKIC, HB_SCRIPT_KAITHI,
  /*  92 */ HB_SCRIPT_BATAK, HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC, HB_SCRIPT_CHAKMA,
  /*  96 */ HB_SCRIPT_MEROITIC_CURSIVE, HB_SCRIPT_MEROITIC_HIEROGLYPHS, HB_SCRIPT_MIAO, HB_SCRIPT_SHARADA,
  /* 100 */ HB_SCRIPT_SORA_SOMPENG, HB_SCRIPT_TAKRI, HB_SCRIPT_UNKNOWN,
};
|
||||
|
||||
/* Unicode-funcs callback: combining class of `unicode` as reported by UCDN.
 * `ufuncs` and `user_data` are part of the callback signature and are not
 * used here. */
static hb_unicode_combining_class_t
hb_ucdn_combining_class(hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode,
			void *user_data)
{
  const int ccc = ucdn_get_combining_class(unicode);

  return (hb_unicode_combining_class_t) ccc;
}
|
||||
|
||||
static unsigned int
|
||||
hb_ucdn_eastasian_width(hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
{
|
||||
int w = ucdn_get_east_asian_width(unicode);
|
||||
return (w == UCDN_EAST_ASIAN_F || w == UCDN_EAST_ASIAN_W) ? 2 : 1;
|
||||
}
|
||||
|
||||
/* Unicode-funcs callback: general category of `unicode`.
 * The UCDN value is cast straight to the HarfBuzz enum, which relies on
 * both enumerations using the same numbering -- NOTE(review): confirm
 * against hb_unicode_general_category_t when either side changes.
 * `ufuncs` and `user_data` are not used. */
static hb_unicode_general_category_t
hb_ucdn_general_category(hb_unicode_funcs_t *ufuncs,
			 hb_codepoint_t unicode, void *user_data)
{
  const int category = ucdn_get_general_category(unicode);

  return (hb_unicode_general_category_t) category;
}
|
||||
|
||||
/* Unicode-funcs callback: mirrored counterpart of `unicode` as returned by
 * ucdn_mirror().  `ufuncs` and `user_data` are not used. */
static hb_codepoint_t
hb_ucdn_mirroring(hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode,
		  void *user_data)
{
  hb_codepoint_t mirrored = ucdn_mirror(unicode);

  return mirrored;
}
|
||||
|
||||
/* Unicode-funcs callback: script of `unicode`.  The UCDN script number is
 * used as an index into ucdn_script_translate to obtain the HarfBuzz
 * script value.  `ufuncs` and `user_data` are not used. */
static hb_script_t
hb_ucdn_script(hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode,
	       void *user_data)
{
  const int script_index = ucdn_get_script(unicode);

  return ucdn_script_translate[script_index];
}
|
||||
|
||||
/* Unicode-funcs callback: pairwise composition of `a` and `b` into `*ab`.
 * Forwards to ucdn_compose(), whose argument order puts the output first,
 * and returns its result unchanged.  `ufuncs` and `user_data` are not
 * used. */
static hb_bool_t
hb_ucdn_compose(hb_unicode_funcs_t *ufuncs, hb_codepoint_t a,
		hb_codepoint_t b, hb_codepoint_t *ab, void *user_data)
{
  const int composed = ucdn_compose(ab, a, b);

  return composed;
}
|
||||
|
||||
/* Unicode-funcs callback: pairwise decomposition of `ab` into `*a` and
 * `*b`.  Forwards to ucdn_decompose() and returns its result unchanged.
 * `ufuncs` and `user_data` are not used. */
static hb_bool_t
hb_ucdn_decompose(hb_unicode_funcs_t *ufuncs, hb_codepoint_t ab,
		  hb_codepoint_t *a, hb_codepoint_t *b, void *user_data)
{
  const int decomposed = ucdn_decompose(ab, a, b);

  return decomposed;
}
|
||||
|
||||
static unsigned int
|
||||
hb_ucdn_decompose_compatibility(hb_unicode_funcs_t *ufuncs, hb_codepoint_t u,
|
||||
hb_codepoint_t *decomposed, void *user_data)
|
||||
{
|
||||
return ucdn_compat_decompose(u, decomposed);
|
||||
}
|
||||
|
||||
/* Returns the UCDN-backed Unicode function table.
 * The table is a single function-local static object marked immutable; its
 * callback slots are filled by expanding
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS with each name prefixed by
 * "hb_ucdn_". */
extern "C" HB_INTERNAL
hb_unicode_funcs_t *
hb_ucdn_get_unicode_funcs (void)
{
  static const hb_unicode_funcs_t ucdn_unicode_funcs = {
    HB_OBJECT_HEADER_STATIC,

    NULL, /* parent */
    true, /* immutable */
    {
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_ucdn_##name,
      HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
    }
  };

  /* The object is declared const; callers receive a non-const pointer. */
  hb_unicode_funcs_t *funcs =
    const_cast<hb_unicode_funcs_t *> (&ucdn_unicode_funcs);

  return funcs;
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
## Process this file with automake to produce Makefile.in

# Convenience library holding the bundled UCDN sources; it is not installed.
noinst_LTLIBRARIES = libhb-ucdn.la

libhb_ucdn_la_SOURCES = \
	ucdn.h \
	ucdn.c \
	unicodedata_db.h
# Per-target variables use the canonicalized target name, including the
# "_la" suffix; without it automake does not apply the flags to the library.
libhb_ucdn_la_CPPFLAGS = \
	-I$(top_srcdir) \
	-I$(top_srcdir)/src \
	-I$(top_builddir)/src
libhb_ucdn_la_LIBADD =

EXTRA_DIST = README

-include $(top_srcdir)/git.mk
|
|
@ -0,0 +1,33 @@
|
|||
UCDN - Unicode Database and Normalization
|
||||
|
||||
UCDN is a Unicode support library. Currently, it provides access
|
||||
to basic character properties contained in the Unicode Character
|
||||
Database and low-level normalization functions (pairwise canonical
|
||||
composition/decomposition and compatibility decomposition). More
|
||||
functionality might be provided in the future, such as additional
|
||||
properties, string normalization and encoding conversion.
|
||||
|
||||
UCDN uses standard C89 with no particular dependencies or requirements
|
||||
except for stdint.h, and can be easily integrated into existing
|
||||
projects. However, it can also be used as a standalone library,
|
||||
and a CMake build script is provided for this. The first motivation
|
||||
behind UCDN development was to provide a standalone set of Unicode
|
||||
functions for the HarfBuzz OpenType shaping library. For this purpose,
|
||||
a HarfBuzz-specific wrapper is shipped along with it (hb-ucdn.h).
|
||||
|
||||
UCDN is published under the ISC license; please see the license header
|
||||
in the C source code for more information. The makeunicodedata.py script
|
||||
required for parsing Unicode database files is licensed under the
|
||||
PSF license, please see PYTHON-LICENSE for more information.
|
||||
|
||||
UCDN was written by Grigori Goronzy <greg@kinoho.net>.
|
||||
|
||||
How to Use
|
||||
|
||||
Include ucdn.c, ucdn.h and unicodedata_db.h in your project. Now,
|
||||
just use the functions as documented in ucdn.h.
|
||||
|
||||
In some cases, it might be necessary to regenerate the Unicode
|
||||
database file. The script makeunicodedata.py (Python 3.x required)
|
||||
fetches the appropriate files and dumps the compressed database into
|
||||
unicodedata_db.h.
|
|
@ -0,0 +1,282 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include "ucdn.h"
|
||||
|
||||
/* Property record shared by all codepoints with identical properties.
 * Records are located through get_ucd_record(); the field order must match
 * the initializers in the generated database header. */
typedef struct {
    const unsigned char category;            /* returned by ucdn_get_general_category() */
    const unsigned char combining;           /* returned by ucdn_get_combining_class() */
    const unsigned char bidi_class;          /* returned by ucdn_get_bidi_class() */
    const unsigned char mirrored;            /* non-zero when the codepoint is mirrored */
    const unsigned char east_asian_width;    /* returned by ucdn_get_east_asian_width() */
    const unsigned char normalization_check; /* not read by the functions in this file */
    const unsigned char script;              /* returned by ucdn_get_script() */
} UCDRecord;
|
||||
|
||||
/* One entry of the mirroring table: `from` is the search key used by
 * ucdn_mirror(), `to` is the codepoint it returns.  Both are 16 bits wide. */
typedef struct {
    unsigned short from;
    unsigned short to;
} MirrorPair;
|
||||
|
||||
/* One run of the composition reindex tables scanned by get_comp_index().
 * A run covers the codepoints start .. start + count (inclusive); `index`
 * is the composition index assigned to `start`.  A run whose `start` is 0
 * terminates the table. */
typedef struct {
    int start;
    short count;
    short index;
} Reindex;
|
||||
|
||||
#include "unicodedata_db.h"
|
||||
|
||||
/* Constants for algorithmic Hangul (de)composition. */

/* first codepoint of each range */
#define SBASE 0xAC00    /* precomposed syllables */
#define LBASE 0x1100    /* leading consonants (L) */
#define VBASE 0x1161    /* vowels (V) */
#define TBASE 0x11A7    /* trailing consonants (T); index 0 means "none" */

/* number of entries in each range */
#define SCOUNT 11172
#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28
#define NCOUNT (VCOUNT * TCOUNT)    /* syllables per leading consonant */
|
||||
|
||||
static UCDRecord *get_ucd_record(uint32_t code)
|
||||
{
|
||||
int index, offset;
|
||||
|
||||
if (code >= 0x110000)
|
||||
index = 0;
|
||||
else {
|
||||
index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1;
|
||||
offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1);
|
||||
index = index1[index + offset] << SHIFT2;
|
||||
offset = code & ((1<<SHIFT2) - 1);
|
||||
index = index2[index + offset];
|
||||
}
|
||||
|
||||
return &ucd_records[index];
|
||||
}
|
||||
|
||||
static unsigned short *get_decomp_record(uint32_t code)
|
||||
{
|
||||
int index, offset;
|
||||
|
||||
if (code >= 0x110000)
|
||||
index = 0;
|
||||
else {
|
||||
index = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)]
|
||||
<< DECOMP_SHIFT1;
|
||||
offset = (code >> DECOMP_SHIFT2) & ((1<<DECOMP_SHIFT1) - 1);
|
||||
index = decomp_index1[index + offset] << DECOMP_SHIFT2;
|
||||
offset = code & ((1<<DECOMP_SHIFT2) - 1);
|
||||
index = decomp_index2[index + offset];
|
||||
}
|
||||
|
||||
return &decomp_data[index];
|
||||
}
|
||||
|
||||
/*
 * Translate a codepoint into its composition index.
 *
 * idx is scanned run by run until a run with start == 0 is reached.  The
 * scan stops early as soon as a run starts beyond `code`, so the table is
 * expected to be sorted by `start`.
 *
 * Returns the index of `code` within its run, or -1 when no run covers it.
 */
static int get_comp_index(uint32_t code, Reindex *idx)
{
    Reindex *run;

    for (run = idx; run->start; run++) {
        if (code < run->start)
            break;
        if (code <= run->start + run->count)
            return run->index + (code - run->start);
    }

    return -1;
}
|
||||
|
||||
static int compare_mp(const void *a, const void *b)
|
||||
{
|
||||
MirrorPair *mpa = (MirrorPair *)a;
|
||||
MirrorPair *mpb = (MirrorPair *)b;
|
||||
return mpa->from - mpb->from;
|
||||
}
|
||||
|
||||
static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b)
|
||||
{
|
||||
int si = code - SBASE;
|
||||
|
||||
if (si < 0 || si >= SCOUNT)
|
||||
return 0;
|
||||
|
||||
if (si % TCOUNT) {
|
||||
/* LV,T */
|
||||
*a = SBASE + (si / TCOUNT) * TCOUNT;
|
||||
*b = TBASE + (si % TCOUNT);
|
||||
return 3;
|
||||
} else {
|
||||
/* L,V */
|
||||
*a = LBASE + (si / NCOUNT);
|
||||
*b = VBASE + (si % NCOUNT) / TCOUNT;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b)
|
||||
{
|
||||
if (b < VBASE || b >= (TBASE + TCOUNT))
|
||||
return 0;
|
||||
|
||||
if ((a < LBASE || a >= (LBASE + LCOUNT))
|
||||
&& (a < SBASE || a >= (SBASE + SCOUNT)))
|
||||
return 0;
|
||||
|
||||
if (a >= SBASE) {
|
||||
/* LV,T */
|
||||
*code = a + (b - TBASE);
|
||||
return 3;
|
||||
} else {
|
||||
/* L,V */
|
||||
int li = a - LBASE;
|
||||
int vi = b - VBASE;
|
||||
*code = SBASE + li * NCOUNT + vi * TCOUNT;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Decode one codepoint from a UTF-16 sequence and advance the cursor.
 *
 * code_ptr: in/out; points at the next unread code unit.  It is advanced
 *           by one unit for a BMP character and by two units for a
 *           surrogate pair.
 *
 * Returns the decoded codepoint.  The second unit of a pair is not
 * validated; the caller must supply well-formed data.
 */
static uint32_t decode_utf16(unsigned short **code_ptr)
{
    unsigned short *code = *code_ptr;

    /* A high surrogate is 0xD800..0xDBFF, i.e. its top six bits are
     * 110110.  Testing only the 0xd800 bits would also match
     * 0xF800..0xFFFF and misread those BMP units as a pair. */
    if ((code[0] & 0xfc00) != 0xd800) {
        *code_ptr += 1;
        return (uint32_t)code[0];
    }

    *code_ptr += 2;
    return 0x10000 + ((uint32_t)code[1] - 0xdc00) +
        (((uint32_t)code[0] - 0xd800) << 10);
}
|
||||
|
||||
const char *ucdn_get_unicode_version(void)
|
||||
{
|
||||
return UNIDATA_VERSION;
|
||||
}
|
||||
|
||||
int ucdn_get_combining_class(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->combining;
|
||||
}
|
||||
|
||||
int ucdn_get_east_asian_width(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->east_asian_width;
|
||||
}
|
||||
|
||||
int ucdn_get_general_category(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->category;
|
||||
}
|
||||
|
||||
int ucdn_get_bidi_class(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->bidi_class;
|
||||
}
|
||||
|
||||
int ucdn_get_mirrored(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->mirrored;
|
||||
}
|
||||
|
||||
int ucdn_get_script(uint32_t code)
|
||||
{
|
||||
return get_ucd_record(code)->script;
|
||||
}
|
||||
|
||||
uint32_t ucdn_mirror(uint32_t code)
|
||||
{
|
||||
MirrorPair mp = {0};
|
||||
MirrorPair *res;
|
||||
|
||||
if (get_ucd_record(code)->mirrored == 0)
|
||||
return code;
|
||||
|
||||
mp.from = code;
|
||||
res = bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, sizeof(MirrorPair),
|
||||
compare_mp);
|
||||
|
||||
if (res == NULL)
|
||||
return code;
|
||||
else
|
||||
return res->to;
|
||||
}
|
||||
|
||||
/*
 * Pairwise decomposition of a codepoint into *a and *b.
 *
 * Hangul syllables are split algorithmically.  Everything else goes
 * through the decomposition table: the first unit of a record is a header
 * whose high byte is the decomposition length.  Records with a non-zero
 * low byte are rejected -- NOTE(review): the low byte presumably flags
 * compatibility mappings; confirm against the database generator.
 *
 * Returns 1 on success (*b is 0 for a single-codepoint decomposition) and
 * 0 when there is nothing to decompose.
 */
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
    unsigned short *cursor;
    unsigned short header;
    int length;

    if (hangul_pair_decompose(code, a, b))
        return 1;

    cursor = get_decomp_record(code);
    header = cursor[0];
    length = header >> 8;

    if ((header & 0xff) != 0 || length == 0)
        return 0;

    cursor++;
    *a = decode_utf16(&cursor);
    *b = (length > 1) ? decode_utf16(&cursor) : 0;

    return 1;
}
|
||||
|
||||
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b)
|
||||
{
|
||||
int l, r, index, indexi, offset;
|
||||
|
||||
if (hangul_pair_compose(code, a, b))
|
||||
return 1;
|
||||
|
||||
l = get_comp_index(a, nfc_first);
|
||||
r = get_comp_index(b, nfc_last);
|
||||
|
||||
if (l < 0 || r < 0)
|
||||
return 0;
|
||||
|
||||
indexi = l * TOTAL_LAST + r;
|
||||
index = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1;
|
||||
offset = (indexi >> COMP_SHIFT2) & ((1<<COMP_SHIFT1) - 1);
|
||||
index = comp_index1[index + offset] << COMP_SHIFT2;
|
||||
offset = indexi & ((1<<COMP_SHIFT2) - 1);
|
||||
*code = comp_data[index + offset];
|
||||
|
||||
return *code != 0;
|
||||
}
|
||||
|
||||
/*
 * Decomposition of a codepoint as stored in the decomposition table.
 *
 * The high byte of the record header is the number of codepoints; each one
 * is decoded from UTF-16 into `decomposed`, which the caller must size for
 * the longest decomposition.
 *
 * Returns the number of codepoints written, or 0 when the codepoint has no
 * decomposition.
 */
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed)
{
    unsigned short *cursor = get_decomp_record(code);
    int length = cursor[0] >> 8;
    uint32_t *out = decomposed;
    int remaining;

    if (length == 0)
        return 0;

    cursor++;
    for (remaining = length; remaining > 0; remaining--)
        *out++ = decode_utf16(&cursor);

    return length;
}
|
|
@ -0,0 +1,290 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UCDN_H
|
||||
#define UCDN_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define UCDN_EAST_ASIAN_F 0
|
||||
#define UCDN_EAST_ASIAN_H 1
|
||||
#define UCDN_EAST_ASIAN_W 2
|
||||
#define UCDN_EAST_ASIAN_NA 3
|
||||
#define UCDN_EAST_ASIAN_A 4
|
||||
#define UCDN_EAST_ASIAN_N 5
|
||||
|
||||
#define UCDN_SCRIPT_COMMON 0
|
||||
#define UCDN_SCRIPT_LATIN 1
|
||||
#define UCDN_SCRIPT_GREEK 2
|
||||
#define UCDN_SCRIPT_CYRILLIC 3
|
||||
#define UCDN_SCRIPT_ARMENIAN 4
|
||||
#define UCDN_SCRIPT_HEBREW 5
|
||||
#define UCDN_SCRIPT_ARABIC 6
|
||||
#define UCDN_SCRIPT_SYRIAC 7
|
||||
#define UCDN_SCRIPT_THAANA 8
|
||||
#define UCDN_SCRIPT_DEVANAGARI 9
|
||||
#define UCDN_SCRIPT_BENGALI 10
|
||||
#define UCDN_SCRIPT_GURMUKHI 11
|
||||
#define UCDN_SCRIPT_GUJARATI 12
|
||||
#define UCDN_SCRIPT_ORIYA 13
|
||||
#define UCDN_SCRIPT_TAMIL 14
|
||||
#define UCDN_SCRIPT_TELUGU 15
|
||||
#define UCDN_SCRIPT_KANNADA 16
|
||||
#define UCDN_SCRIPT_MALAYALAM 17
|
||||
#define UCDN_SCRIPT_SINHALA 18
|
||||
#define UCDN_SCRIPT_THAI 19
|
||||
#define UCDN_SCRIPT_LAO 20
|
||||
#define UCDN_SCRIPT_TIBETAN 21
|
||||
#define UCDN_SCRIPT_MYANMAR 22
|
||||
#define UCDN_SCRIPT_GEORGIAN 23
|
||||
#define UCDN_SCRIPT_HANGUL 24
|
||||
#define UCDN_SCRIPT_ETHIOPIC 25
|
||||
#define UCDN_SCRIPT_CHEROKEE 26
|
||||
#define UCDN_SCRIPT_CANADIAN_ABORIGINAL 27
|
||||
#define UCDN_SCRIPT_OGHAM 28
|
||||
#define UCDN_SCRIPT_RUNIC 29
|
||||
#define UCDN_SCRIPT_KHMER 30
|
||||
#define UCDN_SCRIPT_MONGOLIAN 31
|
||||
#define UCDN_SCRIPT_HIRAGANA 32
|
||||
#define UCDN_SCRIPT_KATAKANA 33
|
||||
#define UCDN_SCRIPT_BOPOMOFO 34
|
||||
#define UCDN_SCRIPT_HAN 35
|
||||
#define UCDN_SCRIPT_YI 36
|
||||
#define UCDN_SCRIPT_OLD_ITALIC 37
|
||||
#define UCDN_SCRIPT_GOTHIC 38
|
||||
#define UCDN_SCRIPT_DESERET 39
|
||||
#define UCDN_SCRIPT_INHERITED 40
|
||||
#define UCDN_SCRIPT_TAGALOG 41
|
||||
#define UCDN_SCRIPT_HANUNOO 42
|
||||
#define UCDN_SCRIPT_BUHID 43
|
||||
#define UCDN_SCRIPT_TAGBANWA 44
|
||||
#define UCDN_SCRIPT_LIMBU 45
|
||||
#define UCDN_SCRIPT_TAI_LE 46
|
||||
#define UCDN_SCRIPT_LINEAR_B 47
|
||||
#define UCDN_SCRIPT_UGARITIC 48
|
||||
#define UCDN_SCRIPT_SHAVIAN 49
|
||||
#define UCDN_SCRIPT_OSMANYA 50
|
||||
#define UCDN_SCRIPT_CYPRIOT 51
|
||||
#define UCDN_SCRIPT_BRAILLE 52
|
||||
#define UCDN_SCRIPT_BUGINESE 53
|
||||
#define UCDN_SCRIPT_COPTIC 54
|
||||
#define UCDN_SCRIPT_NEW_TAI_LUE 55
|
||||
#define UCDN_SCRIPT_GLAGOLITIC 56
|
||||
#define UCDN_SCRIPT_TIFINAGH 57
|
||||
#define UCDN_SCRIPT_SYLOTI_NAGRI 58
|
||||
#define UCDN_SCRIPT_OLD_PERSIAN 59
|
||||
#define UCDN_SCRIPT_KHAROSHTHI 60
|
||||
#define UCDN_SCRIPT_BALINESE 61
|
||||
#define UCDN_SCRIPT_CUNEIFORM 62
|
||||
#define UCDN_SCRIPT_PHOENICIAN 63
|
||||
#define UCDN_SCRIPT_PHAGS_PA 64
|
||||
#define UCDN_SCRIPT_NKO 65
|
||||
#define UCDN_SCRIPT_SUNDANESE 66
|
||||
#define UCDN_SCRIPT_LEPCHA 67
|
||||
#define UCDN_SCRIPT_OL_CHIKI 68
|
||||
#define UCDN_SCRIPT_VAI 69
|
||||
#define UCDN_SCRIPT_SAURASHTRA 70
|
||||
#define UCDN_SCRIPT_KAYAH_LI 71
|
||||
#define UCDN_SCRIPT_REJANG 72
|
||||
#define UCDN_SCRIPT_LYCIAN 73
|
||||
#define UCDN_SCRIPT_CARIAN 74
|
||||
#define UCDN_SCRIPT_LYDIAN 75
|
||||
#define UCDN_SCRIPT_CHAM 76
|
||||
#define UCDN_SCRIPT_TAI_THAM 77
|
||||
#define UCDN_SCRIPT_TAI_VIET 78
|
||||
#define UCDN_SCRIPT_AVESTAN 79
|
||||
#define UCDN_SCRIPT_EGYPTIAN_HIEROGLYPHS 80
|
||||
#define UCDN_SCRIPT_SAMARITAN 81
|
||||
#define UCDN_SCRIPT_LISU 82
|
||||
#define UCDN_SCRIPT_BAMUM 83
|
||||
#define UCDN_SCRIPT_JAVANESE 84
|
||||
#define UCDN_SCRIPT_MEETEI_MAYEK 85
|
||||
#define UCDN_SCRIPT_IMPERIAL_ARAMAIC 86
|
||||
#define UCDN_SCRIPT_OLD_SOUTH_ARABIAN 87
|
||||
#define UCDN_SCRIPT_INSCRIPTIONAL_PARTHIAN 88
|
||||
#define UCDN_SCRIPT_INSCRIPTIONAL_PAHLAVI 89
|
||||
#define UCDN_SCRIPT_OLD_TURKIC 90
|
||||
#define UCDN_SCRIPT_KAITHI 91
|
||||
#define UCDN_SCRIPT_BATAK 92
|
||||
#define UCDN_SCRIPT_BRAHMI 93
|
||||
#define UCDN_SCRIPT_MANDAIC 94
|
||||
#define UCDN_SCRIPT_CHAKMA 95
|
||||
#define UCDN_SCRIPT_MEROITIC_CURSIVE 96
|
||||
#define UCDN_SCRIPT_MEROITIC_HIEROGLYPHS 97
|
||||
#define UCDN_SCRIPT_MIAO 98
|
||||
#define UCDN_SCRIPT_SHARADA 99
|
||||
#define UCDN_SCRIPT_SORA_SOMPENG 100
|
||||
#define UCDN_SCRIPT_TAKRI 101
|
||||
#define UCDN_SCRIPT_UNKNOWN 102
|
||||
|
||||
#define UCDN_GENERAL_CATEGORY_CC 0
|
||||
#define UCDN_GENERAL_CATEGORY_CF 1
|
||||
#define UCDN_GENERAL_CATEGORY_CN 2
|
||||
#define UCDN_GENERAL_CATEGORY_CO 3
|
||||
#define UCDN_GENERAL_CATEGORY_CS 4
|
||||
#define UCDN_GENERAL_CATEGORY_LL 5
|
||||
#define UCDN_GENERAL_CATEGORY_LM 6
|
||||
#define UCDN_GENERAL_CATEGORY_LO 7
|
||||
#define UCDN_GENERAL_CATEGORY_LT 8
|
||||
#define UCDN_GENERAL_CATEGORY_LU 9
|
||||
#define UCDN_GENERAL_CATEGORY_MC 10
|
||||
#define UCDN_GENERAL_CATEGORY_ME 11
|
||||
#define UCDN_GENERAL_CATEGORY_MN 12
|
||||
#define UCDN_GENERAL_CATEGORY_ND 13
|
||||
#define UCDN_GENERAL_CATEGORY_NL 14
|
||||
#define UCDN_GENERAL_CATEGORY_NO 15
|
||||
#define UCDN_GENERAL_CATEGORY_PC 16
|
||||
#define UCDN_GENERAL_CATEGORY_PD 17
|
||||
#define UCDN_GENERAL_CATEGORY_PE 18
|
||||
#define UCDN_GENERAL_CATEGORY_PF 19
|
||||
#define UCDN_GENERAL_CATEGORY_PI 20
|
||||
#define UCDN_GENERAL_CATEGORY_PO 21
|
||||
#define UCDN_GENERAL_CATEGORY_PS 22
|
||||
#define UCDN_GENERAL_CATEGORY_SC 23
|
||||
#define UCDN_GENERAL_CATEGORY_SK 24
|
||||
#define UCDN_GENERAL_CATEGORY_SM 25
|
||||
#define UCDN_GENERAL_CATEGORY_SO 26
|
||||
#define UCDN_GENERAL_CATEGORY_ZL 27
|
||||
#define UCDN_GENERAL_CATEGORY_ZP 28
|
||||
#define UCDN_GENERAL_CATEGORY_ZS 29
|
||||
|
||||
#define UCDN_BIDI_CLASS_L 0
|
||||
#define UCDN_BIDI_CLASS_LRE 1
|
||||
#define UCDN_BIDI_CLASS_LRO 2
|
||||
#define UCDN_BIDI_CLASS_R 3
|
||||
#define UCDN_BIDI_CLASS_AL 4
|
||||
#define UCDN_BIDI_CLASS_RLE 5
|
||||
#define UCDN_BIDI_CLASS_RLO 6
|
||||
#define UCDN_BIDI_CLASS_PDF 7
|
||||
#define UCDN_BIDI_CLASS_EN 8
|
||||
#define UCDN_BIDI_CLASS_ES 9
|
||||
#define UCDN_BIDI_CLASS_ET 10
|
||||
#define UCDN_BIDI_CLASS_AN 11
|
||||
#define UCDN_BIDI_CLASS_CS 12
|
||||
#define UCDN_BIDI_CLASS_NSM 13
|
||||
#define UCDN_BIDI_CLASS_BN 14
|
||||
#define UCDN_BIDI_CLASS_B 15
|
||||
#define UCDN_BIDI_CLASS_S 16
|
||||
#define UCDN_BIDI_CLASS_WS 17
|
||||
#define UCDN_BIDI_CLASS_ON 18
|
||||
|
||||
/**
|
||||
* Return version of the Unicode database.
|
||||
*
|
||||
* @return Unicode database version
|
||||
*/
|
||||
const char *ucdn_get_unicode_version(void);
|
||||
|
||||
/**
|
||||
* Get combining class of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return combining class value, as defined in UAX#44
|
||||
*/
|
||||
int ucdn_get_combining_class(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get east-asian width of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_EAST_ASIAN_* and as defined in UAX#11.
|
||||
*/
|
||||
int ucdn_get_east_asian_width(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get general category of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_GENERAL_CATEGORY_* and as defined in
|
||||
* UAX#44.
|
||||
*/
|
||||
int ucdn_get_general_category(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get bidirectional class of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_BIDI_CLASS_* and as defined in UAX#44.
|
||||
*/
|
||||
int ucdn_get_bidi_class(uint32_t code);
|
||||
|
||||
/**
|
||||
* Get script of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return value according to UCDN_SCRIPT_* and as defined in UAX#24.
|
||||
*/
|
||||
int ucdn_get_script(uint32_t code);
|
||||
|
||||
/**
|
||||
* Check if codepoint can be mirrored.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return 1 if mirrored character exists, otherwise 0
|
||||
*/
|
||||
int ucdn_get_mirrored(uint32_t code);
|
||||
|
||||
/**
|
||||
* Mirror a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @return mirrored codepoint or the original codepoint if no
|
||||
* mirrored character exists
|
||||
*/
|
||||
uint32_t ucdn_mirror(uint32_t code);
|
||||
|
||||
/**
|
||||
* Pairwise canonical decomposition of a codepoint. This includes
|
||||
* Hangul Jamo decomposition (see chapter 3.12 of the Unicode core
|
||||
* specification).
|
||||
*
|
||||
* Hangul is decomposed into L and V jamos for LV forms, and an
|
||||
* LV precomposed syllable and a T jamo for LVT forms.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @param a filled with first codepoint of decomposition
|
||||
* @param b filled with second codepoint of decomposition, or 0
|
||||
* @return 1 on success, 0 if the codepoint has no pairwise decomposition
|
||||
*/
|
||||
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b);
|
||||
|
||||
/**
|
||||
* Compatibility decomposition of a codepoint.
|
||||
*
|
||||
* @param code Unicode codepoint
|
||||
* @param decomposed filled with decomposition, must be able to hold 18
|
||||
* characters
|
||||
* @return length of decomposition or 0 in case none exists
|
||||
*/
|
||||
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed);
|
||||
|
||||
/**
|
||||
* Pairwise canonical composition of two codepoints. This includes
|
||||
* Hangul Jamo composition (see chapter 3.12 of the Unicode core
|
||||
* specification).
|
||||
*
|
||||
* Hangul composition expects either L and V jamos, or an LV
|
||||
* precomposed syllable and a T jamo. This is exactly the inverse
|
||||
* of pairwise Hangul decomposition.
|
||||
*
|
||||
* @param code filled with composition
|
||||
* @param a first codepoint
|
||||
* @param b second codepoint
|
||||
* @return 1 if the two codepoints compose, otherwise 0
|
||||
*/
|
||||
int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b);
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -112,6 +112,7 @@ hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED
|
|||
#define HB_UNICODE_FUNCS_IMPLEMENT_SET \
|
||||
HB_UNICODE_FUNCS_IMPLEMENT (glib) \
|
||||
HB_UNICODE_FUNCS_IMPLEMENT (icu) \
|
||||
HB_UNICODE_FUNCS_IMPLEMENT (ucdn) \
|
||||
HB_UNICODE_FUNCS_IMPLEMENT (nil) \
|
||||
/* ^--- Add new callbacks before nil */
|
||||
|
||||
|
@ -134,6 +135,8 @@ hb_unicode_funcs_get_default (void)
|
|||
HB_UNICODE_FUNCS_IMPLEMENT(glib)
|
||||
#elif defined(HAVE_ICU)
|
||||
HB_UNICODE_FUNCS_IMPLEMENT(icu)
|
||||
#elif defined(HAVE_UCDN)
|
||||
HB_UNICODE_FUNCS_IMPLEMENT(ucdn)
|
||||
#else
|
||||
#define HB_UNICODE_FUNCS_NIL 1
|
||||
HB_UNICODE_FUNCS_IMPLEMENT(nil)
|
||||
|
|
Loading…
Reference in New Issue