[API] Add compose() and decompose() unicode funcs, rename other ones

Add compose() and decompose() unicode funcs.  These implement
pair-wise canonical composition/decomposition.

The glib/icu implementations are lacking for now.  We are adding
API for this to glib, but I cannot find any useful API in ICU.
May end of implementing these in-house.

Changed all unicode_funcs callback names to remove the "_get" part.
Eg, hb_unicode_get_script_func_t is now hb_unicode_script_func_t,
and hb_unicode_get_script() is hb_unicode_script() now.
This commit is contained in:
Behdad Esfahbod 2011-07-07 23:47:19 -04:00
parent d05dded167
commit c4641723fb
9 changed files with 146 additions and 62 deletions

4
TODO
View File

@ -68,7 +68,9 @@ Tests to write:
- ot-layout enumeration API (needs font)
- Finish test-shape.c
- Finish test-shape.c, grep for TODO
- Finish test-unicode.c, grep for TODO
Optimizations:

View File

@ -231,7 +231,11 @@ hb_unicode_funcs_t _hb_glib_unicode_funcs = {
hb_glib_get_eastasian_width,
hb_glib_get_general_category,
hb_glib_get_mirroring,
hb_glib_get_script
hb_glib_get_script,
/* TODO
hb_glib_compose,
hb_glib_decompose,
*/
}
};

View File

@ -174,7 +174,11 @@ hb_unicode_funcs_t _hb_icu_unicode_funcs = {
hb_icu_get_eastasian_width,
hb_icu_get_general_category,
hb_icu_get_mirroring,
hb_icu_get_script
hb_icu_get_script,
/* TODO
hb_icu_compose,
hb_icu_decompose,
*/
}
};

View File

@ -197,8 +197,8 @@ hb_set_unicode_props (hb_ot_shape_context_t *c)
unsigned int count = c->buffer->len;
for (unsigned int i = 1; i < count; i++) {
info[i].general_category() = hb_unicode_get_general_category (unicode, info[i].codepoint);
info[i].combining_class() = hb_unicode_get_combining_class (unicode, info[i].codepoint);
info[i].general_category() = hb_unicode_general_category (unicode, info[i].codepoint);
info[i].combining_class() = hb_unicode_combining_class (unicode, info[i].codepoint);
}
}
@ -252,7 +252,7 @@ hb_mirror_chars (hb_ot_shape_context_t *c)
unsigned int count = c->buffer->len;
for (unsigned int i = 0; i < count; i++) {
hb_codepoint_t codepoint = hb_unicode_get_mirroring (unicode, c->buffer->info[i].codepoint);
hb_codepoint_t codepoint = hb_unicode_mirroring (unicode, c->buffer->info[i].codepoint);
if (likely (codepoint == c->buffer->info[i].codepoint))
c->buffer->info[i].mask |= rtlm_mask; /* XXX this should be moved to before setting user-feature masks */
else

View File

@ -63,7 +63,7 @@ hb_shape (hb_font_t *font,
hb_unicode_funcs_t *unicode = buffer->unicode;
unsigned int count = buffer->len;
for (unsigned int i = 0; i < count; i++) {
hb_script_t script = hb_unicode_get_script (unicode, buffer->info[i].codepoint);
hb_script_t script = hb_unicode_script (unicode, buffer->info[i].codepoint);
if (likely (script != HB_SCRIPT_COMMON &&
script != HB_SCRIPT_INHERITED &&
script != HB_SCRIPT_UNKNOWN)) {

View File

@ -49,6 +49,8 @@ HB_BEGIN_DECLS
HB_UNICODE_FUNC_IMPLEMENT (general_category) \
HB_UNICODE_FUNC_IMPLEMENT (mirroring) \
HB_UNICODE_FUNC_IMPLEMENT (script) \
HB_UNICODE_FUNC_IMPLEMENT (compose) \
HB_UNICODE_FUNC_IMPLEMENT (decompose) \
/* ^--- Add new callbacks here */
/* Simple callbacks are those taking a hb_codepoint_t and returning a hb_codepoint_t */
@ -67,13 +69,13 @@ struct _hb_unicode_funcs_t {
bool immutable;
/* Don't access these directly. Call hb_unicode_get_*() instead. */
/* Don't access these directly. Call hb_unicode_*() instead. */
struct {
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_get_##name##_func_t name;
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name;
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
} get;
} func;
struct {
#define HB_UNICODE_FUNC_IMPLEMENT(name) void *name;

View File

@ -40,45 +40,67 @@ HB_BEGIN_DECLS
*/
static unsigned int
hb_unicode_get_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return 0;
}
static unsigned int
hb_unicode_get_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return 1;
}
static hb_unicode_general_category_t
hb_unicode_get_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
}
static hb_codepoint_t
hb_unicode_get_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return unicode;
}
static hb_script_t
hb_unicode_get_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return HB_SCRIPT_UNKNOWN;
}
static hb_bool_t
hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t a HB_UNUSED,
hb_codepoint_t b HB_UNUSED,
hb_codepoint_t *ab HB_UNUSED,
void *user_data HB_UNUSED)
{
/* TODO handle Hangul jamo here? */
return FALSE;
}
static hb_bool_t
hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t ab HB_UNUSED,
hb_codepoint_t *a HB_UNUSED,
hb_codepoint_t *b HB_UNUSED,
void *user_data HB_UNUSED)
{
/* TODO handle Hangul jamo here? */
return FALSE;
}
hb_unicode_funcs_t _hb_unicode_funcs_nil = {
HB_OBJECT_HEADER_STATIC,
@ -86,7 +108,7 @@ hb_unicode_funcs_t _hb_unicode_funcs_nil = {
NULL, /* parent */
TRUE, /* immutable */
{
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_get_##name##_nil,
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
}
@ -113,7 +135,7 @@ hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
hb_unicode_funcs_make_immutable (parent);
ufuncs->parent = hb_unicode_funcs_reference (parent);
ufuncs->get = parent->get;
ufuncs->func = parent->func;
/* We can safely copy user_data from parent since we hold a reference
* onto it and it's immutable. We should not copy the destroy notifiers
@ -193,7 +215,7 @@ hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
\
void \
hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
hb_unicode_get_##name##_func_t func, \
hb_unicode_##name##_func_t func, \
void *user_data, \
hb_destroy_func_t destroy) \
{ \
@ -204,11 +226,11 @@ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
ufuncs->destroy.name (ufuncs->user_data.name); \
\
if (func) { \
ufuncs->get.name = func; \
ufuncs->func.name = func; \
ufuncs->user_data.name = user_data; \
ufuncs->destroy.name = destroy; \
} else { \
ufuncs->get.name = ufuncs->parent->get.name; \
ufuncs->func.name = ufuncs->parent->func.name; \
ufuncs->user_data.name = ufuncs->parent->user_data.name; \
ufuncs->destroy.name = NULL; \
} \
@ -221,13 +243,30 @@ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
\
return_type \
hb_unicode_get_##name (hb_unicode_funcs_t *ufuncs, \
hb_codepoint_t unicode) \
hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
hb_codepoint_t unicode) \
{ \
return ufuncs->get.name (ufuncs, unicode, ufuncs->user_data.name); \
return ufuncs->func.name (ufuncs, unicode, ufuncs->user_data.name); \
}
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
#undef HB_UNICODE_FUNC_IMPLEMENT
hb_bool_t
hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t a,
hb_codepoint_t b,
hb_codepoint_t *ab)
{
return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose);
}
hb_bool_t
hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t ab,
hb_codepoint_t *a,
hb_codepoint_t *b)
{
return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose);
}
HB_END_DECLS

View File

@ -90,73 +90,103 @@ hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
/* typedefs */
typedef unsigned int (*hb_unicode_get_combining_class_func_t) (hb_unicode_funcs_t *ufuncs,
typedef unsigned int (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode,
void *user_data);
typedef unsigned int (*hb_unicode_get_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs,
typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode,
void *user_data);
typedef hb_unicode_general_category_t (*hb_unicode_get_general_category_func_t) (hb_unicode_funcs_t *ufuncs,
typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode,
void *user_data);
typedef hb_codepoint_t (*hb_unicode_get_mirroring_func_t) (hb_unicode_funcs_t *ufuncs,
typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode,
void *user_data);
typedef hb_script_t (*hb_unicode_get_script_func_t) (hb_unicode_funcs_t *ufuncs,
typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode,
void *user_data);
typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t a,
hb_codepoint_t b,
hb_codepoint_t *ab,
void *user_data);
typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t ab,
hb_codepoint_t *a,
hb_codepoint_t *b,
void *user_data);
/* setters */
void
hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
hb_unicode_get_combining_class_func_t combining_class_func,
hb_unicode_combining_class_func_t combining_class_func,
void *user_data, hb_destroy_func_t destroy);
void
hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
hb_unicode_get_eastasian_width_func_t eastasian_width_func,
hb_unicode_eastasian_width_func_t eastasian_width_func,
void *user_data, hb_destroy_func_t destroy);
void
hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
hb_unicode_get_general_category_func_t general_category_func,
hb_unicode_general_category_func_t general_category_func,
void *user_data, hb_destroy_func_t destroy);
void
hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
hb_unicode_get_mirroring_func_t mirroring_func,
hb_unicode_mirroring_func_t mirroring_func,
void *user_data, hb_destroy_func_t destroy);
void
hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
hb_unicode_get_script_func_t script_func,
hb_unicode_script_func_t script_func,
void *user_data, hb_destroy_func_t destroy);
void
hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
hb_unicode_compose_func_t compose_func,
void *user_data, hb_destroy_func_t destroy);
void
hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
hb_unicode_decompose_func_t decompose_func,
void *user_data, hb_destroy_func_t destroy);
/* accessors */
unsigned int
hb_unicode_get_combining_class (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
unsigned int
hb_unicode_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_unicode_general_category_t
hb_unicode_get_general_category (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_codepoint_t
hb_unicode_get_mirroring (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_script_t
hb_unicode_get_script (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_unicode_script (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
hb_bool_t
hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t a,
hb_codepoint_t b,
hb_codepoint_t *ab);
hb_bool_t
hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t ab,
hb_codepoint_t *a,
hb_codepoint_t *b);
HB_END_DECLS

View File

@ -93,7 +93,7 @@ a_is_for_arabic_get_script (hb_unicode_funcs_t *ufuncs,
} else {
hb_unicode_funcs_t *parent = hb_unicode_funcs_get_parent (ufuncs);
return hb_unicode_get_script (parent, codepoint);
return hb_unicode_script (parent, codepoint);
}
}
@ -457,7 +457,7 @@ typedef struct {
{ \
#name, \
(func_setter_func_t) hb_unicode_funcs_set_##name##_func, \
(getter_func_t) hb_unicode_get_##name, \
(getter_func_t) hb_unicode_##name, \
name##_tests, \
G_N_ELEMENTS (name##_tests), \
name##_tests_more, \
@ -667,8 +667,8 @@ test_unicode_subclassing_nil (data_fixture_t *f, gconstpointer user_data)
hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
&f->data[1], free_up);
g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN);
g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN);
g_assert (!f->data[0].freed && !f->data[1].freed);
hb_unicode_funcs_destroy (aa);
@ -686,8 +686,8 @@ test_unicode_subclassing_default (data_fixture_t *f, gconstpointer user_data)
hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
&f->data[1], free_up);
g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
g_assert (!f->data[0].freed && !f->data[1].freed);
hb_unicode_funcs_destroy (aa);
@ -714,9 +714,9 @@ test_unicode_subclassing_deep (data_fixture_t *f, gconstpointer user_data)
hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script,
&f->data[1], free_up);
g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
g_assert_cmphex (hb_unicode_get_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN);
g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC);
g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_LATIN);
g_assert_cmphex (hb_unicode_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN);
g_assert (!f->data[0].freed && !f->data[1].freed);
hb_unicode_funcs_destroy (aa);
@ -779,6 +779,9 @@ test_unicode_script_roundtrip (gconstpointer user_data)
}
/* TODO test compose() and decompose() */
int
main (int argc, char **argv)
{