From ffd4a436f7baccb68a0c3602f94ea0246e32844f Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 20 Jul 2011 22:30:29 -0400 Subject: [PATCH] Add tests for compose()/decompose() Adjust glib fallback implementation. The tests are not hooked up for ICU yet. --- src/hb-glib.cc | 17 ++++++++++-- src/hb-unicode.cc | 2 +- test/test-unicode.c | 66 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) diff --git a/src/hb-glib.cc b/src/hb-glib.cc index 6174498ba..fbf8cf575 100644 --- a/src/hb-glib.cc +++ b/src/hb-glib.cc @@ -244,6 +244,9 @@ hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, /* We don't ifdef-out the fallback code such that compiler always * sees it and makes sure it's compilable. */ + if (!a || !b) + return FALSE; /* a zero code point on either side never composes */ + gchar utf8[12]; gchar *normalized; gint len; @@ -293,8 +296,18 @@ hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, *b = 0; ret = *a != ab; } else if (len == 2) { - *a = g_utf8_get_char (normalized); - *b = g_utf8_get_char (g_utf8_next_char (normalized)); + /* Here's the ugly part: if ab decomposes to a single character and that character decomposes again, `normalized` holds the second-level pair. + * Recompose the pair with NFC: a first character different from ab is taken to be that intermediate single character and is returned with *b = 0. + * NOTE(review): only the first character of `recomposed` is compared; if NFC can leave the pair uncomposed (e.g. composition-excluded characters), the second character is dropped here -- TODO confirm. 
*/ + gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC); + if (g_utf8_get_char (recomposed) != ab) { + *a = g_utf8_get_char (recomposed); + *b = 0; + } else { + *a = g_utf8_get_char (normalized); + *b = g_utf8_get_char (g_utf8_next_char (normalized)); + } + g_free (recomposed); ret = TRUE; } else { /* If decomposed to more than two characters, take the last one, diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index 63e83b769..aba2cd3b2 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -267,7 +267,7 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, hb_codepoint_t *a, hb_codepoint_t *b) { - *a = *b = 0; + *a = ab; *b = 0; return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose); } diff --git a/test/test-unicode.c b/test/test-unicode.c index a691cb4d5..09dde54bc 100644 --- a/test/test-unicode.c +++ b/test/test-unicode.c @@ -780,6 +780,70 @@ test_unicode_script_roundtrip (gconstpointer user_data) /* TODO test compose() and decompose() */ +static void +test_unicode_normalization (gconstpointer user_data) +{ + hb_unicode_funcs_t *uf = (hb_unicode_funcs_t *) user_data; /* funcs object under test, supplied by main() as the test data */ + gunichar a, b, ab; + + + /* Test compose(): each failing case below also expects ab to be reset to 0 */ + + /* Not composable: two base letters, a zero second code point, and f followed by i */ + g_assert (!hb_unicode_compose (uf, 0x0041, 0x0042, &ab) && ab == 0); + g_assert (!hb_unicode_compose (uf, 0x0041, 0, &ab) && ab == 0); + g_assert (!hb_unicode_compose (uf, 0x0066, 0x0069, &ab) && ab == 0); + + /* Singletons should not compose: a lone code point with b == 0 must not map to anything */ + g_assert (!hb_unicode_compose (uf, 0x212B, 0, &ab) && ab == 0); + g_assert (!hb_unicode_compose (uf, 0x00C5, 0, &ab) && ab == 0); + g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0); + g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0); + + /* Pairs: base followed by a combining mark; 0x1E63 shows an already precomposed base composing further */ + g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5); + g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4); + g_assert (hb_unicode_compose (uf, 0x1E63, 0x0307, &ab) && ab == 0x1E69); + g_assert (hb_unicode_compose (uf, 
0x0073, 0x0323, &ab) && ab == 0x1E63); + g_assert (hb_unicode_compose (uf, 0x0064, 0x0307, &ab) && ab == 0x1E0B); + g_assert (hb_unicode_compose (uf, 0x0064, 0x0323, &ab) && ab == 0x1E0D); + + /* Hangul: an LV syllable with a trailing T gives LVT, and a leading L with a vowel V gives LV */ + g_assert (hb_unicode_compose (uf, 0xD4CC, 0x11B6, &ab) && ab == 0xD4DB); + g_assert (hb_unicode_compose (uf, 0x1111, 0x1171, &ab) && ab == 0xD4CC); + g_assert (hb_unicode_compose (uf, 0xCE20, 0x11B8, &ab) && ab == 0xCE31); + g_assert (hb_unicode_compose (uf, 0x110E, 0x1173, &ab) && ab == 0xCE20); + + + /* Test decompose(): each failing case below expects a == input and b == 0 */ + + /* Not decomposable: a plain letter, and U+FB01 which is expected to stay intact (presumably because its mapping is compatibility-only -- confirm) */ + g_assert (!hb_unicode_decompose (uf, 0x0041, &a, &b) && a == 0x0041 && b == 0); + g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0); + + /* Singletons: one-to-one mappings come back in a with b == 0; 0x212B is checked twice, first via g_assert_cmphex and then in the combined form */ + g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b)); + g_assert_cmphex (a, ==, 0x00C5); + g_assert_cmphex (b, ==, 0); + g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0); + g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0); + + /* Pairs: a single decomposition step, so 0x1E69 yields 0x1E63 and 0x0307 rather than three code points */ + g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A); + g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302); + g_assert (hb_unicode_decompose (uf, 0x1E69, &a, &b) && a == 0x1E63 && b == 0x0307); + g_assert (hb_unicode_decompose (uf, 0x1E63, &a, &b) && a == 0x0073 && b == 0x0323); + g_assert (hb_unicode_decompose (uf, 0x1E0B, &a, &b) && a == 0x0064 && b == 0x0307); + g_assert (hb_unicode_decompose (uf, 0x1E0D, &a, &b) && a == 0x0064 && b == 0x0323); + + /* Hangul: LVT splits into LV and T, and LV splits into L and V */ + g_assert (hb_unicode_decompose (uf, 0xD4DB, &a, &b) && a == 0xD4CC && b == 0x11B6); + g_assert (hb_unicode_decompose (uf, 0xD4CC, &a, &b) && a == 0x1111 && b == 0x1171); + g_assert (hb_unicode_decompose (uf, 0xCE31, &a, &b) && a == 0xCE20 && b == 0x11B8); + g_assert (hb_unicode_decompose (uf, 0xCE20, &a, &b) && a == 0x110E && b == 0x1173); + +} + int @@ -791,9 +855,11 @@ main (int argc, char **argv) hb_test_add 
(test_unicode_properties_empty); hb_test_add_data_flavor (hb_unicode_funcs_get_default (), "default", test_unicode_properties); + hb_test_add_data_flavor (hb_unicode_funcs_get_default (), "default", test_unicode_normalization); hb_test_add_data_flavor ((gconstpointer) script_roundtrip_default, "default", test_unicode_script_roundtrip); #ifdef HAVE_GLIB hb_test_add_data_flavor (hb_glib_get_unicode_funcs (), "glib", test_unicode_properties); + hb_test_add_data_flavor (hb_glib_get_unicode_funcs (), "glib", test_unicode_normalization); hb_test_add_data_flavor ((gconstpointer) script_roundtrip_glib, "glib", test_unicode_script_roundtrip); #endif #ifdef HAVE_ICU