Fix decompose() implementations to work with non-starter non-composables

Add tests.
This commit is contained in:
Behdad Esfahbod 2011-07-21 20:58:42 -04:00
parent 5d90a342e3
commit 63c0ef4a07
3 changed files with 26 additions and 15 deletions

View File

@ -296,16 +296,16 @@ hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
*b = 0;
ret = *a != ab;
} else if (len == 2) {
*a = g_utf8_get_char (normalized);
*b = g_utf8_get_char (g_utf8_next_char (normalized));
/* Here's the ugly part: if ab decomposes to a single character and
* that character decomposes again, we have to detect that and undo
* the second part :-(. */
gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
if (g_utf8_get_char (recomposed) != ab) {
*a = g_utf8_get_char (recomposed);
hb_codepoint_t c = g_utf8_get_char (recomposed);
if (c != ab && c != *a) {
*a = c;
*b = 0;
} else {
*a = g_utf8_get_char (normalized);
*b = g_utf8_get_char (g_utf8_next_char (normalized));
}
g_free (recomposed);
ret = TRUE;

View File

@ -214,6 +214,10 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_bool_t ret, err;
UErrorCode icu_err;
/* This function is a monster! Maybe it wasn't a good idea adding a
* pairwise decompose API... */
/* Watch out for the dragons. Err, watch out for macros changing len. */
len = 0;
err = FALSE;
U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err);
@ -232,21 +236,23 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
*b = 0;
ret = *a != ab;
} else if (len == 2) {
len =0;
U16_NEXT_UNSAFE (normalized, len, *a);
U16_NEXT_UNSAFE (normalized, len, *b);
/* Here's the ugly part: if ab decomposes to a single character and
* that character decomposes again, we have to detect that and undo
* the second part :-(. */
UChar recomposed[20];
icu_err = U_ZERO_ERROR;
len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
if (icu_err)
return FALSE;
U16_GET_UNSAFE (recomposed, 0, *a);
if (*a != ab) {
hb_codepoint_t c;
U16_GET_UNSAFE (recomposed, 0, c);
if (c != *a && c != ab) {
*a = c;
*b = 0;
} else {
len =0;
U16_NEXT_UNSAFE (normalized, len, *a);
U16_GET_UNSAFE (normalized, len, *b);
}
ret = TRUE;
} else {

View File

@ -800,6 +800,10 @@ test_unicode_normalization (gconstpointer user_data)
g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0);
g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0);
/* Non-starter pairs should not compose */
g_assert (!hb_unicode_compose (uf, 0x0308, 0x0301, &ab) && ab == 0); /* !0x0344 */
g_assert (!hb_unicode_compose (uf, 0x0F71, 0x0F72, &ab) && ab == 0); /* !0x0F73 */
/* Pairs */
g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5);
g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4);
@ -822,12 +826,13 @@ test_unicode_normalization (gconstpointer user_data)
g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0);
/* Singletons */
g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b));
g_assert_cmphex (a, ==, 0x00C5);
g_assert_cmphex (b, ==, 0);
g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0);
g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0);
/* Non-starter pairs decompose, but do not compose */
g_assert (hb_unicode_decompose (uf, 0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
g_assert (hb_unicode_decompose (uf, 0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
/* Pairs */
g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302);