Fix decompose() implementations to work with non-starter non-composables
Add tests.
This commit is contained in:
parent
5d90a342e3
commit
63c0ef4a07
|
@ -296,16 +296,16 @@ hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|||
*b = 0;
|
||||
ret = *a != ab;
|
||||
} else if (len == 2) {
|
||||
*a = g_utf8_get_char (normalized);
|
||||
*b = g_utf8_get_char (g_utf8_next_char (normalized));
|
||||
/* Here's the ugly part: if ab decomposes to a single character and
|
||||
* that character decomposes again, we have to detect that and undo
|
||||
* the second part :-(. */
|
||||
gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
|
||||
if (g_utf8_get_char (recomposed) != ab) {
|
||||
*a = g_utf8_get_char (recomposed);
|
||||
hb_codepoint_t c = g_utf8_get_char (recomposed);
|
||||
if (c != ab && c != *a) {
|
||||
*a = c;
|
||||
*b = 0;
|
||||
} else {
|
||||
*a = g_utf8_get_char (normalized);
|
||||
*b = g_utf8_get_char (g_utf8_next_char (normalized));
|
||||
}
|
||||
g_free (recomposed);
|
||||
ret = TRUE;
|
||||
|
|
|
@ -214,6 +214,10 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|||
hb_bool_t ret, err;
|
||||
UErrorCode icu_err;
|
||||
|
||||
/* This function is a monster! Maybe it wasn't a good idea to add a
|
||||
* pairwise decompose API... */
|
||||
/* Watch out for the dragons. Err, watch out for macros changing len. */
|
||||
|
||||
len = 0;
|
||||
err = FALSE;
|
||||
U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err);
|
||||
|
@ -232,21 +236,23 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|||
*b = 0;
|
||||
ret = *a != ab;
|
||||
} else if (len == 2) {
|
||||
len =0;
|
||||
U16_NEXT_UNSAFE (normalized, len, *a);
|
||||
U16_NEXT_UNSAFE (normalized, len, *b);
|
||||
|
||||
/* Here's the ugly part: if ab decomposes to a single character and
|
||||
* that character decomposes again, we have to detect that and undo
|
||||
* the second part :-(. */
|
||||
UChar recomposed[20];
|
||||
icu_err = U_ZERO_ERROR;
|
||||
len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
|
||||
unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
|
||||
if (icu_err)
|
||||
return FALSE;
|
||||
U16_GET_UNSAFE (recomposed, 0, *a);
|
||||
if (*a != ab) {
|
||||
hb_codepoint_t c;
|
||||
U16_GET_UNSAFE (recomposed, 0, c);
|
||||
if (c != *a && c != ab) {
|
||||
*a = c;
|
||||
*b = 0;
|
||||
} else {
|
||||
len =0;
|
||||
U16_NEXT_UNSAFE (normalized, len, *a);
|
||||
U16_GET_UNSAFE (normalized, len, *b);
|
||||
}
|
||||
ret = TRUE;
|
||||
} else {
|
||||
|
|
|
@ -800,6 +800,10 @@ test_unicode_normalization (gconstpointer user_data)
|
|||
g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0);
|
||||
g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0);
|
||||
|
||||
/* Non-starter pairs should not compose */
|
||||
g_assert (!hb_unicode_compose (uf, 0x0308, 0x0301, &ab) && ab == 0); /* !0x0344 */
|
||||
g_assert (!hb_unicode_compose (uf, 0x0F71, 0x0F72, &ab) && ab == 0); /* !0x0F73 */
|
||||
|
||||
/* Pairs */
|
||||
g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5);
|
||||
g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4);
|
||||
|
@ -822,12 +826,13 @@ test_unicode_normalization (gconstpointer user_data)
|
|||
g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0);
|
||||
|
||||
/* Singletons */
|
||||
g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b));
|
||||
g_assert_cmphex (a, ==, 0x00C5);
|
||||
g_assert_cmphex (b, ==, 0);
|
||||
g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0);
|
||||
g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0);
|
||||
|
||||
/* Non-starter pairs decompose, but do not compose */
|
||||
g_assert (hb_unicode_decompose (uf, 0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
|
||||
g_assert (hb_unicode_decompose (uf, 0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
|
||||
|
||||
/* Pairs */
|
||||
g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
|
||||
g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
|
||||
|
|
Loading…
Reference in New Issue