Fix decompose() implementations to work with non-starter non-composables
Add tests.
This commit is contained in:
parent
5d90a342e3
commit
63c0ef4a07
|
@ -296,16 +296,16 @@ hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
||||||
*b = 0;
|
*b = 0;
|
||||||
ret = *a != ab;
|
ret = *a != ab;
|
||||||
} else if (len == 2) {
|
} else if (len == 2) {
|
||||||
|
*a = g_utf8_get_char (normalized);
|
||||||
|
*b = g_utf8_get_char (g_utf8_next_char (normalized));
|
||||||
/* Here's the ugly part: if ab decomposes to a single character and
|
/* Here's the ugly part: if ab decomposes to a single character and
|
||||||
* that character decomposes again, we have to detect that and undo
|
* that character decomposes again, we have to detect that and undo
|
||||||
* the second part :-(. */
|
* the second part :-(. */
|
||||||
gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
|
gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
|
||||||
if (g_utf8_get_char (recomposed) != ab) {
|
hb_codepoint_t c = g_utf8_get_char (recomposed);
|
||||||
*a = g_utf8_get_char (recomposed);
|
if (c != ab && c != *a) {
|
||||||
|
*a = c;
|
||||||
*b = 0;
|
*b = 0;
|
||||||
} else {
|
|
||||||
*a = g_utf8_get_char (normalized);
|
|
||||||
*b = g_utf8_get_char (g_utf8_next_char (normalized));
|
|
||||||
}
|
}
|
||||||
g_free (recomposed);
|
g_free (recomposed);
|
||||||
ret = TRUE;
|
ret = TRUE;
|
||||||
|
|
|
@ -214,6 +214,10 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
||||||
hb_bool_t ret, err;
|
hb_bool_t ret, err;
|
||||||
UErrorCode icu_err;
|
UErrorCode icu_err;
|
||||||
|
|
||||||
|
/* This function is a monster! Maybe it wasn't a good idea adding a
|
||||||
|
* pairwise decompose API... */
|
||||||
|
/* Watch out for the dragons. Err, watch out for macros changing len. */
|
||||||
|
|
||||||
len = 0;
|
len = 0;
|
||||||
err = FALSE;
|
err = FALSE;
|
||||||
U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err);
|
U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err);
|
||||||
|
@ -232,21 +236,23 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
||||||
*b = 0;
|
*b = 0;
|
||||||
ret = *a != ab;
|
ret = *a != ab;
|
||||||
} else if (len == 2) {
|
} else if (len == 2) {
|
||||||
|
len =0;
|
||||||
|
U16_NEXT_UNSAFE (normalized, len, *a);
|
||||||
|
U16_NEXT_UNSAFE (normalized, len, *b);
|
||||||
|
|
||||||
/* Here's the ugly part: if ab decomposes to a single character and
|
/* Here's the ugly part: if ab decomposes to a single character and
|
||||||
* that character decomposes again, we have to detect that and undo
|
* that character decomposes again, we have to detect that and undo
|
||||||
* the second part :-(. */
|
* the second part :-(. */
|
||||||
UChar recomposed[20];
|
UChar recomposed[20];
|
||||||
icu_err = U_ZERO_ERROR;
|
icu_err = U_ZERO_ERROR;
|
||||||
len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
|
unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
|
||||||
if (icu_err)
|
if (icu_err)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
U16_GET_UNSAFE (recomposed, 0, *a);
|
hb_codepoint_t c;
|
||||||
if (*a != ab) {
|
U16_GET_UNSAFE (recomposed, 0, c);
|
||||||
|
if (c != *a && c != ab) {
|
||||||
|
*a = c;
|
||||||
*b = 0;
|
*b = 0;
|
||||||
} else {
|
|
||||||
len =0;
|
|
||||||
U16_NEXT_UNSAFE (normalized, len, *a);
|
|
||||||
U16_GET_UNSAFE (normalized, len, *b);
|
|
||||||
}
|
}
|
||||||
ret = TRUE;
|
ret = TRUE;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -800,6 +800,10 @@ test_unicode_normalization (gconstpointer user_data)
|
||||||
g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0);
|
g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0);
|
||||||
g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0);
|
g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0);
|
||||||
|
|
||||||
|
/* Non-starter pairs should not compose */
|
||||||
|
g_assert (!hb_unicode_compose (uf, 0x0308, 0x0301, &ab) && ab == 0); /* !0x0344 */
|
||||||
|
g_assert (!hb_unicode_compose (uf, 0x0F71, 0x0F72, &ab) && ab == 0); /* !0x0F73 */
|
||||||
|
|
||||||
/* Pairs */
|
/* Pairs */
|
||||||
g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5);
|
g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5);
|
||||||
g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4);
|
g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4);
|
||||||
|
@ -822,12 +826,13 @@ test_unicode_normalization (gconstpointer user_data)
|
||||||
g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0);
|
g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0);
|
||||||
|
|
||||||
/* Singletons */
|
/* Singletons */
|
||||||
g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b));
|
|
||||||
g_assert_cmphex (a, ==, 0x00C5);
|
|
||||||
g_assert_cmphex (b, ==, 0);
|
|
||||||
g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0);
|
g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0);
|
||||||
g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0);
|
g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0);
|
||||||
|
|
||||||
|
/* Non-starter pairs decompose, but do not compose */
|
||||||
|
g_assert (hb_unicode_decompose (uf, 0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
|
||||||
|
g_assert (hb_unicode_decompose (uf, 0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
|
||||||
|
|
||||||
/* Pairs */
|
/* Pairs */
|
||||||
g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
|
g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
|
||||||
g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
|
g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
|
||||||
|
|
Loading…
Reference in New Issue