Fix decompose() implementations to work with non-starter non-composables

Add tests.
2011-07-21 20:58:42 -04:00 · 2011-07-21 20:58:42 -04:00 · 63c0ef4a07
parent 5d90a342e3
commit 63c0ef4a07
3 changed files with 26 additions and 15 deletions
--- a/src/hb-glib.cc
+++ b/src/hb-glib.cc
@@ -296,16 +296,16 @@ hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    *b = 0;
    ret = *a != ab;
  } else if (len == 2) {
    *a = g_utf8_get_char (normalized);
    *b = g_utf8_get_char (g_utf8_next_char (normalized));
    /* Here's the ugly part: if ab decomposes to a single character and
     * that character decomposes again, we have to detect that and undo
     * the second part :-(. */
    gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
-    if (g_utf8_get_char (recomposed) != ab) {
+    hb_codepoint_t c = g_utf8_get_char (recomposed);
-      *a = g_utf8_get_char (recomposed);
+    if (c != ab && c != *a) {
      *a = c;
      *b = 0;
    } else {
      *a = g_utf8_get_char (normalized);
      *b = g_utf8_get_char (g_utf8_next_char (normalized));
    }
    g_free (recomposed);
    ret = TRUE;
--- a/src/hb-icu.cc
+++ b/src/hb-icu.cc
@@ -214,6 +214,10 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
  hb_bool_t ret, err;
  UErrorCode icu_err;
  /* This function is a monster! Maybe it wasn't a good idea adding a
   * pairwise decompose API... */
  /* Watchout for the dragons.  Err, watchout for macros changing len. */
  len = 0;
  err = FALSE;
  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err);
@@ -232,21 +236,23 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    *b = 0;
    ret = *a != ab;
  } else if (len == 2) {
    len =0;
    U16_NEXT_UNSAFE (normalized, len, *a);
    U16_NEXT_UNSAFE (normalized, len, *b);
    /* Here's the ugly part: if ab decomposes to a single character and
     * that character decomposes again, we have to detect that and undo
     * the second part :-(. */
    UChar recomposed[20];
    icu_err = U_ZERO_ERROR;
-    len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
+    unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
    if (icu_err)
      return FALSE;
-    U16_GET_UNSAFE (recomposed, 0, *a);
+    hb_codepoint_t c;
-    if (*a != ab) {
+    U16_GET_UNSAFE (recomposed, 0, c);
    if (c != *a && c != ab) {
      *a = c;
      *b = 0;
    } else {
      len =0;
      U16_NEXT_UNSAFE (normalized, len, *a);
      U16_GET_UNSAFE (normalized, len, *b);
    }
    ret = TRUE;
  } else {
--- a/test/test-unicode.c
+++ b/test/test-unicode.c
@@ -800,6 +800,10 @@ test_unicode_normalization (gconstpointer user_data)
  g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0);
  g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0);
  /* Non-starter pairs should not compose */
  g_assert (!hb_unicode_compose (uf, 0x0308, 0x0301, &ab) && ab == 0); /* !0x0344 */
  g_assert (!hb_unicode_compose (uf, 0x0F71, 0x0F72, &ab) && ab == 0); /* !0x0F73 */
  /* Pairs */
  g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5);
  g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4);
@@ -822,12 +826,13 @@ test_unicode_normalization (gconstpointer user_data)
  g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0);
  /* Singletons */
  g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b));
  g_assert_cmphex (a, ==, 0x00C5);
  g_assert_cmphex (b, ==, 0);
  g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0);
  g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0);
  /* Non-starter pairs decompose, but not compose */
  g_assert (hb_unicode_decompose (uf, 0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
  g_assert (hb_unicode_decompose (uf, 0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
  /* Pairs */
  g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
  g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302);