[Indic] Minor refactoring

2012-07-19 12:32:16 -04:00 · 2012-07-19 12:32:16 -04:00 · 9ccc6382ba
parent f83aaa3133
commit 9ccc6382ba
1 changed files with 71 additions and 56 deletions
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@ -113,12 +113,14 @@ is_ra (hb_codepoint_t u)
 		    compare_codepoint);
 }

+#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) /* Mask of the categories is_joiner() accepts: OT_ZWJ and OT_ZWNJ. */
 static bool
 is_joiner (const hb_glyph_info_t &info)
 {
-  return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)));
+  return !!(FLAG (info.indic_category()) & JOINER_FLAGS); /* True iff info's category is one of JOINER_FLAGS; presumably FLAG(x) is a one-bit mask for x (defined outside this view). */
 }

+#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)) /* Categories handled as consonants: real consonants and Ra, plus vowels and placeholders, which is_consonant() deliberately treats the same way. */
 static bool
 is_consonant (const hb_glyph_info_t &info)
 {
@ -127,15 +129,80 @@ is_consonant (const hb_glyph_info_t &info)
   * We treat Vowels and placeholders as if they were consonants.  This is safe because Vowels
   * cannot happen in a consonant syllable.  The plus side however is, we can call the
   * consonant syllable logic from the vowel syllable function and get it all right! */
-  return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)));
+  return !!(FLAG (info.indic_category()) & CONSONANT_FLAGS);
 }

+#define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng)) /* Mask of the categories is_halant_or_coeng() accepts: OT_H and OT_Coeng. */
 static bool
 is_halant_or_coeng (const hb_glyph_info_t &info)
 {
-  return !!(FLAG (info.indic_category()) & (FLAG (OT_H) | FLAG (OT_Coeng)));
+  return !!(FLAG (info.indic_category()) & HALANT_OR_COENG_FLAGS); /* True iff info's category is one of HALANT_OR_COENG_FLAGS; presumably FLAG(x) is a one-bit mask for x (defined outside this view). */
 }

+static inline void
+set_indic_properties (hb_glyph_info_t &info)
+{ /* Compute the Indic category and position of one glyph from its codepoint
+   * and store them through info.indic_category() / info.indic_position().
+   * The table lookup provides the defaults; the special cases below override
+   * them.  Their order matters: later checks test the category value that
+   * earlier checks may already have replaced. */
+  hb_codepoint_t u = info.codepoint;
+  unsigned int type = get_indic_categories (u);
+  unsigned int cat = type & 0x0F; /* low four bits of the table entry: category */
+  unsigned int pos = type >> 4; /* remaining upper bits of the table entry: position */
+
+  /* The spec says U+0952 is OT_A.  However, testing shows that Uniscribe
+   * treats U+0951..U+0952 all as OT_VD.
+   * TESTS:
+   * U+092E,U+0947,U+0952
+   * U+092E,U+0952,U+0947
+   * U+092E,U+0947,U+0951
+   * U+092E,U+0951,U+0947
+   *
+   * NOTE(review): the check below is passed the bounds 0x0951 and 0x0954,
+   * which is wider than the U+0951..U+0952 named in this comment -- confirm
+   * which range is intended and update whichever is stale.
+   * */
+  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954)))
+    cat = OT_VD;
+
+  if (cat == OT_X &&
+      unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D2))) /* Khmer Various signs */
+    cat = OT_N;
+
+  /* Khmer Virama is different since it can be used to form a final consonant.
+   * U+17D2 is also the upper bound passed to the range check above, but this
+   * unconditional assignment runs afterwards and therefore wins. */
+  if (unlikely (u == 0x17D2))
+    cat = OT_Coeng;
+
+  if (cat == OT_Repha) {
+    /* There are two kinds of characters marked as Repha:
+     * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
+     * - The ones that are GenCat=Lo are encoded logically, ie. beginning of syllable. (eg. Malayalam)
+     *
+     * We recategorize the first kind to look like a Nukta and attached to the base directly.
+     */
+    if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+      cat = OT_N;
+  }
+
+
+  /* Assign positions...
+   * Consonant-like categories take their position from consonant_position()
+   * and are re-tagged OT_Ra when is_ra() matches; OT_SM and OT_VD are pinned
+   * to POS_SMVD.  The remaining branches only change the category of three
+   * specific codepoints and leave pos at the value read from the table.
+   *
+   * NOTE(review): OT_DOTTEDCIRCLE is part of CONSONANT_FLAGS, but a glyph
+   * that only becomes OT_DOTTEDCIRCLE in the last branch has already skipped
+   * the consonant branch, so consonant_position() is never applied to it
+   * here -- confirm that this is intended. */
+  if ((FLAG (cat) & CONSONANT_FLAGS)) {
+    pos = consonant_position (u);
+    if (is_ra (u))
+      cat = OT_Ra;
+  } else if (cat == OT_SM ||
+	     cat == OT_VD) {
+    pos = POS_SMVD;
+  } else if (unlikely (u == 0x200C)) /* U+200C -> zero-width non-joiner category */
+    cat = OT_ZWNJ;
+  else if (unlikely (u == 0x200D)) /* U+200D -> zero-width joiner category */
+    cat = OT_ZWJ;
+  else if (unlikely (u == 0x25CC)) /* U+25CC -> dotted-circle placeholder category */
+    cat = OT_DOTTEDCIRCLE;
+
+  /* Publish the final values on the glyph. */
+  info.indic_category() = cat;
+  info.indic_position() = pos;
+}
+
+
+
+
+
+
+
 struct feature_list_t {
  hb_tag_t tag;
  hb_bool_t is_global;
@ -256,59 +323,7 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,

  unsigned int count = buffer->len;
  for (unsigned int i = 0; i < count; i++)
-  {
-    hb_glyph_info_t &info = buffer->info[i];
-    unsigned int type = get_indic_categories (info.codepoint);
-
-    info.indic_category() = type & 0x0F;
-    info.indic_position() = type >> 4;
-
-    /* The spec says U+0952 is OT_A.  However, testing shows that Uniscribe
-     * treats U+0951..U+0952 all as OT_VD.
-     * TESTS:
-     * U+092E,U+0947,U+0952
-     * U+092E,U+0952,U+0947
-     * U+092E,U+0947,U+0951
-     * U+092E,U+0951,U+0947
-     * */
-    if (unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x0951, 0x0954)))
-      info.indic_category() = OT_VD;
-
-    if (info.indic_category() == OT_X &&
-	unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D2))) /* Khmer Various signs */
-      info.indic_category() = OT_N;
-
-    /* Khmer Virama is different since it can be used to form a final consonant. */
-    if (unlikely (info.codepoint == 0x17D2))
-      info.indic_category() = OT_Coeng;
-
-    if (info.indic_category() == OT_Repha) {
-      /* There are two kinds of characters marked as Repha:
-       * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
-       * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam)
-       *
-       * We recategorize the first kind to look like a Nukta and attached to the base directly.
-       */
-      if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
-        info.indic_category() = OT_N;
-    }
-
-
-    /* Assign positions... */
-    if (is_consonant (info)) {
-      info.indic_position() = consonant_position (info.codepoint);
-      if (is_ra (info.codepoint))
-	info.indic_category() = OT_Ra;
-    } else if (info.indic_category() == OT_SM ||
-	       info.indic_category() == OT_VD) {
-      info.indic_position() = POS_SMVD;
-    } else if (unlikely (info.codepoint == 0x200C))
-      info.indic_category() = OT_ZWNJ;
-    else if (unlikely (info.codepoint == 0x200D))
-      info.indic_category() = OT_ZWJ;
-    else if (unlikely (info.codepoint == 0x25CC))
-      info.indic_category() = OT_DOTTEDCIRCLE;
-  }
+    set_indic_properties (buffer->info[i]);
 }

 static int