From 9ccc6382ba43760167c134c18c1c4ada4b8c3f22 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 19 Jul 2012 12:32:16 -0400 Subject: [PATCH] [Indic] Minor refactoring --- src/hb-ot-shape-complex-indic.cc | 127 +++++++++++++++++-------------- 1 file changed, 71 insertions(+), 56 deletions(-) diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index bb56c425e..0ec6b627b 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -113,12 +113,14 @@ is_ra (hb_codepoint_t u) compare_codepoint); } +#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) static bool is_joiner (const hb_glyph_info_t &info) { - return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))); + return !!(FLAG (info.indic_category()) & JOINER_FLAGS); } +#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)) static bool is_consonant (const hb_glyph_info_t &info) { @@ -127,15 +129,80 @@ is_consonant (const hb_glyph_info_t &info) * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels * cannot happen in a consonant syllable. The plus side however is, we can call the * consonant syllable logic from the vowel syllable function and get it all right! */ - return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))); + return !!(FLAG (info.indic_category()) & CONSONANT_FLAGS); } +#define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng)) static bool is_halant_or_coeng (const hb_glyph_info_t &info) { - return !!(FLAG (info.indic_category()) & (FLAG (OT_H) | FLAG (OT_Coeng))); + return !!(FLAG (info.indic_category()) & HALANT_OR_COENG_FLAGS); } +static inline void +set_indic_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = get_indic_categories (u); + unsigned int cat = type & 0x0F; + unsigned int pos = type >> 4; + + /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe + * treats U+0951..U+0952 all as OT_VD. + * TESTS: + * U+092E,U+0947,U+0952 + * U+092E,U+0952,U+0947 + * U+092E,U+0947,U+0951 + * U+092E,U+0951,U+0947 + * */ + if (unlikely (hb_in_range (u, 0x0951, 0x0954))) + cat = OT_VD; + + if (cat == OT_X && + unlikely (hb_in_range (u, 0x17CB, 0x17D2))) /* Khmer Various signs */ + cat = OT_N; + + /* Khmer Virama is different since it can be used to form a final consonant. */ + if (unlikely (u == 0x17D2)) + cat = OT_Coeng; + + if (cat == OT_Repha) { + /* There are two kinds of characters marked as Repha: + * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer) + * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam) + * + * We recategorize the first kind to look like a Nukta and attached to the base directly. + */ + if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) + cat = OT_N; + } + + + /* Assign positions... */ + if ((FLAG (cat) & CONSONANT_FLAGS)) { + pos = consonant_position (u); + if (is_ra (u)) + cat = OT_Ra; + } else if (cat == OT_SM || + cat == OT_VD) { + pos = POS_SMVD; + } else if (unlikely (u == 0x200C)) + cat = OT_ZWNJ; + else if (unlikely (u == 0x200D)) + cat = OT_ZWJ; + else if (unlikely (u == 0x25CC)) + cat = OT_DOTTEDCIRCLE; + + info.indic_category() = cat; + info.indic_position() = pos; +} + + + + + + + struct feature_list_t { hb_tag_t tag; hb_bool_t is_global; @@ -256,59 +323,7 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED, unsigned int count = buffer->len; for (unsigned int i = 0; i < count; i++) - { - hb_glyph_info_t &info = buffer->info[i]; - unsigned int type = get_indic_categories (info.codepoint); - - info.indic_category() = type & 0x0F; - info.indic_position() = type >> 4; - - /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe - * treats U+0951..U+0952 all as OT_VD. - * TESTS: - * U+092E,U+0947,U+0952 - * U+092E,U+0952,U+0947 - * U+092E,U+0947,U+0951 - * U+092E,U+0951,U+0947 - * */ - if (unlikely (hb_in_range (info.codepoint, 0x0951, 0x0954))) - info.indic_category() = OT_VD; - - if (info.indic_category() == OT_X && - unlikely (hb_in_range (info.codepoint, 0x17CB, 0x17D2))) /* Khmer Various signs */ - info.indic_category() = OT_N; - - /* Khmer Virama is different since it can be used to form a final consonant. */ - if (unlikely (info.codepoint == 0x17D2)) - info.indic_category() = OT_Coeng; - - if (info.indic_category() == OT_Repha) { - /* There are two kinds of characters marked as Repha: - * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer) - * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam) - * - * We recategorize the first kind to look like a Nukta and attached to the base directly. - */ - if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) - info.indic_category() = OT_N; - } - - - /* Assign positions... */ - if (is_consonant (info)) { - info.indic_position() = consonant_position (info.codepoint); - if (is_ra (info.codepoint)) - info.indic_category() = OT_Ra; - } else if (info.indic_category() == OT_SM || - info.indic_category() == OT_VD) { - info.indic_position() = POS_SMVD; - } else if (unlikely (info.codepoint == 0x200C)) - info.indic_category() = OT_ZWNJ; - else if (unlikely (info.codepoint == 0x200D)) - info.indic_category() = OT_ZWJ; - else if (unlikely (info.codepoint == 0x25CC)) - info.indic_category() = OT_DOTTEDCIRCLE; - } + set_indic_properties (buffer->info[i]); } static int