[Indic] Minor refactoring

This commit is contained in:
Behdad Esfahbod 2012-07-19 12:32:16 -04:00
parent f83aaa3133
commit 9ccc6382ba
1 changed files with 71 additions and 56 deletions

View File

@ -113,12 +113,14 @@ is_ra (hb_codepoint_t u)
compare_codepoint); compare_codepoint);
} }
#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))
static bool static bool
is_joiner (const hb_glyph_info_t &info) is_joiner (const hb_glyph_info_t &info)
{ {
return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))); return !!(FLAG (info.indic_category()) & JOINER_FLAGS);
} }
#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))
static bool static bool
is_consonant (const hb_glyph_info_t &info) is_consonant (const hb_glyph_info_t &info)
{ {
@ -127,15 +129,80 @@ is_consonant (const hb_glyph_info_t &info)
* We treat Vowels and placeholders as if they were consonants. This is safe because Vowels * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
* cannot happen in a consonant syllable. The plus side however is, we can call the * cannot happen in a consonant syllable. The plus side however is, we can call the
* consonant syllable logic from the vowel syllable function and get it all right! */ * consonant syllable logic from the vowel syllable function and get it all right! */
return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))); return !!(FLAG (info.indic_category()) & CONSONANT_FLAGS);
} }
#define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng))
static bool static bool
is_halant_or_coeng (const hb_glyph_info_t &info) is_halant_or_coeng (const hb_glyph_info_t &info)
{ {
return !!(FLAG (info.indic_category()) & (FLAG (OT_H) | FLAG (OT_Coeng))); return !!(FLAG (info.indic_category()) & HALANT_OR_COENG_FLAGS);
} }
/* Compute the Indic category and position for one glyph and store them in
 * info.indic_category() / info.indic_position().
 *
 * The starting values come from get_indic_categories(): the low four bits
 * are the category, the remaining high bits are the position.  A series of
 * per-codepoint overrides is then applied, in order, before the results
 * are written back. */
static inline void
set_indic_properties (hb_glyph_info_t &info)
{
  const hb_codepoint_t u = info.codepoint;
  const unsigned int packed = get_indic_categories (u);
  unsigned int category = packed & 0x0F;
  unsigned int position = packed >> 4;

  /* The spec says U+0952 is OT_A, but testing shows Uniscribe treats these
   * Vedic marks as OT_VD.
   * TESTS:
   * U+092E,U+0947,U+0952
   * U+092E,U+0952,U+0947
   * U+092E,U+0947,U+0951
   * U+092E,U+0951,U+0947
   *
   * NOTE(review): the range tested below is U+0951..U+0954, which is wider
   * than the U+0951..U+0952 named in the original note -- confirm which is
   * intended. */
  const bool is_vedic_mark = hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954);
  if (unlikely (is_vedic_mark))
    category = OT_VD;

  /* Khmer Various signs that are still uncategorized become Nukta-like. */
  if (category == OT_X)
  {
    if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D2)))
      category = OT_N;
  }

  /* Khmer Virama is different since it can be used to form a final
   * consonant.  This runs after the range check above, so U+17D2 always
   * ends up as OT_Coeng. */
  if (unlikely (u == 0x17D2))
    category = OT_Coeng;

  /* There are two kinds of characters marked as Repha:
   * - Those with GenCat=Mn are already positioned visually, ie. after the
   *   base. (eg. Khmer)
   * - Those with GenCat=Lo are encoded logically, ie. at the beginning of
   *   the syllable. (eg. Malayalam)
   *
   * The first kind is recategorized to look like a Nukta, attached to the
   * base directly.  The general category is only queried for Rephas. */
  if (category == OT_Repha &&
      _hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
    category = OT_N;

  /* Assign positions... */
  if (FLAG (category) & CONSONANT_FLAGS)
  {
    /* Anything in the consonant set gets its position from the codepoint;
     * Ra codepoints are additionally tagged as OT_Ra. */
    position = consonant_position (u);
    if (is_ra (u))
      category = OT_Ra;
  }
  else if (category == OT_SM || category == OT_VD)
  {
    position = POS_SMVD;
  }
  else
  {
    /* Remaining special codepoints only get a category; their position is
     * left as it came out of the table lookup. */
    switch (u)
    {
      case 0x200C: category = OT_ZWNJ;         break;
      case 0x200D: category = OT_ZWJ;          break;
      case 0x25CC: category = OT_DOTTEDCIRCLE; break;
      default:                                 break;
    }
  }

  info.indic_category() = category;
  info.indic_position() = position;
}
struct feature_list_t { struct feature_list_t {
hb_tag_t tag; hb_tag_t tag;
hb_bool_t is_global; hb_bool_t is_global;
@ -256,59 +323,7 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
unsigned int count = buffer->len; unsigned int count = buffer->len;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
{ set_indic_properties (buffer->info[i]);
hb_glyph_info_t &info = buffer->info[i];
unsigned int type = get_indic_categories (info.codepoint);
info.indic_category() = type & 0x0F;
info.indic_position() = type >> 4;
/* The spec says U+0952 is OT_A. However, testing shows that Uniscribe
* treats U+0951..U+0952 all as OT_VD.
* TESTS:
* U+092E,U+0947,U+0952
* U+092E,U+0952,U+0947
* U+092E,U+0947,U+0951
* U+092E,U+0951,U+0947
* */
if (unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x0951, 0x0954)))
info.indic_category() = OT_VD;
if (info.indic_category() == OT_X &&
unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D2))) /* Khmer Various signs */
info.indic_category() = OT_N;
/* Khmer Virama is different since it can be used to form a final consonant. */
if (unlikely (info.codepoint == 0x17D2))
info.indic_category() = OT_Coeng;
if (info.indic_category() == OT_Repha) {
/* There are two kinds of characters marked as Repha:
* - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
* - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam)
*
* We recategorize the first kind to look like a Nukta and attached to the base directly.
*/
if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
info.indic_category() = OT_N;
}
/* Assign positions... */
if (is_consonant (info)) {
info.indic_position() = consonant_position (info.codepoint);
if (is_ra (info.codepoint))
info.indic_category() = OT_Ra;
} else if (info.indic_category() == OT_SM ||
info.indic_category() == OT_VD) {
info.indic_position() = POS_SMVD;
} else if (unlikely (info.codepoint == 0x200C))
info.indic_category() = OT_ZWNJ;
else if (unlikely (info.codepoint == 0x200D))
info.indic_category() = OT_ZWJ;
else if (unlikely (info.codepoint == 0x25CC))
info.indic_category() = OT_DOTTEDCIRCLE;
}
} }
static int static int