From 0f8881d6bbf6cd59938315eeff9b71cfc736aa4e Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 7 Aug 2012 16:57:02 -0400 Subject: [PATCH] More refactoring --- src/hb-ot-shape-normalize.cc | 161 +++++++++++++++++++++++++++++++++- src/hb-unicode-private.hh | 164 +---------------------------------- 2 files changed, 162 insertions(+), 163 deletions(-) diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index 53f575f64..b5e0b4d3f 100644 --- a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -87,6 +87,39 @@ decompose_func (hb_unicode_funcs_t *unicode, hb_codepoint_t *a, hb_codepoint_t *b) { + /* XXX FIXME, move these to complex shapers and propagate to normalizer.*/ + switch (ab) { + case 0x0AC9 : return false; + + case 0x0931 : return false; + case 0x0B94 : return false; + + /* These ones have Unicode decompositions, but we do it + * this way to be close to what Uniscribe does. */ + case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true; + case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true; + case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true; + case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true; + + case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true; + case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true; + case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true; + case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true; + case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true; + case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true; + case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true; + case 0x1925 : *a = 0x1920; *b= 0x1923; return true; + case 0x1926 : *a = 0x1920; *b= 0x1924; return true; + case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true; + case 0x1112E : *a = 0x11127; *b= 0x11131; return true; + case 0x1112F : *a = 0x11127; *b= 0x11132; return true; +#if 0 + case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true; + case 0x1C29 : *a = 0xno decomp, -> LEFT; return true; + case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true; + case 
0x111BF : *a = 0xno decomp, -> ABOVE; return true; +#endif + } return unicode->decompose (ab, a, b); } @@ -96,7 +129,133 @@ compose_func (hb_unicode_funcs_t *unicode, hb_codepoint_t b, hb_codepoint_t *ab) { - return unicode->compose (a, b, ab); + /* XXX, this belongs to indic normalizer. */ + if ((FLAG (unicode->general_category (a)) & + (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | + FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | + FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))) + return false; + /* XXX, add composition-exclusion exceptions to Indic shaper. */ + if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; } + + /* XXX, these belong to the Hebrew / default shaper. */ + /* Hebrew presentation-form shaping. + * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */ + // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA; + // note that some letters do not have a dagesh presForm encoded + static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = { + 0xFB30, // ALEF + 0xFB31, // BET + 0xFB32, // GIMEL + 0xFB33, // DALET + 0xFB34, // HE + 0xFB35, // VAV + 0xFB36, // ZAYIN + 0, // HET + 0xFB38, // TET + 0xFB39, // YOD + 0xFB3A, // FINAL KAF + 0xFB3B, // KAF + 0xFB3C, // LAMED + 0, // FINAL MEM + 0xFB3E, // MEM + 0, // FINAL NUN + 0xFB40, // NUN + 0xFB41, // SAMEKH + 0, // AYIN + 0xFB43, // FINAL PE + 0xFB44, // PE + 0, // FINAL TSADI + 0xFB46, // TSADI + 0xFB47, // QOF + 0xFB48, // RESH + 0xFB49, // SHIN + 0xFB4A // TAV + }; + + hb_bool_t found = unicode->compose (a, b, ab); + + if (!found && (b & ~0x7F) == 0x0580) { + // special-case Hebrew presentation forms that are excluded from + // standard normalization, but wanted for old fonts + switch (b) { + case 0x05B4: // HIRIQ + if (a == 0x05D9) { // YOD + *ab = 0xFB1D; + found = true; + } + break; + case 0x05B7: // patah + if (a == 0x05F2) { // YIDDISH YOD YOD + *ab = 0xFB1F; + found = true; + } else if (a == 0x05D0) { // ALEF + *ab = 0xFB2E; + found = true; + } + 
break; + case 0x05B8: // QAMATS + if (a == 0x05D0) { // ALEF + *ab = 0xFB2F; + found = true; + } + break; + case 0x05B9: // HOLAM + if (a == 0x05D5) { // VAV + *ab = 0xFB4B; + found = true; + } + break; + case 0x05BC: // DAGESH + if (a >= 0x05D0 && a <= 0x05EA) { + *ab = sDageshForms[a - 0x05D0]; + found = (*ab != 0); + } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT + *ab = 0xFB2C; + found = true; + } else if (a == 0xFB2B) { // SHIN WITH SIN DOT + *ab = 0xFB2D; + found = true; + } + break; + case 0x05BF: // RAFE + switch (a) { + case 0x05D1: // BET + *ab = 0xFB4C; + found = true; + break; + case 0x05DB: // KAF + *ab = 0xFB4D; + found = true; + break; + case 0x05E4: // PE + *ab = 0xFB4E; + found = true; + break; + } + break; + case 0x05C1: // SHIN DOT + if (a == 0x05E9) { // SHIN + *ab = 0xFB2A; + found = true; + } else if (a == 0xFB49) { // SHIN WITH DAGESH + *ab = 0xFB2C; + found = true; + } + break; + case 0x05C2: // SIN DOT + if (a == 0x05E9) { // SHIN + *ab = 0xFB2B; + found = true; + } else if (a == 0xFB49) { // SHIN WITH DAGESH + *ab = 0xFB2D; + found = true; + } + break; + } + } + + return found; } static void diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh index 2a67f0a14..7367f0b29 100644 --- a/src/hb-unicode-private.hh +++ b/src/hb-unicode-private.hh @@ -80,173 +80,13 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE hb_codepoint_t *ab) { *ab = 0; - - /* XXX, this belongs to indic normalizer. */ - if ((FLAG (general_category (a)) & - (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))) - return false; - /* XXX, add composition-exclusion exceptions to Indic shaper. */ - if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; } - - /* XXX, these belong to the hebew / default shaper. */ - /* Hebrew presentation-form shaping. 
- * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */ - // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA; - // note that some letters do not have a dagesh presForm encoded - static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = { - 0xFB30, // ALEF - 0xFB31, // BET - 0xFB32, // GIMEL - 0xFB33, // DALET - 0xFB34, // HE - 0xFB35, // VAV - 0xFB36, // ZAYIN - 0, // HET - 0xFB38, // TET - 0xFB39, // YOD - 0xFB3A, // FINAL KAF - 0xFB3B, // KAF - 0xFB3C, // LAMED - 0, // FINAL MEM - 0xFB3E, // MEM - 0, // FINAL NUN - 0xFB40, // NUN - 0xFB41, // SAMEKH - 0, // AYIN - 0xFB43, // FINAL PE - 0xFB44, // PE - 0, // FINAL TSADI - 0xFB46, // TSADI - 0xFB47, // QOF - 0xFB48, // RESH - 0xFB49, // SHIN - 0xFB4A // TAV - }; - - hb_bool_t found = func.compose (this, a, b, ab, user_data.compose); - - if (!found && (b & ~0x7F) == 0x0580) { - // special-case Hebrew presentation forms that are excluded from - // standard normalization, but wanted for old fonts - switch (b) { - case 0x05B4: // HIRIQ - if (a == 0x05D9) { // YOD - *ab = 0xFB1D; - found = true; - } - break; - case 0x05B7: // patah - if (a == 0x05F2) { // YIDDISH YOD YOD - *ab = 0xFB1F; - found = true; - } else if (a == 0x05D0) { // ALEF - *ab = 0xFB2E; - found = true; - } - break; - case 0x05B8: // QAMATS - if (a == 0x05D0) { // ALEF - *ab = 0xFB2F; - found = true; - } - break; - case 0x05B9: // HOLAM - if (a == 0x05D5) { // VAV - *ab = 0xFB4B; - found = true; - } - break; - case 0x05BC: // DAGESH - if (a >= 0x05D0 && a <= 0x05EA) { - *ab = sDageshForms[a - 0x05D0]; - found = (*ab != 0); - } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT - *ab = 0xFB2C; - found = true; - } else if (a == 0xFB2B) { // SHIN WITH SIN DOT - *ab = 0xFB2D; - found = true; - } - break; - case 0x05BF: // RAFE - switch (a) { - case 0x05D1: // BET - *ab = 0xFB4C; - found = true; - break; - case 0x05DB: // KAF - *ab = 0xFB4D; - found = true; - break; - case 0x05E4: // PE - *ab = 0xFB4E; - found = true; - break; - } - 
break; - case 0x05C1: // SHIN DOT - if (a == 0x05E9) { // SHIN - *ab = 0xFB2A; - found = true; - } else if (a == 0xFB49) { // SHIN WITH DAGESH - *ab = 0xFB2C; - found = true; - } - break; - case 0x05C2: // SIN DOT - if (a == 0x05E9) { // SHIN - *ab = 0xFB2B; - found = true; - } else if (a == 0xFB49) { // SHIN WITH DAGESH - *ab = 0xFB2D; - found = true; - } - break; - } - } - - return found; + return func.compose (this, a, b, ab, user_data.compose); } inline hb_bool_t decompose (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) { - /* XXX FIXME, move these to complex shapers and propagage to normalizer.*/ - switch (ab) { - case 0x0AC9 : return false; - - case 0x0931 : return false; - case 0x0B94 : return false; - - /* These ones have Unicode decompositions, but we do it - * this way to be close to what Uniscribe does. */ - case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true; - case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true; - case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true; - case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true; - - case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true; - case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true; - case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true; - case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true; - case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true; - case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true; - case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true; - case 0x1925 : *a = 0x1920; *b= 0x1923; return true; - case 0x1926 : *a = 0x1920; *b= 0x1924; return true; - case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true; - case 0x1112E : *a = 0x11127; *b= 0x11131; return true; - case 0x1112F : *a = 0x11127; *b= 0x11132; return true; -#if 0 - case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true; - case 0x1C29 : *a = 0xno decomp, -> LEFT; return true; - case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true; - case 0x111BF : *a = 0xno decomp, -> ABOVE; return true; -#endif - } - *a = ab; *b = 0; + *a = *b = 0; return 
func.decompose (this, ab, a, b, user_data.decompose); }