More refactoring

This commit is contained in:
Behdad Esfahbod 2012-08-07 16:57:02 -04:00
parent 428dfcab66
commit 0f8881d6bb
2 changed files with 162 additions and 163 deletions

View File

@ -87,6 +87,39 @@ decompose_func (hb_unicode_funcs_t *unicode,
hb_codepoint_t *a,
hb_codepoint_t *b)
{
/* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
switch (ab) {
case 0x0AC9 : return false;
case 0x0931 : return false;
case 0x0B94 : return false;
/* These ones have Unicode decompositions, but we do it
* this way to be close to what Uniscribe does. */
case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
#if 0
case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
#endif
}
return unicode->decompose (ab, a, b);
}
@ -96,7 +129,133 @@ compose_func (hb_unicode_funcs_t *unicode,
hb_codepoint_t b,
hb_codepoint_t *ab)
{
return unicode->compose (a, b, ab);
/* XXX, this belongs to indic normalizer. */
if ((FLAG (unicode->general_category (a)) &
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
return false;
/* XXX, add composition-exclusion exceptions to Indic shaper. */
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
/* XXX, these belong to the hebew / default shaper. */
/* Hebrew presentation-form shaping.
* https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
// Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
// note that some letters do not have a dagesh presForm encoded
static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
0xFB30, // ALEF
0xFB31, // BET
0xFB32, // GIMEL
0xFB33, // DALET
0xFB34, // HE
0xFB35, // VAV
0xFB36, // ZAYIN
0, // HET
0xFB38, // TET
0xFB39, // YOD
0xFB3A, // FINAL KAF
0xFB3B, // KAF
0xFB3C, // LAMED
0, // FINAL MEM
0xFB3E, // MEM
0, // FINAL NUN
0xFB40, // NUN
0xFB41, // SAMEKH
0, // AYIN
0xFB43, // FINAL PE
0xFB44, // PE
0, // FINAL TSADI
0xFB46, // TSADI
0xFB47, // QOF
0xFB48, // RESH
0xFB49, // SHIN
0xFB4A // TAV
};
hb_bool_t found = unicode->compose (a, b, ab);
if (!found && (b & ~0x7F) == 0x0580) {
// special-case Hebrew presentation forms that are excluded from
// standard normalization, but wanted for old fonts
switch (b) {
case 0x05B4: // HIRIQ
if (a == 0x05D9) { // YOD
*ab = 0xFB1D;
found = true;
}
break;
case 0x05B7: // patah
if (a == 0x05F2) { // YIDDISH YOD YOD
*ab = 0xFB1F;
found = true;
} else if (a == 0x05D0) { // ALEF
*ab = 0xFB2E;
found = true;
}
break;
case 0x05B8: // QAMATS
if (a == 0x05D0) { // ALEF
*ab = 0xFB2F;
found = true;
}
break;
case 0x05B9: // HOLAM
if (a == 0x05D5) { // VAV
*ab = 0xFB4B;
found = true;
}
break;
case 0x05BC: // DAGESH
if (a >= 0x05D0 && a <= 0x05EA) {
*ab = sDageshForms[a - 0x05D0];
found = (*ab != 0);
} else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
*ab = 0xFB2C;
found = true;
} else if (a == 0xFB2B) { // SHIN WITH SIN DOT
*ab = 0xFB2D;
found = true;
}
break;
case 0x05BF: // RAFE
switch (a) {
case 0x05D1: // BET
*ab = 0xFB4C;
found = true;
break;
case 0x05DB: // KAF
*ab = 0xFB4D;
found = true;
break;
case 0x05E4: // PE
*ab = 0xFB4E;
found = true;
break;
}
break;
case 0x05C1: // SHIN DOT
if (a == 0x05E9) { // SHIN
*ab = 0xFB2A;
found = true;
} else if (a == 0xFB49) { // SHIN WITH DAGESH
*ab = 0xFB2C;
found = true;
}
break;
case 0x05C2: // SIN DOT
if (a == 0x05E9) { // SHIN
*ab = 0xFB2B;
found = true;
} else if (a == 0xFB49) { // SHIN WITH DAGESH
*ab = 0xFB2D;
found = true;
}
break;
}
}
return found;
}
static void

View File

@ -80,173 +80,13 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
hb_codepoint_t *ab)
{
*ab = 0;
/* XXX, this belongs to indic normalizer. */
if ((FLAG (general_category (a)) &
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
return false;
/* XXX, add composition-exclusion exceptions to Indic shaper. */
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
/* XXX, these belong to the hebew / default shaper. */
/* Hebrew presentation-form shaping.
* https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
// Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
// note that some letters do not have a dagesh presForm encoded
static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
0xFB30, // ALEF
0xFB31, // BET
0xFB32, // GIMEL
0xFB33, // DALET
0xFB34, // HE
0xFB35, // VAV
0xFB36, // ZAYIN
0, // HET
0xFB38, // TET
0xFB39, // YOD
0xFB3A, // FINAL KAF
0xFB3B, // KAF
0xFB3C, // LAMED
0, // FINAL MEM
0xFB3E, // MEM
0, // FINAL NUN
0xFB40, // NUN
0xFB41, // SAMEKH
0, // AYIN
0xFB43, // FINAL PE
0xFB44, // PE
0, // FINAL TSADI
0xFB46, // TSADI
0xFB47, // QOF
0xFB48, // RESH
0xFB49, // SHIN
0xFB4A // TAV
};
hb_bool_t found = func.compose (this, a, b, ab, user_data.compose);
if (!found && (b & ~0x7F) == 0x0580) {
// special-case Hebrew presentation forms that are excluded from
// standard normalization, but wanted for old fonts
switch (b) {
case 0x05B4: // HIRIQ
if (a == 0x05D9) { // YOD
*ab = 0xFB1D;
found = true;
}
break;
case 0x05B7: // patah
if (a == 0x05F2) { // YIDDISH YOD YOD
*ab = 0xFB1F;
found = true;
} else if (a == 0x05D0) { // ALEF
*ab = 0xFB2E;
found = true;
}
break;
case 0x05B8: // QAMATS
if (a == 0x05D0) { // ALEF
*ab = 0xFB2F;
found = true;
}
break;
case 0x05B9: // HOLAM
if (a == 0x05D5) { // VAV
*ab = 0xFB4B;
found = true;
}
break;
case 0x05BC: // DAGESH
if (a >= 0x05D0 && a <= 0x05EA) {
*ab = sDageshForms[a - 0x05D0];
found = (*ab != 0);
} else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
*ab = 0xFB2C;
found = true;
} else if (a == 0xFB2B) { // SHIN WITH SIN DOT
*ab = 0xFB2D;
found = true;
}
break;
case 0x05BF: // RAFE
switch (a) {
case 0x05D1: // BET
*ab = 0xFB4C;
found = true;
break;
case 0x05DB: // KAF
*ab = 0xFB4D;
found = true;
break;
case 0x05E4: // PE
*ab = 0xFB4E;
found = true;
break;
}
break;
case 0x05C1: // SHIN DOT
if (a == 0x05E9) { // SHIN
*ab = 0xFB2A;
found = true;
} else if (a == 0xFB49) { // SHIN WITH DAGESH
*ab = 0xFB2C;
found = true;
}
break;
case 0x05C2: // SIN DOT
if (a == 0x05E9) { // SHIN
*ab = 0xFB2B;
found = true;
} else if (a == 0xFB49) { // SHIN WITH DAGESH
*ab = 0xFB2D;
found = true;
}
break;
}
}
return found;
return func.compose (this, a, b, ab, user_data.compose);
}
inline hb_bool_t decompose (hb_codepoint_t ab,
hb_codepoint_t *a, hb_codepoint_t *b)
{
/* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
switch (ab) {
case 0x0AC9 : return false;
case 0x0931 : return false;
case 0x0B94 : return false;
/* These ones have Unicode decompositions, but we do it
* this way to be close to what Uniscribe does. */
case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
#if 0
case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
#endif
}
*a = ab; *b = 0;
*a = *b = 0;
return func.decompose (this, ab, a, b, user_data.decompose);
}