More refactoring
This commit is contained in:
parent
428dfcab66
commit
0f8881d6bb
|
@ -87,6 +87,39 @@ decompose_func (hb_unicode_funcs_t *unicode,
|
|||
hb_codepoint_t *a,
|
||||
hb_codepoint_t *b)
|
||||
{
|
||||
/* XXX FIXME, move these to complex shapers and propagate to normalizer. */
|
||||
switch (ab) {
|
||||
case 0x0AC9 : return false;
|
||||
|
||||
case 0x0931 : return false;
|
||||
case 0x0B94 : return false;
|
||||
|
||||
/* These ones have Unicode decompositions, but we do it
|
||||
* this way to be close to what Uniscribe does. */
|
||||
case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
|
||||
case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
|
||||
case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
|
||||
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
|
||||
|
||||
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
|
||||
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
|
||||
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
|
||||
case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
|
||||
case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
|
||||
case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
|
||||
case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
|
||||
case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
|
||||
case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
|
||||
case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
|
||||
case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
|
||||
case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
|
||||
#if 0
|
||||
case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
|
||||
case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
|
||||
case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
|
||||
case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
|
||||
#endif
|
||||
}
|
||||
return unicode->decompose (ab, a, b);
|
||||
}
|
||||
|
||||
|
@ -96,7 +129,133 @@ compose_func (hb_unicode_funcs_t *unicode,
|
|||
hb_codepoint_t b,
|
||||
hb_codepoint_t *ab)
|
||||
{
|
||||
return unicode->compose (a, b, ab);
|
||||
/* XXX, this belongs to indic normalizer. */
|
||||
if ((FLAG (unicode->general_category (a)) &
|
||||
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
|
||||
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
|
||||
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
|
||||
return false;
|
||||
/* XXX, add composition-exclusion exceptions to Indic shaper. */
|
||||
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
|
||||
|
||||
/* XXX, these belong to the Hebrew / default shaper. */
|
||||
/* Hebrew presentation-form shaping.
|
||||
* https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
|
||||
// Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
|
||||
// note that some letters do not have a dagesh presForm encoded
|
||||
static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
|
||||
0xFB30, // ALEF
|
||||
0xFB31, // BET
|
||||
0xFB32, // GIMEL
|
||||
0xFB33, // DALET
|
||||
0xFB34, // HE
|
||||
0xFB35, // VAV
|
||||
0xFB36, // ZAYIN
|
||||
0, // HET
|
||||
0xFB38, // TET
|
||||
0xFB39, // YOD
|
||||
0xFB3A, // FINAL KAF
|
||||
0xFB3B, // KAF
|
||||
0xFB3C, // LAMED
|
||||
0, // FINAL MEM
|
||||
0xFB3E, // MEM
|
||||
0, // FINAL NUN
|
||||
0xFB40, // NUN
|
||||
0xFB41, // SAMEKH
|
||||
0, // AYIN
|
||||
0xFB43, // FINAL PE
|
||||
0xFB44, // PE
|
||||
0, // FINAL TSADI
|
||||
0xFB46, // TSADI
|
||||
0xFB47, // QOF
|
||||
0xFB48, // RESH
|
||||
0xFB49, // SHIN
|
||||
0xFB4A // TAV
|
||||
};
|
||||
|
||||
hb_bool_t found = unicode->compose (a, b, ab);
|
||||
|
||||
if (!found && (b & ~0x7F) == 0x0580) {
|
||||
// special-case Hebrew presentation forms that are excluded from
|
||||
// standard normalization, but wanted for old fonts
|
||||
switch (b) {
|
||||
case 0x05B4: // HIRIQ
|
||||
if (a == 0x05D9) { // YOD
|
||||
*ab = 0xFB1D;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05B7: // patah
|
||||
if (a == 0x05F2) { // YIDDISH YOD YOD
|
||||
*ab = 0xFB1F;
|
||||
found = true;
|
||||
} else if (a == 0x05D0) { // ALEF
|
||||
*ab = 0xFB2E;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05B8: // QAMATS
|
||||
if (a == 0x05D0) { // ALEF
|
||||
*ab = 0xFB2F;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05B9: // HOLAM
|
||||
if (a == 0x05D5) { // VAV
|
||||
*ab = 0xFB4B;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05BC: // DAGESH
|
||||
if (a >= 0x05D0 && a <= 0x05EA) {
|
||||
*ab = sDageshForms[a - 0x05D0];
|
||||
found = (*ab != 0);
|
||||
} else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
|
||||
*ab = 0xFB2C;
|
||||
found = true;
|
||||
} else if (a == 0xFB2B) { // SHIN WITH SIN DOT
|
||||
*ab = 0xFB2D;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05BF: // RAFE
|
||||
switch (a) {
|
||||
case 0x05D1: // BET
|
||||
*ab = 0xFB4C;
|
||||
found = true;
|
||||
break;
|
||||
case 0x05DB: // KAF
|
||||
*ab = 0xFB4D;
|
||||
found = true;
|
||||
break;
|
||||
case 0x05E4: // PE
|
||||
*ab = 0xFB4E;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x05C1: // SHIN DOT
|
||||
if (a == 0x05E9) { // SHIN
|
||||
*ab = 0xFB2A;
|
||||
found = true;
|
||||
} else if (a == 0xFB49) { // SHIN WITH DAGESH
|
||||
*ab = 0xFB2C;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05C2: // SIN DOT
|
||||
if (a == 0x05E9) { // SHIN
|
||||
*ab = 0xFB2B;
|
||||
found = true;
|
||||
} else if (a == 0xFB49) { // SHIN WITH DAGESH
|
||||
*ab = 0xFB2D;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -80,173 +80,13 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
|
|||
hb_codepoint_t *ab)
|
||||
{
|
||||
*ab = 0;
|
||||
|
||||
/* XXX, this belongs to indic normalizer. */
|
||||
if ((FLAG (general_category (a)) &
|
||||
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
|
||||
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
|
||||
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
|
||||
return false;
|
||||
/* XXX, add composition-exclusion exceptions to Indic shaper. */
|
||||
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
|
||||
|
||||
/* XXX, these belong to the Hebrew / default shaper. */
|
||||
/* Hebrew presentation-form shaping.
|
||||
* https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
|
||||
// Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
|
||||
// note that some letters do not have a dagesh presForm encoded
|
||||
static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
|
||||
0xFB30, // ALEF
|
||||
0xFB31, // BET
|
||||
0xFB32, // GIMEL
|
||||
0xFB33, // DALET
|
||||
0xFB34, // HE
|
||||
0xFB35, // VAV
|
||||
0xFB36, // ZAYIN
|
||||
0, // HET
|
||||
0xFB38, // TET
|
||||
0xFB39, // YOD
|
||||
0xFB3A, // FINAL KAF
|
||||
0xFB3B, // KAF
|
||||
0xFB3C, // LAMED
|
||||
0, // FINAL MEM
|
||||
0xFB3E, // MEM
|
||||
0, // FINAL NUN
|
||||
0xFB40, // NUN
|
||||
0xFB41, // SAMEKH
|
||||
0, // AYIN
|
||||
0xFB43, // FINAL PE
|
||||
0xFB44, // PE
|
||||
0, // FINAL TSADI
|
||||
0xFB46, // TSADI
|
||||
0xFB47, // QOF
|
||||
0xFB48, // RESH
|
||||
0xFB49, // SHIN
|
||||
0xFB4A // TAV
|
||||
};
|
||||
|
||||
hb_bool_t found = func.compose (this, a, b, ab, user_data.compose);
|
||||
|
||||
if (!found && (b & ~0x7F) == 0x0580) {
|
||||
// special-case Hebrew presentation forms that are excluded from
|
||||
// standard normalization, but wanted for old fonts
|
||||
switch (b) {
|
||||
case 0x05B4: // HIRIQ
|
||||
if (a == 0x05D9) { // YOD
|
||||
*ab = 0xFB1D;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05B7: // patah
|
||||
if (a == 0x05F2) { // YIDDISH YOD YOD
|
||||
*ab = 0xFB1F;
|
||||
found = true;
|
||||
} else if (a == 0x05D0) { // ALEF
|
||||
*ab = 0xFB2E;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05B8: // QAMATS
|
||||
if (a == 0x05D0) { // ALEF
|
||||
*ab = 0xFB2F;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05B9: // HOLAM
|
||||
if (a == 0x05D5) { // VAV
|
||||
*ab = 0xFB4B;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05BC: // DAGESH
|
||||
if (a >= 0x05D0 && a <= 0x05EA) {
|
||||
*ab = sDageshForms[a - 0x05D0];
|
||||
found = (*ab != 0);
|
||||
} else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
|
||||
*ab = 0xFB2C;
|
||||
found = true;
|
||||
} else if (a == 0xFB2B) { // SHIN WITH SIN DOT
|
||||
*ab = 0xFB2D;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05BF: // RAFE
|
||||
switch (a) {
|
||||
case 0x05D1: // BET
|
||||
*ab = 0xFB4C;
|
||||
found = true;
|
||||
break;
|
||||
case 0x05DB: // KAF
|
||||
*ab = 0xFB4D;
|
||||
found = true;
|
||||
break;
|
||||
case 0x05E4: // PE
|
||||
*ab = 0xFB4E;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x05C1: // SHIN DOT
|
||||
if (a == 0x05E9) { // SHIN
|
||||
*ab = 0xFB2A;
|
||||
found = true;
|
||||
} else if (a == 0xFB49) { // SHIN WITH DAGESH
|
||||
*ab = 0xFB2C;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
case 0x05C2: // SIN DOT
|
||||
if (a == 0x05E9) { // SHIN
|
||||
*ab = 0xFB2B;
|
||||
found = true;
|
||||
} else if (a == 0xFB49) { // SHIN WITH DAGESH
|
||||
*ab = 0xFB2D;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return found;
|
||||
return func.compose (this, a, b, ab, user_data.compose);
|
||||
}
|
||||
|
||||
/* Decompose code point `ab` into the pair (*a, *b).
 *
 * A hard-coded switch on `ab` is consulted first: listed code points either
 * return false immediately or store fixed values in *a / *b and return true.
 * Any code point not listed falls through to the func.decompose callback
 * (called with user_data.decompose), whose result is returned.
 *
 * NOTE(review): this text appears to come from a rendered diff, so removed
 * and added lines may both be present here; confirm against the real header
 * before relying on the exact statement sequence. */
inline hb_bool_t decompose (hb_codepoint_t ab,
|
||||
hb_codepoint_t *a, hb_codepoint_t *b)
|
||||
{
|
||||
/* XXX FIXME, move these to complex shapers and propagate to normalizer. */
|
||||
switch (ab) {
|
||||
/* These cases return false without writing to *a or *b. */
case 0x0AC9 : return false;
|
||||
|
||||
case 0x0931 : return false;
|
||||
case 0x0B94 : return false;
|
||||
|
||||
/* These ones have Unicode decompositions, but we do it
|
||||
* this way to be close to what Uniscribe does. */
|
||||
case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
|
||||
case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
|
||||
case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
|
||||
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
|
||||
|
||||
/* Fixed two-part results: each case stores a hard-coded pair and returns
 * true. In several entries *b is the same value as the input `ab`. */
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
|
||||
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
|
||||
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
|
||||
case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
|
||||
case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
|
||||
case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
|
||||
case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
|
||||
case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
|
||||
case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
|
||||
case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
|
||||
case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
|
||||
case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
|
||||
/* Disabled by #if 0: the entries below are placeholder notes rather than
 * valid C, so they are never compiled. */
#if 0
|
||||
case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
|
||||
case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
|
||||
case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
|
||||
case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
|
||||
#endif
|
||||
}
|
||||
/* NOTE(review): as shown, *a and *b are set to (ab, 0) and then both
 * overwritten with 0 before the callback runs, so the first assignment has
 * no lasting effect. One of these two lines is presumably the removed side
 * of the diff -- confirm which one is current. */
*a = ab; *b = 0;
|
||||
*a = *b = 0;
|
||||
/* Not handled above: hand the query to the func.decompose callback and
 * return its result. */
return func.decompose (this, ab, a, b, user_data.decompose);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue