[Indic] Position Khmer Robat

It's a visual Repha.

Logical Repha, as occurs in Malayalam, is still not positioned.

Another 200 Khmer failures fixed.  547 to go.  That's better than
Devanagari!
This commit is contained in:
Behdad Esfahbod 2012-07-17 18:17:30 -04:00
parent 25bc489498
commit db8981f1e0
3 changed files with 36 additions and 20 deletions

View File

@ -40,33 +40,35 @@
# Same order as enum indic_category_t. Not sure how to avoid duplication. # Same order as enum indic_category_t. Not sure how to avoid duplication.
X = 0; X = 0;
C = 1; C = 1;
Ra = 2; V = 2;
V = 3; N = 3;
N = 4; H = 4;
H = 5; ZWNJ = 5;
ZWNJ = 6; ZWJ = 6;
ZWJ = 7; M = 7;
M = 8; SM = 8;
SM = 9; VD = 9;
VD = 10; A = 10;
A = 11; NBSP = 11;
NBSP = 12; DOTTEDCIRCLE = 12;
DOTTEDCIRCLE = 13; RS = 13;
RS = 14; Coeng = 14;
Coeng = 15; Repha = 15;
Ra = 16;
c = C | Ra; # is_consonant c = C | Ra; # is_consonant
n = (N.N? | ZWNJ?.RS); # is_consonant_modifier n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
z = ZWJ|ZWNJ; # is_joiner z = ZWJ|ZWNJ; # is_joiner
h = H | Coeng; # is_halant_or_coeng h = H | Coeng; # is_halant_or_coeng
reph = (Ra H | Repha); # possible reph
matra_group = M.N?.H?; matra_group = M.N?.H?;
syllable_tail = SM? (Coeng (c|V))? (VD VD?)?; syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE; place_holder = NBSP | DOTTEDCIRCLE;
consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail; consonant_syllable = Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail; vowel_syllable = reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail; standalone_cluster = reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
other = any; other = any;
main := |* main := |*

View File

@ -47,7 +47,6 @@
enum indic_category_t { enum indic_category_t {
OT_X = 0, OT_X = 0,
OT_C, OT_C,
OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
OT_V, OT_V,
OT_N, OT_N,
OT_H, OT_H,
@ -60,7 +59,9 @@ enum indic_category_t {
OT_NBSP, OT_NBSP,
OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */ OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
OT_RS, /* Register Shifter, used in Khmer OT spec */ OT_RS, /* Register Shifter, used in Khmer OT spec */
OT_Coeng OT_Coeng,
OT_Repha,
OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
}; };
/* Visual positions in a syllable from left to right. */ /* Visual positions in a syllable from left to right. */
@ -92,7 +93,7 @@ enum indic_syllabic_category_t {
INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C, INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C,
INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP, INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C, INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C,
INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_C, INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha,
INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,

View File

@ -282,6 +282,19 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
if (unlikely (info.codepoint == 0x17D2)) if (unlikely (info.codepoint == 0x17D2))
info.indic_category() = OT_Coeng; info.indic_category() = OT_Coeng;
if (info.indic_category() == OT_Repha) {
/* There are two kinds of characters marked as Repha:
* - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
* - The ones that are GenCat=Lo are encoded logically, ie. at the beginning of the syllable. (eg. Malayalam)
*
* We recategorize the first kind to look like a Nukta, so that it is attached to the base directly.
*/
if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
info.indic_category() = OT_N;
}
/* Assign positions... */
if (is_consonant (info)) { if (is_consonant (info)) {
info.indic_position() = consonant_position (info.codepoint); info.indic_position() = consonant_position (info.codepoint);
if (is_ra (info.codepoint)) if (is_ra (info.codepoint))