[Indic] Position Khmer Robat

Khmer Robat is a visual Repha.

Logical Repha, as occurs in Malayalam, is still not positioned.

Another 200 Khmer failures fixed.  547 to go.  That's better than
Devanagari!
This commit is contained in:
Behdad Esfahbod 2012-07-17 18:17:30 -04:00
parent 25bc489498
commit db8981f1e0
3 changed files with 36 additions and 20 deletions

View File

@ -40,33 +40,35 @@
# Same order as enum indic_category_t. Not sure how to avoid duplication.
X = 0;
C = 1;
Ra = 2;
V = 3;
N = 4;
H = 5;
ZWNJ = 6;
ZWJ = 7;
M = 8;
SM = 9;
VD = 10;
A = 11;
NBSP = 12;
DOTTEDCIRCLE = 13;
RS = 14;
Coeng = 15;
V = 2;
N = 3;
H = 4;
ZWNJ = 5;
ZWJ = 6;
M = 7;
SM = 8;
VD = 9;
A = 10;
NBSP = 11;
DOTTEDCIRCLE = 12;
RS = 13;
Coeng = 14;
Repha = 15;
Ra = 16;
c = C | Ra; # is_consonant
n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
z = ZWJ|ZWNJ; # is_joiner
h = H | Coeng; # is_halant_or_coeng
reph = (Ra H | Repha); # possible reph
matra_group = M.N?.H?;
syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
consonant_syllable = Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
vowel_syllable = reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
standalone_cluster = reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
other = any;
main := |*

View File

@ -47,7 +47,6 @@
enum indic_category_t {
OT_X = 0,
OT_C,
OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
OT_V,
OT_N,
OT_H,
@ -60,7 +59,9 @@ enum indic_category_t {
OT_NBSP,
OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
OT_RS, /* Register Shifter, used in Khmer OT spec */
OT_Coeng
OT_Coeng,
OT_Repha,
OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
};
/* Visual positions in a syllable from left to right. */
@ -92,7 +93,7 @@ enum indic_syllabic_category_t {
INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C,
INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C,
INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_C,
INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha,
INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,

View File

@ -282,6 +282,19 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
if (unlikely (info.codepoint == 0x17D2))
info.indic_category() = OT_Coeng;
if (info.indic_category() == OT_Repha) {
/* There are two kinds of characters marked as Repha:
* - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
* - The ones that are GenCat=Lo are encoded logically, ie. at the beginning of the syllable. (eg. Malayalam)
*
* We recategorize the first kind to look like a Nukta, so that it is attached to the base directly.
*/
if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
info.indic_category() = OT_N;
}
/* Assign positions... */
if (is_consonant (info)) {
info.indic_position() = consonant_position (info.codepoint);
if (is_ra (info.codepoint))