[Indic] Position Khmer Robat
Khmer Robat is a visual Repha. Logical Repha, as occurs in Malayalam, is still not positioned. This fixes another 200 Khmer failures; 547 to go. That's better than Devanagari!
This commit is contained in:
parent
25bc489498
commit
db8981f1e0
|
@ -40,33 +40,35 @@
|
|||
# Same order as enum indic_category_t. Not sure how to avoid duplication.
|
||||
X = 0;
|
||||
C = 1;
|
||||
Ra = 2;
|
||||
V = 3;
|
||||
N = 4;
|
||||
H = 5;
|
||||
ZWNJ = 6;
|
||||
ZWJ = 7;
|
||||
M = 8;
|
||||
SM = 9;
|
||||
VD = 10;
|
||||
A = 11;
|
||||
NBSP = 12;
|
||||
DOTTEDCIRCLE = 13;
|
||||
RS = 14;
|
||||
Coeng = 15;
|
||||
V = 2;
|
||||
N = 3;
|
||||
H = 4;
|
||||
ZWNJ = 5;
|
||||
ZWJ = 6;
|
||||
M = 7;
|
||||
SM = 8;
|
||||
VD = 9;
|
||||
A = 10;
|
||||
NBSP = 11;
|
||||
DOTTEDCIRCLE = 12;
|
||||
RS = 13;
|
||||
Coeng = 14;
|
||||
Repha = 15;
|
||||
Ra = 16;
|
||||
|
||||
c = C | Ra; # is_consonant
|
||||
n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
|
||||
z = ZWJ|ZWNJ; # is_joiner
|
||||
h = H | Coeng; # is_halant_or_coeng
|
||||
reph = (Ra H | Repha); # possible reph
|
||||
matra_group = M.N?.H?;
|
||||
syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
|
||||
place_holder = NBSP | DOTTEDCIRCLE;
|
||||
|
||||
|
||||
consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
|
||||
vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
|
||||
standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
|
||||
consonant_syllable = Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
|
||||
vowel_syllable = reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
|
||||
standalone_cluster = reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
|
||||
other = any;
|
||||
|
||||
main := |*
|
||||
|
|
|
@ -47,7 +47,6 @@
|
|||
enum indic_category_t {
|
||||
OT_X = 0,
|
||||
OT_C,
|
||||
OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
|
||||
OT_V,
|
||||
OT_N,
|
||||
OT_H,
|
||||
|
@ -60,7 +59,9 @@ enum indic_category_t {
|
|||
OT_NBSP,
|
||||
OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
|
||||
OT_RS, /* Register Shifter, used in Khmer OT spec */
|
||||
OT_Coeng
|
||||
OT_Coeng,
|
||||
OT_Repha,
|
||||
OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
|
||||
};
|
||||
|
||||
/* Visual positions in a syllable from left to right. */
|
||||
|
@ -92,7 +93,7 @@ enum indic_syllabic_category_t {
|
|||
INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C,
|
||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
|
||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C,
|
||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_C,
|
||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha,
|
||||
INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
|
||||
INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
|
||||
INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,
|
||||
|
|
|
@ -282,6 +282,19 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
|
|||
if (unlikely (info.codepoint == 0x17D2))
|
||||
info.indic_category() = OT_Coeng;
|
||||
|
||||
if (info.indic_category() == OT_Repha) {
|
||||
/* There are two kinds of characters marked as Repha:
|
||||
* - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
|
||||
* - The ones that are GenCat=Lo are encoded logically, i.e. at the beginning of the syllable (e.g. Malayalam).
|
||||
*
|
||||
* We recategorize the first kind to look like a Nukta, so that it is attached to the base directly.
|
||||
*/
|
||||
if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
|
||||
info.indic_category() = OT_N;
|
||||
}
|
||||
|
||||
|
||||
/* Assign positions... */
|
||||
if (is_consonant (info)) {
|
||||
info.indic_position() = consonant_position (info.codepoint);
|
||||
if (is_ra (info.codepoint))
|
||||
|
|
Loading…
Reference in New Issue