[Indic] Position Khmer Robat
Khmer Robat is a visual Repha. Logical Repha, as occurs in Malayalam, is still not positioned. Another 200 Khmer failures fixed; 547 to go. That's better than Devanagari!
This commit is contained in:
parent
25bc489498
commit
db8981f1e0
|
@ -40,33 +40,35 @@
|
||||||
# Same order as enum indic_category_t. Not sure how to avoid duplication.
|
# Same order as enum indic_category_t. Not sure how to avoid duplication.
|
||||||
X = 0;
|
X = 0;
|
||||||
C = 1;
|
C = 1;
|
||||||
Ra = 2;
|
V = 2;
|
||||||
V = 3;
|
N = 3;
|
||||||
N = 4;
|
H = 4;
|
||||||
H = 5;
|
ZWNJ = 5;
|
||||||
ZWNJ = 6;
|
ZWJ = 6;
|
||||||
ZWJ = 7;
|
M = 7;
|
||||||
M = 8;
|
SM = 8;
|
||||||
SM = 9;
|
VD = 9;
|
||||||
VD = 10;
|
A = 10;
|
||||||
A = 11;
|
NBSP = 11;
|
||||||
NBSP = 12;
|
DOTTEDCIRCLE = 12;
|
||||||
DOTTEDCIRCLE = 13;
|
RS = 13;
|
||||||
RS = 14;
|
Coeng = 14;
|
||||||
Coeng = 15;
|
Repha = 15;
|
||||||
|
Ra = 16;
|
||||||
|
|
||||||
c = C | Ra; # is_consonant
|
c = C | Ra; # is_consonant
|
||||||
n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
|
n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
|
||||||
z = ZWJ|ZWNJ; # is_joiner
|
z = ZWJ|ZWNJ; # is_joiner
|
||||||
h = H | Coeng; # is_halant_or_coeng
|
h = H | Coeng; # is_halant_or_coeng
|
||||||
|
reph = (Ra H | Repha); # possible reph
|
||||||
matra_group = M.N?.H?;
|
matra_group = M.N?.H?;
|
||||||
syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
|
syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
|
||||||
place_holder = NBSP | DOTTEDCIRCLE;
|
place_holder = NBSP | DOTTEDCIRCLE;
|
||||||
|
|
||||||
|
|
||||||
consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
|
consonant_syllable = Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
|
||||||
vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
|
vowel_syllable = reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
|
||||||
standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
|
standalone_cluster = reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
|
||||||
other = any;
|
other = any;
|
||||||
|
|
||||||
main := |*
|
main := |*
|
||||||
|
|
|
@ -47,7 +47,6 @@
|
||||||
enum indic_category_t {
|
enum indic_category_t {
|
||||||
OT_X = 0,
|
OT_X = 0,
|
||||||
OT_C,
|
OT_C,
|
||||||
OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
|
|
||||||
OT_V,
|
OT_V,
|
||||||
OT_N,
|
OT_N,
|
||||||
OT_H,
|
OT_H,
|
||||||
|
@ -60,7 +59,9 @@ enum indic_category_t {
|
||||||
OT_NBSP,
|
OT_NBSP,
|
||||||
OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
|
OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
|
||||||
OT_RS, /* Register Shifter, used in Khmer OT spec */
|
OT_RS, /* Register Shifter, used in Khmer OT spec */
|
||||||
OT_Coeng
|
OT_Coeng,
|
||||||
|
OT_Repha,
|
||||||
|
OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Visual positions in a syllable from left to right. */
|
/* Visual positions in a syllable from left to right. */
|
||||||
|
@ -92,7 +93,7 @@ enum indic_syllabic_category_t {
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C,
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C,
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_C,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha,
|
||||||
INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
|
INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
|
||||||
INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
|
INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
|
||||||
INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,
|
INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,
|
||||||
|
|
|
@ -282,6 +282,19 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
|
||||||
if (unlikely (info.codepoint == 0x17D2))
|
if (unlikely (info.codepoint == 0x17D2))
|
||||||
info.indic_category() = OT_Coeng;
|
info.indic_category() = OT_Coeng;
|
||||||
|
|
||||||
|
if (info.indic_category() == OT_Repha) {
|
||||||
|
/* There are two kinds of characters marked as Repha:
|
||||||
|
* - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer)
|
||||||
|
* - The ones that are GenCat=Lo are encoded logically, ie. at the beginning of the syllable. (eg. Malayalam)
|
||||||
|
*
|
||||||
|
* We recategorize the first kind to look like a Nukta, so that it is attached to the base directly.
|
||||||
|
*/
|
||||||
|
if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
|
||||||
|
info.indic_category() = OT_N;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Assign positions... */
|
||||||
if (is_consonant (info)) {
|
if (is_consonant (info)) {
|
||||||
info.indic_position() = consonant_position (info.codepoint);
|
info.indic_position() = consonant_position (info.codepoint);
|
||||||
if (is_ra (info.codepoint))
|
if (is_ra (info.codepoint))
|
||||||
|
|
Loading…
Reference in New Issue