From c50ed71e9a3df1844f564de66d54b46a696c1356 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 17 Jul 2012 11:54:28 -0400 Subject: [PATCH] [Indic] Recategorize Khmer coeng sign as a separate category OT_Coeng Amend the syllable structure to allow a final subscripted consonant (Coeng+C) and a final subscripted independent vowel (Coeng+V). Fixes another 2k of Khmer failures. --- src/hb-ot-shape-complex-indic-machine.rl | 7 ++++--- src/hb-ot-shape-complex-indic.cc | 6 +++++- .../south-east-asian/script-khmer/misc/misc.txt | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index 6130529c0..675a68d6f 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -58,14 +58,15 @@ Coeng = 15; c = C | Ra; n = N N?; z = ZWJ|ZWNJ; +h = H | Coeng; matra_group = (M | RS) N? H?; syllable_tail = SM? (VD VD?)?; place_holder = NBSP | DOTTEDCIRCLE; -consonant_syllable = (c.n? (H.z?|z.H))* c.n? A? (H.z? | matra_group*)? syllable_tail; -vowel_syllable = (Ra H)? V n? (z?.H.c | ZWJ.c)* matra_group* syllable_tail; -standalone_cluster = (Ra H)? place_holder n? (z? H c)* matra_group* syllable_tail; +consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? (Coeng (c|V))? syllable_tail; +vowel_syllable = (Ra H)? V n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail; +standalone_cluster = (Ra H)? place_holder n? (z? h c)* matra_group* syllable_tail; other = any; main := |* diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 039664dfb..8e738db5b 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -276,7 +276,11 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED, unlikely (hb_in_range (info.codepoint, 0x17CB, 0x17D0))) info.indic_category() = OT_RS; - if (info.indic_category() == OT_C) { + /* Khmer Virama is different since it can be used to form a final consonant. */ + if (unlikely (info.codepoint == 0x17D2)) + info.indic_category() = OT_Coeng; + + if (is_consonant (info)) { info.indic_position() = consonant_position (info.codepoint); if (is_ra (info.codepoint)) info.indic_category() = OT_Ra; diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt index 8e4deb97b..b305cb8cb 100644 --- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt +++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt @@ -7,3 +7,4 @@ រី រ៍ សៅ +រ្ឥ