From 25bc489498ef7d0beb8fe9ab663e3f0b2f52c9c2 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 17 Jul 2012 17:53:03 -0400 Subject: [PATCH] [Indic] Better categorize Register Shifters and Khmer Various signs Down another 500 or so Khmer failures! --- src/hb-ot-shape-complex-indic-machine.rl | 14 +++++++------- src/hb-ot-shape-complex-indic-private.hh | 2 +- src/hb-ot-shape-complex-indic.cc | 8 ++++---- .../south-east-asian/script-khmer/misc/MANIFEST | 2 ++ .../script-khmer/misc/other-marks-invalid.txt | 4 ++++ .../script-khmer/misc/other-marks.txt | 6 ++++++ 6 files changed, 24 insertions(+), 12 deletions(-) create mode 100644 test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt create mode 100644 test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index 3c7193d92..b87d2df98 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -55,16 +55,16 @@ DOTTEDCIRCLE = 13; RS = 14; Coeng = 15; -c = C | Ra; -n = (N.N? | ZWNJ?.RS); -z = ZWJ|ZWNJ; -h = H | Coeng; -matra_group = (M | RS) N? H?; -syllable_tail = SM? (VD VD?)?; +c = C | Ra; # is_consonant +n = (N.N? | ZWNJ?.RS); # is_consonant_modifier +z = ZWJ|ZWNJ; # is_joiner +h = H | Coeng; # is_halant_or_coeng +matra_group = M.N?.H?; +syllable_tail = SM? (Coeng (c|V))? (VD VD?)?; place_holder = NBSP | DOTTEDCIRCLE; -consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? (Coeng (c|V))? syllable_tail; +consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail; vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail; standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail; other = any; diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 0fe350f0f..0541738cc 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -59,7 +59,7 @@ enum indic_category_t { OT_A, OT_NBSP, OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */ - OT_RS, /* Register Shifter (and other marks), used in Khmer OT spec */ + OT_RS, /* Register Shifter, used in Khmer OT spec */ OT_Coeng }; diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 4482dd399..3c83ce64b 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -275,8 +275,8 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED, info.indic_category() = OT_VD; if (info.indic_category() == OT_X && - unlikely (hb_in_range (info.codepoint, 0x17CB, 0x17D0))) - info.indic_category() = OT_RS; + unlikely (hb_in_range (info.codepoint, 0x17CB, 0x17D2))) /* Khmer Various signs */ + info.indic_category() = OT_N; /* Khmer Virama is different since it can be used to form a final consonant. */ if (unlikely (info.codepoint == 0x17D2)) @@ -488,7 +488,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff { /* Please update the Uniscribe branch when touching this! */ for (unsigned int i = start + 1; i < end; i++) - if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) + if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) info[i].indic_position() = info[i - 1].indic_position(); } else { /* @@ -497,7 +497,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff */ /* Please update the non-Uniscribe branch when touching this! */ for (unsigned int i = start + 1; i < end; i++) - if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) { + if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) { info[i].indic_position() = info[i - 1].indic_position(); if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M) for (unsigned int j = i; j > start; j--) diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST index 29cfb2f28..fde3fa113 100644 --- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST +++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/MANIFEST @@ -1 +1,3 @@ misc.txt +other-marks-invalid.txt +other-marks.txt diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt new file mode 100644 index 000000000..213cfc29a --- /dev/null +++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks-invalid.txt @@ -0,0 +1,4 @@ +ព់្ឈា +ព្ឈា៉ +ព្ឈា៌ +ព្ឈ៌ា diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt new file mode 100644 index 000000000..0ad62e7b9 --- /dev/null +++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/other-marks.txt @@ -0,0 +1,6 @@ +ព្ឈា +ព្ឈា់ +ព្ឈ់ា +ព្ឈ៉ា +ព៉្ឈា +ព៌្ឈា