From 71fd5e80ad06c8e85a1112cc89e129d6cd03f82c Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 24 Jul 2012 00:21:16 -0400 Subject: [PATCH] [Indic] Further adjust base algorithm for Sinhala Apparently if there is C,V,ZWJ,C, the first C will be base, but if it's C,ZWJ,V,C, the second one will be. Note that Uniscribe implements this differently, by breaking syllable in the case of C,ZWJ,V,C and putting the first consonant in one syllable and the rest in the next syllable. Sinhala failures down from 208 to 158 (0.0581209%). No changes to Khmer. --- src/hb-ot-shape-complex-indic.cc | 11 +++++++---- .../shaper-indic/indic/script-sinhala/misc/misc.txt | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 83d7ab58e..80bdb31bd 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -560,12 +560,15 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff base = limit; /* Find the last base consonant that is not blocked by ZWJ. If there is - * a ZWJ before a bse consonant, that would request a subjoined form. */ + * a ZWJ right before a base consonant, that would request a subjoined form. */ for (unsigned int i = limit; i < end; i++) if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C) - base = i; - else if (info[i].indic_category() == OT_ZWJ) - break; + { + if (limit < i && info[i - 1].indic_category() == OT_ZWJ) + break; + else + base = i; + } /* Mark all subsequent consonants as below. */ for (unsigned int i = base + 1; i < end; i++) diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt index d6b7abde5..a54967390 100644 --- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt +++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt @@ -32,3 +32,6 @@ ග්‍යෙ ර්‍ය්‍ය එ‍ඬේ +න්ගේ +න්‍ගේ +න‍්ගේ