From 21d2803133c2c424ed37a9f3d17c7fc4963e5a60 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 10 May 2012 18:34:34 +0200 Subject: [PATCH] [Indic] Do clustering like Uniscribe does Hindi Wikipedia failures down to 6639 (0.938381%)! --- src/hb-ot-shape-complex-indic.cc | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index b0e8ade07..1ce25413b 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -575,8 +575,6 @@ final_reordering_syllable (hb_buffer_t *buffer, * 6. Otherwise, reorder reph to the end of the syllable. */ - start_of_last_cluster = start; /* Yay, one big cluster! */ - /* Now let's go shopping for a position. */ unsigned int new_reph_pos = end - 1; while (new_reph_pos > start && (FLAG (info[new_reph_pos].indic_position()) & (FLAG (POS_SMVD)))) @@ -598,6 +596,7 @@ final_reordering_syllable (hb_buffer_t *buffer, hb_glyph_info_t reph = info[start]; memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0])); info[new_reph_pos] = reph; + start_of_last_cluster = start; /* Yay, one big cluster! */ } @@ -617,8 +616,25 @@ final_reordering_syllable (hb_buffer_t *buffer, * consonant. */ - /* TODO */ - buffer->merge_clusters (start, end); + + /* Finish off the clusters and go home! */ + + if (1) { + /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWNJ. + * This means, half forms are submerged into the main consonants cluster. + * This is unnecessary, and makes cursor positioning harder, but that's what + * Uniscribe does. */ + unsigned int cluster_start = start; + for (unsigned int i = start + 1; i < start_of_last_cluster; i++) + if (info[i - 1].indic_category() == OT_H && info[i].indic_category() == OT_ZWNJ) { + i++; + buffer->merge_clusters (cluster_start, i); + cluster_start = i; + } + start_of_last_cluster = cluster_start; + } + + buffer->merge_clusters (start_of_last_cluster, end); }