From 9e4f94a72cea6d65a6a7ba5a47db92e00dbfbb91 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 20 Jul 2012 13:48:03 -0400 Subject: [PATCH] [Indic] Break syllables at Halant,ZWNJ That's really what Uniscribe does, and explains a lot of peculiarities of Halant,ZWNJ before the base. Sent Telugu from 1% failures to 0.03%. Improved Kannada and Malayalam slightly. Fixed half of Bengali, and did NOT break anything! --- src/hb-ot-shape-complex-indic-machine.rl | 5 +++-- src/hb-ot-shape-complex-indic.cc | 5 ++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index 4501773eb..4be7698fb 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -67,8 +67,9 @@ forced_rakar = ZWJ H ZWJ Ra; matra_group = z*.M.N?.(H | forced_rakar)?; syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?; place_holder = NBSP | DOTTEDCIRCLE; -halant_group = (z?.h.z?); -halant_or_matra_group = (halant_group | matra_group*); +halant_group = (z?.h.ZWJ?); +final_halant_group = halant_group | h.ZWNJ; +halant_or_matra_group = (final_halant_group | matra_group*); consonant_syllable = Repha? (cn.halant_group)* cn A? halant_or_matra_group? syllable_tail; diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 42a7e8d39..e771e5747 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -531,9 +531,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff } else { - /* A ZWJ at the end of syllable, or any ZWJ/ZWNJ in other places, stop the base - * search (to request explicit half or halant forms. */ - if (is_joiner (info[i]) && (i + 1 < end || info[i].indic_category() == OT_ZWJ)) + /* A ZWJ stops the base search, and requests an explicit half form. */ + if (info[i].indic_category() == OT_ZWJ) break; } } while (i > limit);