From 8b2c94c43fd335b944d5e5487265706b8e0f9041 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 2 Oct 2017 20:02:45 +0200 Subject: [PATCH] Tweak ligature component matching for ligature formation If two marks want to ligate and they belong to different components of the same ligature glyph, and said ligature glyph is to be ignored according to mark-filtering rules, then allow. Example Burmese sequence: U+1004,U+103A,U+1039,U+101B,U+103D,U+102D Test font provided by Norbert Lindenberg. Fixes https://github.com/behdad/harfbuzz/issues/545 --- src/hb-ot-layout-gsubgpos-private.hh | 59 +++++++++++++++--- ...c76d1bafde4a0b1026ebcc932d2e5c6fd02442.ttf | Bin 0 -> 1384 bytes test/shaping/tests/ligature-id.tests | 1 + 3 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 test/shaping/fonts/sha1sum/a6c76d1bafde4a0b1026ebcc932d2e5c6fd02442.ttf diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh index 472628a39..f2ad9f1c9 100644 --- a/src/hb-ot-layout-gsubgpos-private.hh +++ b/src/hb-ot-layout-gsubgpos-private.hh @@ -374,6 +374,13 @@ struct hb_apply_context_t : inline void reject (void) { num_items++; match_glyph_data--; } + inline matcher_t::may_skip_t + may_skip (const hb_apply_context_t *c, + const hb_glyph_info_t &info) const + { + return matcher.may_skip (c, info); + } + inline bool next (void) { assert (num_items > 0); @@ -736,11 +743,17 @@ static inline bool match_input (hb_apply_context_t *c, * - Ligatures cannot be formed across glyphs attached to different components * of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and * LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother. 
- * However, it would be wrong to ligate that SHADDA,FATHA sequence.o - * There is an exception to this: If a ligature tries ligating with marks that - * belong to it itself, go ahead, assuming that the font designer knows what - * they are doing (otherwise it can break Indic stuff when a matra wants to - * ligate with a conjunct...) + * However, it would be wrong to ligate that SHADDA,FATHA sequence. + * There are a couple of exceptions to this: + * + * o If a ligature tries ligating with marks that belong to it itself, go ahead, + * assuming that the font designer knows what they are doing (otherwise it can + * break Indic stuff when a matra wants to ligate with a conjunct, + * + * o If two marks want to ligate and they belong to different components of the + * same ligature glyph, and said ligature glyph is to be ignored according to + * mark-filtering rules, then allow. + * https://github.com/behdad/harfbuzz/issues/545 */ bool is_mark_ligature = _hb_glyph_info_is_mark (&buffer->cur()); @@ -761,13 +774,41 @@ static inline bool match_input (hb_apply_context_t *c, unsigned int this_lig_id = _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]); unsigned int this_lig_comp = _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]); - if (first_lig_id && first_lig_comp) { + if (first_lig_id && first_lig_comp) + { /* If first component was attached to a previous ligature component, * all subsequent components should be attached to the same ligature - * component, otherwise we shouldn't ligate them. */ + * component, otherwise we shouldn't ligate them... */ if (first_lig_id != this_lig_id || first_lig_comp != this_lig_comp) - return_trace (false); - } else { + { + if (first_lig_id != this_lig_id && this_lig_id != 0) + return_trace (false); + + /* ...unless, we are attached to a base ligature and that base + * ligature is ignorable. 
*/ + bool found = false; + const hb_glyph_info_t *out = buffer->out_info; + unsigned int j = buffer->out_len; + while (j && _hb_glyph_info_get_lig_id (&out[j - 1]) == first_lig_id) + { + if (_hb_glyph_info_get_lig_comp (&out[j - 1]) == 0) + { + j--; + found = true; + break; + } + j--; + } + + if (!found) + return_trace (false); + + if (skippy_iter.may_skip (c, out[j]) != hb_apply_context_t::matcher_t::SKIP_YES) + return_trace (false); + } + } + else + { /* If first component was NOT attached to a previous ligature component, * all subsequent components should also NOT be attached to any ligature * component, unless they are attached to the first component itself! */ diff --git a/test/shaping/fonts/sha1sum/a6c76d1bafde4a0b1026ebcc932d2e5c6fd02442.ttf b/test/shaping/fonts/sha1sum/a6c76d1bafde4a0b1026ebcc932d2e5c6fd02442.ttf new file mode 100644 index 0000000000000000000000000000000000000000..7930a96b0bfa9a4024c56888a70032fceaeb76fb GIT binary patch literal 1384 zcmZuwOH30{6g}_Fz?6zu6!9BUtgoRjMAWJAb$8nqEW2;kt#t^bmf|r8ygc{ zm_`@6av?ibEKC>Z3Zn}*#)Y`CF_FYz=y>j%rW*B4-sjwRKWFZ{7Ye{y+=YeHZLLR- zVHmprYmw4}ZKqFl-T(CH8DNKqpSN}OBzilWwh+$=KGn6?oyM(EK(`WK>Q5yn!qd@j z#7~I3hsSRZ^=+Sj&hrP(mm`D8f%4jiZGg%W?;oL{q}t0+KTN!HBz0rD^lAPzF(GQk zuk|PK6|boOg19=BoSwi|oF~3V9KM=N4O&0fJfQv$;>5(Y>o?xVS`LtZGjSRUuGgsU zsqZKQc10TEmGR2j%DAe}uDJQk%1U(mvm}u6Y@jwIqgbzvwGwU+#IF&v=b` z#tIWVeu`=G9T#*j_j$N7$!JXUy)kObi9W3*VXAxcnS8mPDRK}s86~jvVZKNS0(wS# zq21rrMf$AwRb>5l^t7Kni6bF9bjS)bMwXnVGUQ*#jClvbk{`oX_Nqcos+hwg#N`M` zKIvgwXiw;1=n&r$1=J)cx0?H$dzV|tE&Rhe1Y})V_jzx<$(us;WsVMNN;aNVZC7XR9yR=t1=X#zMWr|Xp zH4!kjQh!mjvZ;@CbfY45JLR(Lq-MeF%G>-t(^o1vd4*bLE!ayWQ{(ig7-=&jhUw9l zOB{*mv$A;-9x^x6zJ{GK%c@k~8MDq(rcF0UN0`ds`jK~jodoH*8OpE>6q(5%GQ#EY zTSKUroERB0FYHVG{8(hM8y&O|uY4)4!a*3@%!f7{g{5ueLCXKsFm-2Sj*049R#D=6 NaOk6yJt0