diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh index 97bdc1be3..fa5ca166d 100644 --- a/src/hb-buffer-private.hh +++ b/src/hb-buffer-private.hh @@ -69,6 +69,7 @@ enum hb_buffer_scratch_flags_t { HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, + HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u, /* Reserved for complex shapers' internal use. */ HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh index 0f0926f89..be5695d98 100644 --- a/src/hb-ot-layout-private.hh +++ b/src/hb-ot-layout-private.hh @@ -280,7 +280,11 @@ _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_buffer_t *buffer) else if (unlikely (hb_in_range (u, 0xE0020u, 0xE007Fu))) props |= UPROPS_MASK_HIDDEN; /* COMBINING GRAPHEME JOINER should not be skipped; at least some times. * https://github.com/harfbuzz/harfbuzz/issues/554 */ - else if (unlikely (u == 0x034Fu)) props |= UPROPS_MASK_HIDDEN; + else if (unlikely (u == 0x034Fu)) + { + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_CGJ; + props |= UPROPS_MASK_HIDDEN; + } } else if (unlikely (HB_UNICODE_GENERAL_CATEGORY_IS_NON_ENCLOSING_MARK_OR_MODIFIER_SYMBOL (gen_cat))) { @@ -388,6 +392,11 @@ _hb_glyph_info_is_default_ignorable_and_not_hidden (const hb_glyph_info_t *info) == UPROPS_MASK_IGNORABLE) && !_hb_glyph_info_ligated (info); } +static inline void +_hb_glyph_info_unhide (hb_glyph_info_t *info) +{ + info->unicode_props() &= ~ UPROPS_MASK_HIDDEN; +} static inline bool _hb_glyph_info_is_unicode_format (const hb_glyph_info_t *info) diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index eb9d36ff1..47961bfd5 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -644,13 +644,15 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan, { hb_glyph_info_t *info = buffer->info; + DEBUG_MSG (ARABIC, buffer, "Reordering marks from %d to %d", start, end); + unsigned int i = start; for (unsigned int cc = 220; cc <= 230; cc += 10) { - DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d\n", cc, i); + DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d", cc, i); while (i < end && info_cc(info[i]) < cc) i++; - DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d\n", cc, i); + DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d", cc, i); if (i == end) break; @@ -658,20 +660,17 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan, if (info_cc(info[i]) > cc) continue; - /* Technically we should also check "info_cc(info[j]) == cc" - * in the following loop. But not doing it is safe; we might - * end up moving all the 220 MCMs and 230 MCMs together in one - * move and be done. */ unsigned int j = i; - while (j < end && info_is_mcm (info[j])) + while (j < end && info_cc(info[j]) == cc && info_is_mcm (info[j])) j++; - DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d\n", cc, i, j); if (i == j) continue; + DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d", cc, i, j); + /* Shift it! */ - DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d\n", cc, i, j); + DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d", cc, i, j); hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS]; assert (j - i <= ARRAY_LENGTH (temp)); buffer->merge_clusters (start, j); @@ -679,7 +678,25 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan, memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t)); memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t)); - start += j - i; + /* Renumber CC such that the reordered sequence is still sorted. + * 22 and 26 are chosen because they are smaller than all Arabic categories, + * and are folded back to 220/230 respectively during fallback mark positioning. + * + * We do this because the CGJ-handling logic in the normalizer relies on + * mark sequences having an increasing order even after this reordering. + * https://github.com/harfbuzz/harfbuzz/issues/554 + * This, however, does break some obscure sequences, where the normalizer + * might compose a sequence that it should not. For example, in the seequence + * ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this + * renumbering, we will. + */ + unsigned int new_start = start + j - i; + unsigned int new_cc = cc == 220 ? HB_MODIFIED_COMBINING_CLASS_CCC22 : HB_MODIFIED_COMBINING_CLASS_CCC26; + while (start < new_start) + { + _hb_glyph_info_set_modified_combining_class (&info[start], new_cc); + start++; + } i = j; } diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index fd9e7c2a8..62cbb9de9 100644 --- a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -345,9 +345,8 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0) break; - /* We are going to do a O(n^2). Only do this if the sequence is short, - * but not too short ;). */ - if (end - i < 2 || end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) { + /* We are going to do a O(n^2). Only do this if the sequence is short. */ + if (end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) { i = end; continue; } @@ -373,13 +372,11 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, buffer->clear_output (); count = buffer->len; unsigned int starter = 0; - bool combine = true; buffer->next_glyph (); while (buffer->idx < count && !buffer->in_error) { hb_codepoint_t composed, glyph; - if (combine && - /* We don't try to compose a non-mark character with it's preceding starter. + if (/* We don't try to compose a non-mark character with it's preceding starter. * This is both an optimization to avoid trying to compose every two neighboring * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */ @@ -410,22 +407,27 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, continue; } - else if (/* We sometimes custom-tailor the sorted order of marks. In that case, stop - * trying to combine as soon as combining-class drops. */ - starter < buffer->out_len - 1 && - info_cc (buffer->prev()) > info_cc (buffer->cur())) - combine = false; } /* Blocked, or doesn't compose. */ buffer->next_glyph (); if (info_cc (buffer->prev()) == 0) - { starter = buffer->out_len - 1; - combine = true; - } } buffer->swap_buffers (); + if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ) + { + /* For all CGJ, check if it prevented any reordering at all. + * If it did NOT, then make it skippable. + * https://github.com/harfbuzz/harfbuzz/issues/554 + */ + for (unsigned int i = 1; i + 1 < buffer->len; i++) + if (buffer->info[i].codepoint == 0x034Fu/*CGJ*/ && + info_cc(buffer->info[i-1]) <= info_cc(buffer->info[i+1])) + { + _hb_glyph_info_unhide (&buffer->info[i]); + } + } } diff --git a/test/shaping/tests/arabic-mark-order.tests b/test/shaping/tests/arabic-mark-order.tests index b48c82fd3..cc5226bb2 100644 --- a/test/shaping/tests/arabic-mark-order.tests +++ b/test/shaping/tests/arabic-mark-order.tests @@ -1,2 +1,6 @@ fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf::U+064A,U+064E,U+0670,U+0653,U+0640,U+0654,U+064E,U+0627:[afii57415.zz04=7+481|afii57454=4@25,975+0|uni0654=4@-50,50+0|afii57440=4+650|uni0670_uni0653=0@75,400+0|afii57454=0@750,1125+0|afii57450.calt=0+1331] fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf::U+0628,U+0618,U+0619,U+064E,U+064F,U+0654,U+0658,U+0653,U+0654,U+0651,U+0656,U+0651,U+065C,U+0655,U+0650:[uni0653.small=0@266,2508+0|uni0654=0@308,2151+0|uni0655=0@518,-1544+0|uni065C=0@501,-1453+0|uni0656=0@573,-659+0|uni0650=0@500,133+0|uni0619=0@300,1807+0|uni0618=0@357,1674+0|uni0651064E=0@387,1178+0|uni0651=0@402,764+0|uni0658=0@424,404+0|uni0654064F=0@540,-435+0|uni0628=0+1352] +fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0655,U+034F,U+0650:[uni0650.small2=0@727,-774+0|space=0+0|uni0655=0@727,-209+0|uni0649=0+1566] +fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0655,U+0650:[uni0650.small2=0@727,-774+0|uni0655=0@727,-209+0|uni0649=0+1566] +fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0650,U+0655:[uni0650.small2=0@727,-774+0|uni0655=0@727,-209+0|uni0649=0+1566] +fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0650,U+034F,U+0655:[uni0655=0+0|space=0+0|uni0650=0@166,0+0|uni0649=0+1566]