From 62d1e0852a5549a1b510ad46a4b89f12730bb708 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 23 Sep 2018 21:32:18 -0400 Subject: [PATCH] Prefer decomposed form if font has GPOS mark feature Fixes https://github.com/harfbuzz/harfbuzz/issues/653 --- src/hb-ot-shape-complex-hebrew.cc | 2 +- src/hb-ot-shape-normalize.cc | 126 ++++++++++++++++-------------- src/hb-ot-shape-normalize.hh | 7 +- 3 files changed, 72 insertions(+), 63 deletions(-) diff --git a/src/hb-ot-shape-complex-hebrew.cc b/src/hb-ot-shape-complex-hebrew.cc index ba25258a3..5ad8daf77 100644 --- a/src/hb-ot-shape-complex-hebrew.cc +++ b/src/hb-ot-shape-complex-hebrew.cc @@ -70,7 +70,7 @@ compose_hebrew (const hb_ot_shape_normalize_context_t *c, bool found = (bool) c->unicode->compose (a, b, ab); - if (!found && !c->plan->has_mark) + if (!found) { /* Special-case Hebrew presentation forms that are excluded from * standard normalization, but wanted for old fonts. */ diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index fd0b63aec..487fd96a7 100644 --- a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -294,6 +294,14 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, _hb_buffer_assert_unicode_vars (buffer); hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference; + if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO) + { + if (plan->has_mark) + mode = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; + else + mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS; + } + const hb_ot_shape_normalize_context_t c = { plan, buffer, @@ -358,65 +366,6 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, i = end; } - - - if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE || - mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED) - return; - - /* Third round, recompose */ - - /* As noted in the comment earlier, we don't try to combine - * ccc=0 chars with their previous Starter. */ - - buffer->clear_output (); - count = buffer->len; - unsigned int starter = 0; - buffer->next_glyph (); - while (buffer->idx < count && buffer->successful) - { - hb_codepoint_t composed, glyph; - if (/* We don't try to compose a non-mark character with it's preceding starter. - * This is both an optimization to avoid trying to compose every two neighboring - * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul - * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */ - HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur()))) - { - if (/* If there's anything between the starter and this char, they should have CCC - * smaller than this character's. */ - (starter == buffer->out_len - 1 || - info_cc (buffer->prev()) < info_cc (buffer->cur())) && - /* And compose. */ - c.compose (&c, - buffer->out_info[starter].codepoint, - buffer->cur().codepoint, - &composed) && - /* And the font has glyph for the composite. */ - font->get_nominal_glyph (composed, &glyph)) - { - /* Composes. */ - buffer->next_glyph (); /* Copy to out-buffer. */ - if (unlikely (!buffer->successful)) - return; - buffer->merge_out_clusters (starter, buffer->out_len); - buffer->out_len--; /* Remove the second composable. */ - /* Modify starter and carry on. */ - buffer->out_info[starter].codepoint = composed; - buffer->out_info[starter].glyph_index() = glyph; - _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer); - - continue; - } - } - - /* Blocked, or doesn't compose. */ - buffer->next_glyph (); - - if (info_cc (buffer->prev()) == 0) - starter = buffer->out_len - 1; - } - buffer->swap_buffers (); - if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ) { /* For all CGJ, check if it prevented any reordering at all. @@ -430,4 +379,63 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, _hb_glyph_info_unhide (&buffer->info[i]); } } + + + /* Third round, recompose */ + + if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS || + mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT) + return; + { + /* As noted in the comment earlier, we don't try to combine + * ccc=0 chars with their previous Starter. */ + + buffer->clear_output (); + count = buffer->len; + unsigned int starter = 0; + buffer->next_glyph (); + while (buffer->idx < count && buffer->successful) + { + hb_codepoint_t composed, glyph; + if (/* We don't try to compose a non-mark character with it's preceding starter. + * This is both an optimization to avoid trying to compose every two neighboring + * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul + * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */ + HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur()))) + { + if (/* If there's anything between the starter and this char, they should have CCC + * smaller than this character's. */ + (starter == buffer->out_len - 1 || + info_cc (buffer->prev()) < info_cc (buffer->cur())) && + /* And compose. */ + c.compose (&c, + buffer->out_info[starter].codepoint, + buffer->cur().codepoint, + &composed) && + /* And the font has glyph for the composite. */ + font->get_nominal_glyph (composed, &glyph)) + { + /* Composes. */ + buffer->next_glyph (); /* Copy to out-buffer. */ + if (unlikely (!buffer->successful)) + return; + buffer->merge_out_clusters (starter, buffer->out_len); + buffer->out_len--; /* Remove the second composable. */ + /* Modify starter and carry on. */ + buffer->out_info[starter].codepoint = composed; + buffer->out_info[starter].glyph_index() = glyph; + _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer); + + continue; + } + } + + /* Blocked, or doesn't compose. */ + buffer->next_glyph (); + + if (info_cc (buffer->prev()) == 0) + starter = buffer->out_len - 1; + } + buffer->swap_buffers (); + } } diff --git a/src/hb-ot-shape-normalize.hh b/src/hb-ot-shape-normalize.hh index 4aea640b1..80755f775 100644 --- a/src/hb-ot-shape-normalize.hh +++ b/src/hb-ot-shape-normalize.hh @@ -38,10 +38,11 @@ struct hb_ot_shape_plan_t; enum hb_ot_shape_normalization_mode_t { HB_OT_SHAPE_NORMALIZATION_MODE_NONE, HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED, - HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */ - HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* Never composes base-to-base */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* Always fully decomposes and then recompose back */ - HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS + HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, /* Choose decomposed if GPOS mark feature available, compose otherwise. */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_AUTO }; HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper,