Prefer decomposed form if font has GPOS mark feature

Fixes https://github.com/harfbuzz/harfbuzz/issues/653
This commit is contained in:
Behdad Esfahbod 2018-09-23 21:32:18 -04:00
parent d7f21777e6
commit 62d1e0852a
3 changed files with 72 additions and 63 deletions

View File

@ -70,7 +70,7 @@ compose_hebrew (const hb_ot_shape_normalize_context_t *c,
bool found = (bool) c->unicode->compose (a, b, ab);
if (!found && !c->plan->has_mark)
if (!found)
{
/* Special-case Hebrew presentation forms that are excluded from
* standard normalization, but wanted for old fonts. */

View File

@ -294,6 +294,14 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
_hb_buffer_assert_unicode_vars (buffer);
hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference;
if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO)
{
if (plan->has_mark)
mode = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
else
mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
}
const hb_ot_shape_normalize_context_t c = {
plan,
buffer,
@ -358,65 +366,6 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
i = end;
}
if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE ||
mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED)
return;
/* Third round, recompose */
/* As noted in the comment earlier, we don't try to combine
* ccc=0 chars with their previous Starter. */
buffer->clear_output ();
count = buffer->len;
unsigned int starter = 0;
buffer->next_glyph ();
while (buffer->idx < count && buffer->successful)
{
hb_codepoint_t composed, glyph;
if (/* We don't try to compose a non-mark character with it's preceding starter.
* This is both an optimization to avoid trying to compose every two neighboring
* glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
* fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())))
{
if (/* If there's anything between the starter and this char, they should have CCC
* smaller than this character's. */
(starter == buffer->out_len - 1 ||
info_cc (buffer->prev()) < info_cc (buffer->cur())) &&
/* And compose. */
c.compose (&c,
buffer->out_info[starter].codepoint,
buffer->cur().codepoint,
&composed) &&
/* And the font has glyph for the composite. */
font->get_nominal_glyph (composed, &glyph))
{
/* Composes. */
buffer->next_glyph (); /* Copy to out-buffer. */
if (unlikely (!buffer->successful))
return;
buffer->merge_out_clusters (starter, buffer->out_len);
buffer->out_len--; /* Remove the second composable. */
/* Modify starter and carry on. */
buffer->out_info[starter].codepoint = composed;
buffer->out_info[starter].glyph_index() = glyph;
_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
continue;
}
}
/* Blocked, or doesn't compose. */
buffer->next_glyph ();
if (info_cc (buffer->prev()) == 0)
starter = buffer->out_len - 1;
}
buffer->swap_buffers ();
if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ)
{
/* For all CGJ, check if it prevented any reordering at all.
@ -430,4 +379,63 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
_hb_glyph_info_unhide (&buffer->info[i]);
}
}
/* Third round, recompose */
if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS ||
mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT)
return;
{
/* As noted in the comment earlier, we don't try to combine
* ccc=0 chars with their previous Starter. */
buffer->clear_output ();
count = buffer->len;
unsigned int starter = 0;
buffer->next_glyph ();
while (buffer->idx < count && buffer->successful)
{
hb_codepoint_t composed, glyph;
if (/* We don't try to compose a non-mark character with it's preceding starter.
* This is both an optimization to avoid trying to compose every two neighboring
* glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
* fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())))
{
if (/* If there's anything between the starter and this char, they should have CCC
* smaller than this character's. */
(starter == buffer->out_len - 1 ||
info_cc (buffer->prev()) < info_cc (buffer->cur())) &&
/* And compose. */
c.compose (&c,
buffer->out_info[starter].codepoint,
buffer->cur().codepoint,
&composed) &&
/* And the font has glyph for the composite. */
font->get_nominal_glyph (composed, &glyph))
{
/* Composes. */
buffer->next_glyph (); /* Copy to out-buffer. */
if (unlikely (!buffer->successful))
return;
buffer->merge_out_clusters (starter, buffer->out_len);
buffer->out_len--; /* Remove the second composable. */
/* Modify starter and carry on. */
buffer->out_info[starter].codepoint = composed;
buffer->out_info[starter].glyph_index() = glyph;
_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
continue;
}
}
/* Blocked, or doesn't compose. */
buffer->next_glyph ();
if (info_cc (buffer->prev()) == 0)
starter = buffer->out_len - 1;
}
buffer->swap_buffers ();
}
}

View File

@ -38,10 +38,11 @@ struct hb_ot_shape_plan_t;
enum hb_ot_shape_normalization_mode_t {
HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* Never composes base-to-base */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* Always fully decomposes and then recompose back */
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, /* Choose decomposed if GPOS mark feature available, compose otherwise. */
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_AUTO
};
HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper,