Prefer decomposed form if font has GPOS mark feature

Fixes https://github.com/harfbuzz/harfbuzz/issues/653
2018-09-23 21:32:18 -04:00 · 2018-09-23 21:32:18 -04:00 · 62d1e0852a
parent d7f21777e6
commit 62d1e0852a
3 changed files with 72 additions and 63 deletions
--- a/src/hb-ot-shape-complex-hebrew.cc
+++ b/src/hb-ot-shape-complex-hebrew.cc
@@ -70,7 +70,7 @@ compose_hebrew (const hb_ot_shape_normalize_context_t *c,

  bool found = (bool) c->unicode->compose (a, b, ab);

-  if (!found && !c->plan->has_mark)
+  if (!found)
  {
      /* Special-case Hebrew presentation forms that are excluded from
       * standard normalization, but wanted for old fonts. */
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -294,6 +294,14 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
  _hb_buffer_assert_unicode_vars (buffer);

  hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference;
+  if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO)
+  {
+    if (plan->has_mark)
+      mode = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
+    else
+      mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
+  }
+
  const hb_ot_shape_normalize_context_t c = {
    plan,
    buffer,
@@ -358,65 +366,6 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,

    i = end;
  }
-
-
-  if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE ||
-      mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED)
-    return;
-
-  /* Third round, recompose */
-
-  /* As noted in the comment earlier, we don't try to combine
-   * ccc=0 chars with their previous Starter. */
-
-  buffer->clear_output ();
-  count = buffer->len;
-  unsigned int starter = 0;
-  buffer->next_glyph ();
-  while (buffer->idx < count && buffer->successful)
-  {
-    hb_codepoint_t composed, glyph;
-    if (/* We don't try to compose a non-mark character with it's preceding starter.
-	 * This is both an optimization to avoid trying to compose every two neighboring
-	 * glyphs in most scripts AND a desired feature for Hangul.  Apparently Hangul
-	 * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
-	HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())))
-    {
-      if (/* If there's anything between the starter and this char, they should have CCC
-	   * smaller than this character's. */
-	  (starter == buffer->out_len - 1 ||
-	   info_cc (buffer->prev()) < info_cc (buffer->cur())) &&
-	  /* And compose. */
-	  c.compose (&c,
-		     buffer->out_info[starter].codepoint,
-		     buffer->cur().codepoint,
-		     &composed) &&
-	  /* And the font has glyph for the composite. */
-	  font->get_nominal_glyph (composed, &glyph))
-      {
-	/* Composes. */
-	buffer->next_glyph (); /* Copy to out-buffer. */
-	if (unlikely (!buffer->successful))
-	  return;
-	buffer->merge_out_clusters (starter, buffer->out_len);
-	buffer->out_len--; /* Remove the second composable. */
-	/* Modify starter and carry on. */
-	buffer->out_info[starter].codepoint = composed;
-	buffer->out_info[starter].glyph_index() = glyph;
-	_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
-
-	continue;
-      }
-    }
-
-    /* Blocked, or doesn't compose. */
-    buffer->next_glyph ();
-
-    if (info_cc (buffer->prev()) == 0)
-      starter = buffer->out_len - 1;
-  }
-  buffer->swap_buffers ();
-
  if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ)
  {
    /* For all CGJ, check if it prevented any reordering at all.
@@ -430,4 +379,63 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
 	_hb_glyph_info_unhide (&buffer->info[i]);
      }
  }
+
+
+  /* Third round, recompose */
+
+  /* Recompose only in the COMPOSED_DIACRITICS modes; NONE and DECOMPOSED skip this round. */
+  if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS ||
+      mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT)
+  {
+    /* As noted in the comment earlier, we don't try to combine
+     * ccc=0 chars with their previous Starter. */
+
+    buffer->clear_output ();
+    count = buffer->len;
+    unsigned int starter = 0;
+    buffer->next_glyph ();
+    while (buffer->idx < count && buffer->successful)
+    {
+      hb_codepoint_t composed, glyph;
+      if (/* We don't try to compose a non-mark character with its preceding starter.
+	   * This is both an optimization to avoid trying to compose every two neighboring
+	   * glyphs in most scripts AND a desired feature for Hangul.  Apparently Hangul
+	   * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
+	  HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())))
+      {
+	if (/* If there's anything between the starter and this char, they should have CCC
+	     * smaller than this character's. */
+	    (starter == buffer->out_len - 1 ||
+	     info_cc (buffer->prev()) < info_cc (buffer->cur())) &&
+	    /* And compose. */
+	    c.compose (&c,
+		       buffer->out_info[starter].codepoint,
+		       buffer->cur().codepoint,
+		       &composed) &&
+	    /* And the font has glyph for the composite. */
+	    font->get_nominal_glyph (composed, &glyph))
+	{
+	  /* Composes. */
+	  buffer->next_glyph (); /* Copy to out-buffer. */
+	  if (unlikely (!buffer->successful))
+	    return;
+	  buffer->merge_out_clusters (starter, buffer->out_len);
+	  buffer->out_len--; /* Remove the second composable. */
+	  /* Modify starter and carry on. */
+	  buffer->out_info[starter].codepoint = composed;
+	  buffer->out_info[starter].glyph_index() = glyph;
+	  _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
+
+	  continue;
+	}
+      }
+
+      /* Blocked, or doesn't compose. */
+      buffer->next_glyph ();
+
+      if (info_cc (buffer->prev()) == 0)
+	starter = buffer->out_len - 1;
+    }
+    buffer->swap_buffers ();
+  }
 }
--- a/src/hb-ot-shape-normalize.hh
+++ b/src/hb-ot-shape-normalize.hh
@@ -38,10 +38,11 @@ struct hb_ot_shape_plan_t;
 enum hb_ot_shape_normalization_mode_t {
  HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
  HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
-  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */
-  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */
+  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* Never composes base-to-base */
+  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* Always fully decomposes and then recomposes back */

-  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
+  HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, /* Resolved when normalizing: DECOMPOSED if the plan has a GPOS mark feature, COMPOSED_DIACRITICS otherwise. */
+  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_AUTO
 };

 HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper,