Merge pull request #3150 from harfbuzz/item-context-tests

[test] Add tests for item context
2021-08-25 17:19:06 -06:00 · 2021-08-25 17:19:06 -06:00 · 99356ea5f0
parent 1620698bd5 4b5a81f13c
commit 99356ea5f0
8 changed files with 163 additions and 33 deletions
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@ -1755,6 +1755,28 @@ hb_buffer_append (hb_buffer_t *buffer,
  memcpy (buffer->info + orig_len, source->info + start, (end - start) * sizeof (buffer->info[0]));
  if (buffer->have_positions)
    memcpy (buffer->pos + orig_len, source->pos + start, (end - start) * sizeof (buffer->pos[0]));
+
+  if (source->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE)
+  {
+    /* See similar logic in add_utf. */
+
+    /* pre-context */
+    if (!orig_len && start + source->context_len[0] > 0)
+    {
+      buffer->clear_context (0);
+      while (start > 0 && buffer->context_len[0] < buffer->CONTEXT_LENGTH)
+	buffer->context[0][buffer->context_len[0]++] = source->info[--start].codepoint;
+      for (auto i = 0u; i < source->context_len[0] && buffer->context_len[0] < buffer->CONTEXT_LENGTH; i++)
+	buffer->context[0][buffer->context_len[0]++] = source->context[0][i];
+    }
+
+    /* post-context */
+    buffer->clear_context (1);
+    while (end < source->len && buffer->context_len[1] < buffer->CONTEXT_LENGTH)
+      buffer->context[1][buffer->context_len[1]++] = source->info[end++].codepoint;
+    for (auto i = 0u; i < source->context_len[1] && buffer->context_len[1] < buffer->CONTEXT_LENGTH; i++)
+      buffer->context[1][buffer->context_len[1]++] = source->context[1][i];
+  }
 }


--- a/test/shape/data/in-house/Makefile.sources
+++ b/test/shape/data/in-house/Makefile.sources
@ -31,6 +31,7 @@ TESTS = \
 	tests/indic-special-cases.tests \
 	tests/indic-syllable.tests \
 	tests/indic-vowel-letter-spoofing.tests \
+	tests/item-context.tests \
 	tests/kern-format2.tests \
 	tests/khmer-mark-order.tests \
 	tests/khmer-misc.tests \
--- a/test/shape/data/in-house/fonts/3105b51976b879032c66aa93a634b3b3672cd344.ttf
+++ b/test/shape/data/in-house/fonts/3105b51976b879032c66aa93a634b3b3672cd344.ttf
--- a/test/shape/data/in-house/fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf
+++ b/test/shape/data/in-house/fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf
--- a/test/shape/data/in-house/meson.build
+++ b/test/shape/data/in-house/meson.build
@ -31,6 +31,7 @@ in_house_tests = [
  'indic-special-cases.tests',
  'indic-syllable.tests',
  'indic-vowel-letter-spoofing.tests',
+  'item-context.tests',
  'kern-format2.tests',
  'khmer-mark-order.tests',
  'khmer-misc.tests',
--- a/test/shape/data/in-house/tests/item-context.tests
+++ b/test/shape/data/in-house/tests/item-context.tests
@ -0,0 +1,11 @@
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;;U+0643,U+0650,U+062A,U+064E,U+0627,U+0628,U+064F,U+0646,U+064E,U+0627;[uniFE8E=9+316|uni064E=7@169,-24+0|uniFEE8=7+341|uni064F=5@167,-222+0|uniFE91=5+301|uniFE8E=4+316|uni064E=2@196,-28+0|uniFE98=2+391|uni0650=0@288,44+0|uniFEDB=0+576]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-before=U+0643,U+0650;U+062A,U+064E,U+0627,U+0628,U+064F,U+0646,U+064E,U+0627;[uniFE8E=7+316|uni064E=5@169,-24+0|uniFEE8=5+341|uni064F=3@167,-222+0|uniFE91=3+301|uniFE8E=2+316|uni064E=0@196,-28+0|uniFE98=0+391]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-before=U+0643;U+062A,U+064E,U+0627,U+0628,U+064F,U+0646,U+064E,U+0627;[uniFE8E=7+316|uni064E=5@169,-24+0|uniFEE8=5+341|uni064F=3@167,-222+0|uniFE91=3+301|uniFE8E=2+316|uni064E=0@196,-28+0|uniFE98=0+391]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-after=U+0646,U+064E,U+0627;U+0643,U+0650,U+062A,U+064E,U+0627,U+0628,U+064F;[uni064F=5@167,-222+0|uniFE91=5+301|uniFE8E=4+316|uni064E=2@196,-28+0|uniFE98=2+391|uni0650=0@288,44+0|uniFEDB=0+576]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-after=U+0646;U+0643,U+0650,U+062A,U+064E,U+0627,U+0628,U+064F;[uni064F=5@167,-222+0|uniFE91=5+301|uniFE8E=4+316|uni064E=2@196,-28+0|uniFE98=2+391|uni0650=0@288,44+0|uniFEDB=0+576]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-before=U+0643,U+0650 --unicodes-after=U+0646,U+064E,U+0627;U+062A,U+064E,U+0627,U+0628,U+064F;[uni064F=3@167,-222+0|uniFE91=3+301|uniFE8E=2+316|uni064E=0@196,-28+0|uniFE98=0+391]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-before=U+0643 --unicodes-after=U+0646;U+062A,U+064E,U+0627,U+0628,U+064F;[uni064F=3@167,-222+0|uniFE91=3+301|uniFE8E=2+316|uni064E=0@196,-28+0|uniFE98=0+391]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-before=U+0627;U+0643,U+062A,U+0628;[uniFE90=2+821|uniFE98=1+391|uniFEDB=0+576]
+../fonts/65984dfce552a785f564422aadf4715fa07795ad.ttf;--unicodes-after=U+0627;U+0643,U+062A,U+0628,U+0627;[uniFE8E=3+316|uniFE92=2+341|uniFE98=1+391|uniFEDB=0+576]
+../fonts/3105b51976b879032c66aa93a634b3b3672cd344.ttf;--bot;U+064E;[uni25CC=0+679|uni064E=0@-607,-210+0]
+../fonts/3105b51976b879032c66aa93a634b3b3672cd344.ttf;--bot --unicodes-before=0627;U+064E;[uni064E=0+0]
--- a/util/shape-options.hh
+++ b/util/shape-options.hh
@ -235,7 +235,6 @@ struct shape_options_t
      hb_buffer_clear_contents (fragment);
      copy_buffer_properties (fragment, buffer);

-      /* TODO: Add pre/post context text. */
      hb_buffer_flags_t flags = hb_buffer_get_flags (fragment);
      if (0 < text_start)
 	flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT);
--- a/util/text-options.hh
+++ b/util/text-options.hh
@ -119,6 +119,43 @@ parse_text (const char *name G_GNUC_UNUSED,
  return true;
 }

+/* Parse a list of hexadecimal codepoints out of `unicodes` and append each
+ * of them to `gs` via g_string_append_unichar().
+ *
+ * Numbers may be separated (and preceded or followed) by any run of the
+ * delimiter characters listed below, which include punctuation, whitespace
+ * and the letters x, u, n and i in either case.  A null or empty `unicodes`
+ * appends nothing and succeeds.
+ *
+ * Returns true on success.  If a number cannot be parsed, `gs` is freed,
+ * `error` is set and false is returned, so the caller must not use `gs`
+ * after a failure. */
+static bool
+encode_unicodes (const char *unicodes,
+		 GString    *gs,
+		 GError    **error)
+{
+  const char *const delimiters = "<+->{},;&#\\xXuUnNiI\n\t\v\f\r ";
+
+  if (!unicodes)
+    return true;
+
+  for (const char *cursor = unicodes;;)
+  {
+    /* Step over everything that separates one number from the next. */
+    cursor += strspn (cursor, delimiters);
+    if (!*cursor)
+      break;
+
+    char *number_end = nullptr;
+    errno = 0;
+    hb_codepoint_t u = strtoul (cursor, &number_end, 16);
+    if (errno || number_end == cursor)
+    {
+      /* Nothing was consumed, or the conversion itself reported an error. */
+      g_string_free (gs, TRUE);
+      g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
+		   "Failed parsing Unicode value at: '%s'", cursor);
+      return false;
+    }
+
+    g_string_append_unichar (gs, u);
+    cursor = number_end;
+  }
+
+  return true;
+}
+
+
 static gboolean
 parse_unicodes (const char *name G_GNUC_UNUSED,
 		const char *arg,
@ -136,44 +173,101 @@ parse_unicodes (const char *name G_GNUC_UNUSED,

  GString *gs = g_string_new (nullptr);
  if (0 == strcmp (arg, "*"))
-  {
    g_string_append_c (gs, '*');
-  }
  else
-  {
-#define DELIMITERS "<+->{},;&#\\xXuUnNiI\n\t\v\f\r "
-
-    char *s = (char *) arg;
-    char *p;
-
-    while (s && *s)
-    {
-      while (*s && strchr (DELIMITERS, *s))
-	s++;
-      if (!*s)
-	break;
-
-      errno = 0;
-      hb_codepoint_t u = strtoul (s, &p, 16);
-      if (errno || s == p)
-      {
-	g_string_free (gs, TRUE);
-	g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
-		     "Failed parsing Unicode value at: '%s'", s);
-	return false;
-      }
-
-      g_string_append_unichar (gs, u);
-
-      s = p;
-    }
-  }
+    if (!encode_unicodes (arg, gs, error))
+      return false;

  text_opts->text_len = gs->len;
  text_opts->text = g_string_free (gs, FALSE);
  return true;
 }

+/* Option callback for --text-before: stores a copy of `arg` as the text
+ * context that precedes each line.
+ *
+ * `data` is the shape_text_options_t being filled in.  If a before-context
+ * has already been stored (by this option or by --unicodes-before), `error`
+ * is set and false is returned; otherwise returns true. */
+static gboolean
+parse_text_before (const char *name G_GNUC_UNUSED,
+		   const char *arg,
+		   gpointer    data,
+		   GError    **error)
+{
+  auto *opts = (shape_text_options_t *) data;
+
+  if (opts->text_before)
+  {
+    g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
+		 "Either --text-before or --unicodes-before can be provided but not both");
+    return false;
+  }
+
+  /* No diagnostic output here: option parsing stays silent on success,
+   * matching parse_text_after. */
+  opts->text_before = g_strdup (arg);
+  return true;
+}
+
+
+/* Option callback for --unicodes-before: converts the list of hex
+ * codepoints in `arg` to a string and stores it as the text context that
+ * precedes each line.
+ *
+ * `data` is the shape_text_options_t being filled in.  Returns false with
+ * `error` set if a before-context was already stored or if `arg` cannot be
+ * parsed; returns true otherwise. */
+static gboolean
+parse_unicodes_before (const char *name G_GNUC_UNUSED,
+		       const char *arg,
+		       gpointer    data,
+		       GError    **error)
+{
+  auto *shape_opts = (shape_text_options_t *) data;
+
+  if (!shape_opts->text_before)
+  {
+    GString *encoded = g_string_new (nullptr);
+    /* On failure encode_unicodes has already freed the string. */
+    if (!encode_unicodes (arg, encoded, error))
+      return false;
+
+    /* Keep the character data, drop only the GString wrapper. */
+    shape_opts->text_before = g_string_free (encoded, FALSE);
+    return true;
+  }
+
+  g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
+	       "Either --text-before or --unicodes-before can be provided but not both");
+  return false;
+}
+
+
+/* Option callback for --text-after: stores a copy of `arg` as the text
+ * context that follows each line.
+ *
+ * `data` is the shape_text_options_t being filled in.  If an after-context
+ * has already been stored (by this option or by --unicodes-after), `error`
+ * is set and false is returned; otherwise returns true. */
+static gboolean
+parse_text_after (const char *name G_GNUC_UNUSED,
+		  const char *arg,
+		  gpointer    data,
+		  GError    **error)
+{
+  auto *shape_opts = (shape_text_options_t *) data;
+
+  if (!shape_opts->text_after)
+  {
+    shape_opts->text_after = g_strdup (arg);
+    return true;
+  }
+
+  g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
+	       "Either --text-after or --unicodes-after can be provided but not both");
+  return false;
+}
+
+
+/* Option callback for --unicodes-after: converts the list of hex
+ * codepoints in `arg` to a string and stores it as the text context that
+ * follows each line.
+ *
+ * `data` is the shape_text_options_t being filled in.  Returns false with
+ * `error` set if an after-context was already stored or if `arg` cannot be
+ * parsed; returns true otherwise. */
+static gboolean
+parse_unicodes_after (const char *name G_GNUC_UNUSED,
+		      const char *arg,
+		      gpointer    data,
+		      GError    **error)
+{
+  auto *shape_opts = (shape_text_options_t *) data;
+
+  if (!shape_opts->text_after)
+  {
+    GString *encoded = g_string_new (nullptr);
+    /* On failure encode_unicodes has already freed the string. */
+    if (!encode_unicodes (arg, encoded, error))
+      return false;
+
+    /* Keep the character data, drop only the GString wrapper. */
+    shape_opts->text_after = g_string_free (encoded, FALSE);
+    return true;
+  }
+
+  g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
+	       "Either --text-after or --unicodes-after can be provided but not both");
+  return false;
+}
+
+
 const char *
 text_options_t::get_line (unsigned int *len)
 {
@ -236,8 +330,10 @@ shape_text_options_t::add_options (option_parser_t *parser)

  GOptionEntry entries[] =
  {
-    {"text-before",	0, 0, G_OPTION_ARG_STRING,	&this->text_before,		"Set text context before each line",	"string"},
-    {"text-after",	0, 0, G_OPTION_ARG_STRING,	&this->text_after,		"Set text context after each line",	"string"},
+    {"text-before",	0, 0, G_OPTION_ARG_CALLBACK,	(gpointer) &parse_text_before,		"Set text context before each line",	"string"},
+    {"text-after",	0, 0, G_OPTION_ARG_CALLBACK,	(gpointer) &parse_text_after,		"Set text context after each line",	"string"},
+    {"unicodes-before",	0, 0, G_OPTION_ARG_CALLBACK,	(gpointer) &parse_unicodes_before,	"Set Unicode codepoints context before each line",	"list of hex numbers"},
+    {"unicodes-after",	0, 0, G_OPTION_ARG_CALLBACK,	(gpointer) &parse_unicodes_after,	"Set Unicode codepoints context after each line",	"list of hex numbers"},
    {nullptr}
  };
  parser->add_group (entries,