[util/hb-subset] Support ranges in --unicodes / --unicodes-file

Fixes https://github.com/harfbuzz/harfbuzz/issues/3109
2021-08-11 22:49:47 -06:00 · 2021-08-11 22:49:47 -06:00 · 23282c9c9d
parent f60f005b1f
commit 23282c9c9d
3 changed files with 48 additions and 25 deletions
--- a/test/shape/hb_test_tools.py
+++ b/test/shape/hb_test_tools.py
@ -384,7 +384,7 @@ class Unicode:
 	@staticmethod
 	def parse (s):
 		s = re.sub (r"0[xX]", " ", s)
-		s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
+		s = re.sub (r"[<+\->{},;&#\\xXuUnNiI\n\t]", " ", s)
 		return [int (x, 16) for x in s.split ()]

 	@staticmethod
--- a/util/hb-subset.cc
+++ b/util/hb-subset.cc
@ -157,7 +157,7 @@ parse_gids (const char *name G_GNUC_UNUSED,
      if (end_code < start_code)
      {
 	g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
-		     "Invalid gid-index range %u-%u", start_code, end_code);
+		     "Invalid glyph-index range %u-%u", start_code, end_code);
 	return false;
      }
      hb_set_add_range (gids, start_code, end_code);
@ -166,6 +166,7 @@ parse_gids (const char *name G_GNUC_UNUSED,
    {
      hb_set_add (gids, start_code);
    }
+
    s = p;
  }

@ -204,13 +205,13 @@ parse_text (const char *name G_GNUC_UNUSED,
    return true;
  }

-  hb_set_t *codepoints = hb_subset_input_unicode_set (subset_main->input);
+  hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
  for (gchar *c = (gchar *) arg;
       *c;
       c = g_utf8_find_next_char(c, nullptr))
  {
    gunichar cp = g_utf8_get_char(c);
-    hb_set_add (codepoints, cp);
+    hb_set_add (unicodes, cp);
  }
  return true;
 }
@ -230,33 +231,56 @@ parse_unicodes (const char *name G_GNUC_UNUSED,
  }

  // XXX TODO Ranges
-  hb_set_t *codepoints = hb_subset_input_unicode_set (subset_main->input);
-  {
+  hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
+
+#define DELIMITERS "<+->{},;&#\\xXuUnNiI\n\t\v\f\r "
+
  char *s = (char *) arg;
  char *p;

  while (s && *s)
  {
-#define DELIMITERS "<+>{},;&#\\xXuUnNiI\n\t\v\f\r "
-
    while (*s && strchr (DELIMITERS, *s))
      s++;
    if (!*s)
      break;

    errno = 0;
-      hb_codepoint_t u = strtoul (s, &p, 16);
+    hb_codepoint_t start_code = strtoul (s, &p, 16);
    if (errno || s == p)
    {
      g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
-		     "Failed parsing Unicode value at: '%s'", s);
+		   "Failed parsing Unicode at: '%s'", s);
      return false;
    }
-      hb_set_add (codepoints, u);
+
+    if (p && p[0] == '-') // ranges
+    {
+      s = ++p;
+      hb_codepoint_t end_code = strtoul (s, &p, 16);
+      if (s[0] == '-' || errno || s == p)
+      {
+	g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
+		     "Failed parsing Unicode at: '%s'", s);
+	return false;
+      }
+
+      if (end_code < start_code)
+      {
+	g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
+		     "Invalid Unicode range %u-%u", start_code, end_code);
+	return false;
+      }
+      hb_set_add_range (unicodes, start_code, end_code);
+    }
+    else
+    {
+      hb_set_add (unicodes, start_code);
+    }

    s = p;
  }
-  }
+
  return true;
 }

@ -298,7 +322,7 @@ parse_nameids (const char *name,
    if (errno || s == p)
    {
      g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
-		   "Failed parsing nameID value at: '%s'", s);
+		   "Failed parsing nameID at: '%s'", s);
      return false;
    }

@ -419,7 +443,7 @@ parse_layout_features (const char *name,
    if (strlen (s) > 4) // table tags are at most 4 bytes
    {
      g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
-                   "Failed parsing table tag value at: '%s'", s);
+                   "Failed parsing table tag at: '%s'", s);
      return false;
    }

@ -456,7 +480,7 @@ parse_drop_tables (const char *name,
    if (strlen (s) > 4) // Table tags are at most 4 bytes.
    {
      g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
-		   "Failed parsing table tag value at: '%s'", s);
+		   "Failed parsing table tag at: '%s'", s);
      return false;
    }

--- a/util/text-options.hh
+++ b/util/text-options.hh
@ -141,14 +141,13 @@ parse_unicodes (const char *name G_GNUC_UNUSED,
  }
  else
  {
+#define DELIMITERS "<+->{},;&#\\xXuUnNiI\n\t\v\f\r "

    char *s = (char *) arg;
    char *p;

    while (s && *s)
    {
-#define DELIMITERS "<+>{},;&#\\xXuUnNiI\n\t\v\f\r "
-
      while (*s && strchr (DELIMITERS, *s))
 	s++;
      if (!*s)