[util] Make clusters work with char offset instead of UTF-8 offset

As a result, the range indices given to --features now refer to character positions instead of byte positions, and the cluster values reported by hb-shape are character indices as well. An option to use byte indices will be added later.
2012-01-21 19:07:22 -05:00 · 2012-01-21 19:07:22 -05:00 · d530024168
parent 0f68f4a0b5
commit d530024168
2 changed files with 17 additions and 2 deletions
--- a/util/options.cc
+++ b/util/options.cc
@@ -404,8 +404,11 @@ shape_options_t::add_options (option_parser_t *parser)
    "    Comma-separated list of font features to apply to text\n"
    "\n"
    "    Features can be enabled or disabled, either globally or limited to\n"
-    "    specific byte ranges. The format is Python-esque.  Here is how it all\n"
-    "    works:\n"
+    "    specific character ranges.  The range indices refer to the positions\n"
+    "    between Unicode characters.  The position before the first character\n"
+    "    is 0, and the position after the first character is 1, and so on.\n"
+    "\n"
+    "    The format is Python-esque.  Here is how it all works:\n"
    "\n"
    "      Syntax:       Value:    Start:    End:\n"
    "\n"
--- a/util/options.hh
+++ b/util/options.hh
@@ -153,6 +153,18 @@ struct shape_options_t : option_group_t
 		   hb_font_t *font, hb_buffer_t *buffer) {
    hb_buffer_reset (buffer);
    hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len);
+
+    /* Reset cluster values to refer to Unicode character index
+     * instead of UTF-8 index.
+     * TODO: Add an option for this. */
+    unsigned int num_glyphs = hb_buffer_get_length (buffer);
+    hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL);
+    for (unsigned int i = 0; i < num_glyphs; i++)
+    {
+      info->cluster = i;
+      info++;
+    }
+
    setup_buffer (buffer);
    return hb_shape_full (font, buffer, features, num_features, NULL, shapers);
  }