From 95cefdf96efe43a44133aa8a186155cf4e63e2b7 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 16 Apr 2012 18:08:20 -0400 Subject: [PATCH] Add --utf8-clusters Also fix cairo cluster generation. --- util/hb-shape.cc | 8 +++++--- util/hb-view.hh | 2 +- util/helper-cairo.cc | 22 +++++++++++++++++----- util/helper-cairo.hh | 3 ++- util/options.cc | 19 ++++++++++++++----- util/options.hh | 26 ++++++++++++++++---------- util/view-cairo.cc | 5 +++-- util/view-cairo.hh | 3 ++- 8 files changed, 60 insertions(+), 28 deletions(-) diff --git a/util/hb-shape.cc b/util/hb-shape.cc index a76a7786f..b22bc1fd1 100644 --- a/util/hb-shape.cc +++ b/util/hb-shape.cc @@ -36,7 +36,8 @@ struct output_buffer_t : output_options_t, format_options_t void init (const font_options_t *font_opts); void consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len); + unsigned int text_len, + hb_bool_t utf8_clusters); void finish (const font_options_t *font_opts); protected: @@ -57,11 +58,12 @@ output_buffer_t::init (const font_options_t *font_opts) void output_buffer_t::consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len) + unsigned int text_len, + hb_bool_t utf8_clusters) { line_no++; g_string_set_size (gs, 0); - serialize_line (buffer, line_no, text, text_len, font, gs); + serialize_line (buffer, line_no, text, text_len, font, utf8_clusters, gs); fprintf (fp, "%s", gs->str); } diff --git a/util/hb-view.hh b/util/hb-view.hh index 68a5dd8e5..66d955b52 100644 --- a/util/hb-view.hh +++ b/util/hb-view.hh @@ -65,7 +65,7 @@ struct hb_view_t buffer)) fail (FALSE, "All shapers failed"); - output.consume_line (buffer, text, text_len); + output.consume_line (buffer, text, text_len, shaper.utf8_clusters); } hb_buffer_destroy (buffer); diff --git a/util/helper-cairo.cc b/util/helper-cairo.cc index abb8c1538..9374d9eb0 100644 --- a/util/helper-cairo.cc +++ b/util/helper-cairo.cc @@ -301,7 +301,8 @@ helper_cairo_line_from_buffer (helper_cairo_line_t *l, hb_buffer_t *buffer, const char *text, unsigned int text_len, - double scale) + double scale, + hb_bool_t utf8_clusters) { memset (l, 0, sizeof (*l)); @@ -349,27 +350,38 @@ helper_cairo_line_from_buffer (helper_cairo_line_t *l, hb_bool_t backward = HB_DIRECTION_IS_BACKWARD (hb_buffer_get_direction (buffer)); l->cluster_flags = backward ? CAIRO_TEXT_CLUSTER_FLAG_BACKWARD : (cairo_text_cluster_flags_t) 0; unsigned int cluster = 0; + const char *start = l->utf8, *end = start; l->clusters[cluster].num_glyphs++; if (backward) { for (i = l->num_glyphs - 2; i >= 0; i--) { if (hb_glyph[i].cluster != hb_glyph[i+1].cluster) { g_assert (hb_glyph[i].cluster > hb_glyph[i+1].cluster); - l->clusters[cluster].num_bytes += hb_glyph[i].cluster - hb_glyph[i+1].cluster; + if (utf8_clusters) + end = start + hb_glyph[i].cluster - hb_glyph[i+1].cluster; + else + end = g_utf8_offset_to_pointer (start, hb_glyph[i].cluster - hb_glyph[i+1].cluster); + l->clusters[cluster].num_bytes = end - start; + start = end; cluster++; } l->clusters[cluster].num_glyphs++; } - l->clusters[cluster].num_bytes += text_len - hb_glyph[0].cluster; + l->clusters[cluster].num_bytes = l->utf8 + text_len - start; } else { for (i = 1; i < (int) l->num_glyphs; i++) { if (hb_glyph[i].cluster != hb_glyph[i-1].cluster) { g_assert (hb_glyph[i].cluster > hb_glyph[i-1].cluster); - l->clusters[cluster].num_bytes += hb_glyph[i].cluster - hb_glyph[i-1].cluster; + if (utf8_clusters) + end = start + hb_glyph[i].cluster - hb_glyph[i-1].cluster; + else + end = g_utf8_offset_to_pointer (start, hb_glyph[i].cluster - hb_glyph[i-1].cluster); + l->clusters[cluster].num_bytes = end - start; + start = end; cluster++; } l->clusters[cluster].num_glyphs++; } - l->clusters[cluster].num_bytes += text_len - hb_glyph[i - 1].cluster; + l->clusters[cluster].num_bytes = l->utf8 + text_len - start; } } } diff --git a/util/helper-cairo.hh b/util/helper-cairo.hh index bc3fe1d25..2f2c9d4e2 100644 --- a/util/helper-cairo.hh +++ b/util/helper-cairo.hh @@ -75,6 +75,7 @@ helper_cairo_line_from_buffer (helper_cairo_line_t *l, hb_buffer_t *buffer, const char *text, unsigned int text_len, - double scale); + double scale, + hb_bool_t utf8_clusters); #endif diff --git a/util/options.cc b/util/options.cc index e5e76c976..e24a026ad 100644 --- a/util/options.cc +++ b/util/options.cc @@ -391,6 +391,7 @@ shape_options_t::add_options (option_parser_t *parser) {"direction", 0, 0, G_OPTION_ARG_STRING, &this->direction, "Set text direction (default: auto)", "ltr/rtl/ttb/btt"}, {"language", 0, 0, G_OPTION_ARG_STRING, &this->language, "Set text language (default: $LANG)", "langstr"}, {"script", 0, 0, G_OPTION_ARG_STRING, &this->script, "Set text script (default: auto)", "ISO-15924 tag"}, + {"utf8-clusters", 0, 0, G_OPTION_ARG_NONE, &this->utf8_clusters, "Use UTF-8 byte indices, not char indices", NULL}, {NULL} }; parser->add_group (entries, @@ -404,9 +405,12 @@ shape_options_t::add_options (option_parser_t *parser) " Comma-separated list of font features to apply to text\n" "\n" " Features can be enabled or disabled, either globally or limited to\n" - " specific character ranges. The range indices refer to the positions\n" - " between Unicode characters. The position before the first character\n" - " is 0, and the position after the first character is 1, and so on.\n" + " specific character ranges.\n" + "\n" + " The range indices refer to the positions between Unicode characters,\n" + " unless the --utf8-clusters is provided, in which case range indices\n" + " refer to UTF-8 byte indices. The position before the first character\n" + " is always 0.\n" "\n" " The format is Python-esque. Here is how it all works:\n" "\n" @@ -716,6 +720,7 @@ format_options_t::serialize_unicode (hb_buffer_t *buffer, void format_options_t::serialize_glyphs (hb_buffer_t *buffer, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs) { FT_Face ft_face = show_glyph_names ? hb_ft_font_get_face (font) : NULL; @@ -739,8 +744,11 @@ format_options_t::serialize_glyphs (hb_buffer_t *buffer, } else g_string_append_printf (gs, "%u", info->codepoint); - if (show_clusters) + if (show_clusters) { g_string_append_printf (gs, "=%u", info->cluster); + if (utf8_clusters) + g_string_append (gs, "u8"); + } if (show_positions && (pos->x_offset || pos->y_offset)) { g_string_append_c (gs, '@'); @@ -771,6 +779,7 @@ format_options_t::serialize_line (hb_buffer_t *buffer, const char *text, unsigned int text_len, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs) { if (show_text) { @@ -790,6 +799,6 @@ format_options_t::serialize_line (hb_buffer_t *buffer, } serialize_line_no (line_no, gs); - serialize_glyphs (buffer, font, gs); + serialize_glyphs (buffer, font, utf8_clusters, gs); g_string_append_c (gs, '\n'); } diff --git a/util/options.hh b/util/options.hh index da950177f..15d9402db 100644 --- a/util/options.hh +++ b/util/options.hh @@ -140,6 +140,7 @@ struct shape_options_t : option_group_t features = NULL; num_features = 0; shapers = NULL; + utf8_clusters = false; add_options (parser); } @@ -161,15 +162,16 @@ struct shape_options_t : option_group_t hb_buffer_reset (buffer); hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len); - /* Reset cluster values to refer to Unicode character index - * instead of UTF-8 index. - * TODO: Add an option for this. */ - unsigned int num_glyphs = hb_buffer_get_length (buffer); - hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); - for (unsigned int i = 0; i < num_glyphs; i++) - { - info->cluster = i; - info++; + if (!utf8_clusters) { + /* Reset cluster values to refer to Unicode character index + * instead of UTF-8 index. */ + unsigned int num_glyphs = hb_buffer_get_length (buffer); + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); + for (unsigned int i = 0; i < num_glyphs; i++) + { + info->cluster = i; + info++; + } } setup_buffer (buffer); @@ -182,6 +184,7 @@ struct shape_options_t : option_group_t hb_feature_t *features; unsigned int num_features; char **shapers; + hb_bool_t utf8_clusters; }; @@ -285,7 +288,8 @@ struct output_options_t : option_group_t virtual void init (const font_options_t *font_opts) = 0; virtual void consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len) = 0; + unsigned int text_len, + hb_bool_t utf8_clusters) = 0; virtual void finish (const font_options_t *font_opts) = 0; const char *output_file; @@ -319,6 +323,7 @@ struct format_options_t : option_group_t GString *gs); void serialize_glyphs (hb_buffer_t *buffer, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs); void serialize_line_no (unsigned int line_no, GString *gs); @@ -327,6 +332,7 @@ struct format_options_t : option_group_t const char *text, unsigned int text_len, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs); diff --git a/util/view-cairo.cc b/util/view-cairo.cc index a03c555d6..5d8ead7c3 100644 --- a/util/view-cairo.cc +++ b/util/view-cairo.cc @@ -36,11 +36,12 @@ view_cairo_t::init (const font_options_t *font_opts) void view_cairo_t::consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len) + unsigned int text_len, + hb_bool_t utf8_clusters) { direction = hb_buffer_get_direction (buffer); helper_cairo_line_t l; - helper_cairo_line_from_buffer (&l, buffer, text, text_len, scale); + helper_cairo_line_from_buffer (&l, buffer, text, text_len, scale, utf8_clusters); g_array_append_val (lines, l); } diff --git a/util/view-cairo.hh b/util/view-cairo.hh index 0f4fe9454..eec90ea80 100644 --- a/util/view-cairo.hh +++ b/util/view-cairo.hh @@ -43,7 +43,8 @@ struct view_cairo_t : output_options_t, view_options_t { void init (const font_options_t *font_opts); void consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len); + unsigned int text_len, + hb_bool_t utf8_clusters); void finish (const font_options_t *font_opts); protected: