Add --utf8-clusters

Also fix cairo cluster generation.
This commit is contained in:
Behdad Esfahbod 2012-04-16 18:08:20 -04:00
parent effb42e5c5
commit 95cefdf96e
8 changed files with 60 additions and 28 deletions

View File

@@ -36,7 +36,8 @@ struct output_buffer_t : output_options_t, format_options_t
void init (const font_options_t *font_opts); void init (const font_options_t *font_opts);
void consume_line (hb_buffer_t *buffer, void consume_line (hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len); unsigned int text_len,
hb_bool_t utf8_clusters);
void finish (const font_options_t *font_opts); void finish (const font_options_t *font_opts);
protected: protected:
@@ -57,11 +58,12 @@ output_buffer_t::init (const font_options_t *font_opts)
void void
output_buffer_t::consume_line (hb_buffer_t *buffer, output_buffer_t::consume_line (hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len) unsigned int text_len,
hb_bool_t utf8_clusters)
{ {
line_no++; line_no++;
g_string_set_size (gs, 0); g_string_set_size (gs, 0);
serialize_line (buffer, line_no, text, text_len, font, gs); serialize_line (buffer, line_no, text, text_len, font, utf8_clusters, gs);
fprintf (fp, "%s", gs->str); fprintf (fp, "%s", gs->str);
} }

View File

@@ -65,7 +65,7 @@ struct hb_view_t
buffer)) buffer))
fail (FALSE, "All shapers failed"); fail (FALSE, "All shapers failed");
output.consume_line (buffer, text, text_len); output.consume_line (buffer, text, text_len, shaper.utf8_clusters);
} }
hb_buffer_destroy (buffer); hb_buffer_destroy (buffer);

View File

@@ -301,7 +301,8 @@ helper_cairo_line_from_buffer (helper_cairo_line_t *l,
hb_buffer_t *buffer, hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len, unsigned int text_len,
double scale) double scale,
hb_bool_t utf8_clusters)
{ {
memset (l, 0, sizeof (*l)); memset (l, 0, sizeof (*l));
@@ -349,27 +350,38 @@ helper_cairo_line_from_buffer (helper_cairo_line_t *l,
hb_bool_t backward = HB_DIRECTION_IS_BACKWARD (hb_buffer_get_direction (buffer)); hb_bool_t backward = HB_DIRECTION_IS_BACKWARD (hb_buffer_get_direction (buffer));
l->cluster_flags = backward ? CAIRO_TEXT_CLUSTER_FLAG_BACKWARD : (cairo_text_cluster_flags_t) 0; l->cluster_flags = backward ? CAIRO_TEXT_CLUSTER_FLAG_BACKWARD : (cairo_text_cluster_flags_t) 0;
unsigned int cluster = 0; unsigned int cluster = 0;
const char *start = l->utf8, *end = start;
l->clusters[cluster].num_glyphs++; l->clusters[cluster].num_glyphs++;
if (backward) { if (backward) {
for (i = l->num_glyphs - 2; i >= 0; i--) { for (i = l->num_glyphs - 2; i >= 0; i--) {
if (hb_glyph[i].cluster != hb_glyph[i+1].cluster) { if (hb_glyph[i].cluster != hb_glyph[i+1].cluster) {
g_assert (hb_glyph[i].cluster > hb_glyph[i+1].cluster); g_assert (hb_glyph[i].cluster > hb_glyph[i+1].cluster);
l->clusters[cluster].num_bytes += hb_glyph[i].cluster - hb_glyph[i+1].cluster; if (utf8_clusters)
end = start + hb_glyph[i].cluster - hb_glyph[i+1].cluster;
else
end = g_utf8_offset_to_pointer (start, hb_glyph[i].cluster - hb_glyph[i+1].cluster);
l->clusters[cluster].num_bytes = end - start;
start = end;
cluster++; cluster++;
} }
l->clusters[cluster].num_glyphs++; l->clusters[cluster].num_glyphs++;
} }
l->clusters[cluster].num_bytes += text_len - hb_glyph[0].cluster; l->clusters[cluster].num_bytes = l->utf8 + text_len - start;
} else { } else {
for (i = 1; i < (int) l->num_glyphs; i++) { for (i = 1; i < (int) l->num_glyphs; i++) {
if (hb_glyph[i].cluster != hb_glyph[i-1].cluster) { if (hb_glyph[i].cluster != hb_glyph[i-1].cluster) {
g_assert (hb_glyph[i].cluster > hb_glyph[i-1].cluster); g_assert (hb_glyph[i].cluster > hb_glyph[i-1].cluster);
l->clusters[cluster].num_bytes += hb_glyph[i].cluster - hb_glyph[i-1].cluster; if (utf8_clusters)
end = start + hb_glyph[i].cluster - hb_glyph[i-1].cluster;
else
end = g_utf8_offset_to_pointer (start, hb_glyph[i].cluster - hb_glyph[i-1].cluster);
l->clusters[cluster].num_bytes = end - start;
start = end;
cluster++; cluster++;
} }
l->clusters[cluster].num_glyphs++; l->clusters[cluster].num_glyphs++;
} }
l->clusters[cluster].num_bytes += text_len - hb_glyph[i - 1].cluster; l->clusters[cluster].num_bytes = l->utf8 + text_len - start;
} }
} }
} }

View File

@@ -75,6 +75,7 @@ helper_cairo_line_from_buffer (helper_cairo_line_t *l,
hb_buffer_t *buffer, hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len, unsigned int text_len,
double scale); double scale,
hb_bool_t utf8_clusters);
#endif #endif

View File

@@ -391,6 +391,7 @@ shape_options_t::add_options (option_parser_t *parser)
{"direction", 0, 0, G_OPTION_ARG_STRING, &this->direction, "Set text direction (default: auto)", "ltr/rtl/ttb/btt"}, {"direction", 0, 0, G_OPTION_ARG_STRING, &this->direction, "Set text direction (default: auto)", "ltr/rtl/ttb/btt"},
{"language", 0, 0, G_OPTION_ARG_STRING, &this->language, "Set text language (default: $LANG)", "langstr"}, {"language", 0, 0, G_OPTION_ARG_STRING, &this->language, "Set text language (default: $LANG)", "langstr"},
{"script", 0, 0, G_OPTION_ARG_STRING, &this->script, "Set text script (default: auto)", "ISO-15924 tag"}, {"script", 0, 0, G_OPTION_ARG_STRING, &this->script, "Set text script (default: auto)", "ISO-15924 tag"},
{"utf8-clusters", 0, 0, G_OPTION_ARG_NONE, &this->utf8_clusters, "Use UTF-8 byte indices, not char indices", NULL},
{NULL} {NULL}
}; };
parser->add_group (entries, parser->add_group (entries,
@@ -404,9 +405,12 @@ shape_options_t::add_options (option_parser_t *parser)
" Comma-separated list of font features to apply to text\n" " Comma-separated list of font features to apply to text\n"
"\n" "\n"
" Features can be enabled or disabled, either globally or limited to\n" " Features can be enabled or disabled, either globally or limited to\n"
" specific character ranges. The range indices refer to the positions\n" " specific character ranges.\n"
" between Unicode characters. The position before the first character\n" "\n"
" is 0, and the position after the first character is 1, and so on.\n" " The range indices refer to the positions between Unicode characters,\n"
" unless the --utf8-clusters is provided, in which case range indices\n"
" refer to UTF-8 byte indices. The position before the first character\n"
" is always 0.\n"
"\n" "\n"
" The format is Python-esque. Here is how it all works:\n" " The format is Python-esque. Here is how it all works:\n"
"\n" "\n"
@@ -716,6 +720,7 @@ format_options_t::serialize_unicode (hb_buffer_t *buffer,
void void
format_options_t::serialize_glyphs (hb_buffer_t *buffer, format_options_t::serialize_glyphs (hb_buffer_t *buffer,
hb_font_t *font, hb_font_t *font,
hb_bool_t utf8_clusters,
GString *gs) GString *gs)
{ {
FT_Face ft_face = show_glyph_names ? hb_ft_font_get_face (font) : NULL; FT_Face ft_face = show_glyph_names ? hb_ft_font_get_face (font) : NULL;
@@ -739,8 +744,11 @@ format_options_t::serialize_glyphs (hb_buffer_t *buffer,
} else } else
g_string_append_printf (gs, "%u", info->codepoint); g_string_append_printf (gs, "%u", info->codepoint);
if (show_clusters) if (show_clusters) {
g_string_append_printf (gs, "=%u", info->cluster); g_string_append_printf (gs, "=%u", info->cluster);
if (utf8_clusters)
g_string_append (gs, "u8");
}
if (show_positions && (pos->x_offset || pos->y_offset)) { if (show_positions && (pos->x_offset || pos->y_offset)) {
g_string_append_c (gs, '@'); g_string_append_c (gs, '@');
@@ -771,6 +779,7 @@ format_options_t::serialize_line (hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len, unsigned int text_len,
hb_font_t *font, hb_font_t *font,
hb_bool_t utf8_clusters,
GString *gs) GString *gs)
{ {
if (show_text) { if (show_text) {
@@ -790,6 +799,6 @@ format_options_t::serialize_line (hb_buffer_t *buffer,
} }
serialize_line_no (line_no, gs); serialize_line_no (line_no, gs);
serialize_glyphs (buffer, font, gs); serialize_glyphs (buffer, font, utf8_clusters, gs);
g_string_append_c (gs, '\n'); g_string_append_c (gs, '\n');
} }

View File

@@ -140,6 +140,7 @@ struct shape_options_t : option_group_t
features = NULL; features = NULL;
num_features = 0; num_features = 0;
shapers = NULL; shapers = NULL;
utf8_clusters = false;
add_options (parser); add_options (parser);
} }
@@ -161,15 +162,16 @@ struct shape_options_t : option_group_t
hb_buffer_reset (buffer); hb_buffer_reset (buffer);
hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len); hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len);
/* Reset cluster values to refer to Unicode character index if (!utf8_clusters) {
* instead of UTF-8 index. /* Reset cluster values to refer to Unicode character index
* TODO: Add an option for this. */ * instead of UTF-8 index. */
unsigned int num_glyphs = hb_buffer_get_length (buffer); unsigned int num_glyphs = hb_buffer_get_length (buffer);
hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL);
for (unsigned int i = 0; i < num_glyphs; i++) for (unsigned int i = 0; i < num_glyphs; i++)
{ {
info->cluster = i; info->cluster = i;
info++; info++;
}
} }
setup_buffer (buffer); setup_buffer (buffer);
@@ -182,6 +184,7 @@ struct shape_options_t : option_group_t
hb_feature_t *features; hb_feature_t *features;
unsigned int num_features; unsigned int num_features;
char **shapers; char **shapers;
hb_bool_t utf8_clusters;
}; };
@@ -285,7 +288,8 @@ struct output_options_t : option_group_t
virtual void init (const font_options_t *font_opts) = 0; virtual void init (const font_options_t *font_opts) = 0;
virtual void consume_line (hb_buffer_t *buffer, virtual void consume_line (hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len) = 0; unsigned int text_len,
hb_bool_t utf8_clusters) = 0;
virtual void finish (const font_options_t *font_opts) = 0; virtual void finish (const font_options_t *font_opts) = 0;
const char *output_file; const char *output_file;
@@ -319,6 +323,7 @@ struct format_options_t : option_group_t
GString *gs); GString *gs);
void serialize_glyphs (hb_buffer_t *buffer, void serialize_glyphs (hb_buffer_t *buffer,
hb_font_t *font, hb_font_t *font,
hb_bool_t utf8_clusters,
GString *gs); GString *gs);
void serialize_line_no (unsigned int line_no, void serialize_line_no (unsigned int line_no,
GString *gs); GString *gs);
@@ -327,6 +332,7 @@ struct format_options_t : option_group_t
const char *text, const char *text,
unsigned int text_len, unsigned int text_len,
hb_font_t *font, hb_font_t *font,
hb_bool_t utf8_clusters,
GString *gs); GString *gs);

View File

@@ -36,11 +36,12 @@ view_cairo_t::init (const font_options_t *font_opts)
void void
view_cairo_t::consume_line (hb_buffer_t *buffer, view_cairo_t::consume_line (hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len) unsigned int text_len,
hb_bool_t utf8_clusters)
{ {
direction = hb_buffer_get_direction (buffer); direction = hb_buffer_get_direction (buffer);
helper_cairo_line_t l; helper_cairo_line_t l;
helper_cairo_line_from_buffer (&l, buffer, text, text_len, scale); helper_cairo_line_from_buffer (&l, buffer, text, text_len, scale, utf8_clusters);
g_array_append_val (lines, l); g_array_append_val (lines, l);
} }

View File

@@ -43,7 +43,8 @@ struct view_cairo_t : output_options_t, view_options_t {
void init (const font_options_t *font_opts); void init (const font_options_t *font_opts);
void consume_line (hb_buffer_t *buffer, void consume_line (hb_buffer_t *buffer,
const char *text, const char *text,
unsigned int text_len); unsigned int text_len,
hb_bool_t utf8_clusters);
void finish (const font_options_t *font_opts); void finish (const font_options_t *font_opts);
protected: protected: