Implement more granular cluster-merging

TODO: Documentation.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=71445
This commit is contained in:
Behdad Esfahbod 2015-07-22 16:51:12 +01:00
parent a60e2cfa39
commit 376d587f36
8 changed files with 77 additions and 14 deletions

8
NEWS
View File

@ -1,3 +1,11 @@
- Removed HB_NO_MERGE_CLUSTERS hack.
- New API:
hb_buffer_cluster_level_t enum
hb_buffer_get_cluster_level()
hb_buffer_set_cluster_level()
hb-shape / hb-view --cluster-level
Overview of changes leading to 0.9.41 Overview of changes leading to 0.9.41
Thursday, June 18, 2015 Thursday, June 18, 2015
===================================== =====================================

View File

@ -50,6 +50,7 @@ struct hb_buffer_t {
/* Information about how the text in the buffer should be treated */ /* Information about how the text in the buffer should be treated */
hb_unicode_funcs_t *unicode; /* Unicode functions */ hb_unicode_funcs_t *unicode; /* Unicode functions */
hb_buffer_flags_t flags; /* BOT / EOT / etc. */ hb_buffer_flags_t flags; /* BOT / EOT / etc. */
hb_buffer_cluster_level_t cluster_level;
hb_codepoint_t replacement; /* U+FFFD or something else. */ hb_codepoint_t replacement; /* U+FFFD or something else. */
/* Buffer contents */ /* Buffer contents */

View File

@ -507,9 +507,8 @@ void
hb_buffer_t::merge_clusters_impl (unsigned int start, hb_buffer_t::merge_clusters_impl (unsigned int start,
unsigned int end) unsigned int end)
{ {
#ifdef HB_NO_MERGE_CLUSTERS if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
return; return;
#endif
unsigned int cluster = info[start].cluster; unsigned int cluster = info[start].cluster;
@ -536,9 +535,8 @@ void
hb_buffer_t::merge_out_clusters (unsigned int start, hb_buffer_t::merge_out_clusters (unsigned int start,
unsigned int end) unsigned int end)
{ {
#ifdef HB_NO_MERGE_CLUSTERS if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
return; return;
#endif
if (unlikely (end - start < 2)) if (unlikely (end - start < 2))
return; return;
@ -738,6 +736,7 @@ hb_buffer_get_empty (void)
const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil), const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil),
HB_BUFFER_FLAG_DEFAULT, HB_BUFFER_FLAG_DEFAULT,
HB_BUFFER_CLUSTER_LEVEL_DEFAULT,
HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT, HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT,
HB_BUFFER_CONTENT_TYPE_INVALID, HB_BUFFER_CONTENT_TYPE_INVALID,
@ -1083,6 +1082,41 @@ hb_buffer_get_flags (hb_buffer_t *buffer)
return buffer->flags; return buffer->flags;
} }
/**
 * hb_buffer_set_cluster_level:
 * @buffer: a buffer.
 * @cluster_level: the cluster level to store on @buffer.
 *
 * Stores @cluster_level on @buffer.  If @buffer is an inert object
 * the call does nothing.
 *
 * Since: 0.9.42
 **/
void
hb_buffer_set_cluster_level (hb_buffer_t               *buffer,
			     hb_buffer_cluster_level_t  cluster_level)
{
  /* Inert objects are left untouched. */
  const bool inert = unlikely (hb_object_is_inert (buffer));

  if (!inert)
    buffer->cluster_level = cluster_level;
}
/**
 * hb_buffer_get_cluster_level:
 * @buffer: a buffer.
 *
 * Reads back the cluster level currently stored on @buffer.
 *
 * Return value: the value of the buffer's cluster_level field.
 *
 * Since: 0.9.42
 **/
hb_buffer_cluster_level_t
hb_buffer_get_cluster_level (hb_buffer_t *buffer)
{
  const hb_buffer_cluster_level_t current_level = buffer->cluster_level;

  return current_level;
}
/** /**
* hb_buffer_set_replacement_codepoint: * hb_buffer_set_replacement_codepoint:

View File

@ -185,7 +185,19 @@ hb_buffer_set_flags (hb_buffer_t *buffer,
hb_buffer_flags_t hb_buffer_flags_t
hb_buffer_get_flags (hb_buffer_t *buffer); hb_buffer_get_flags (hb_buffer_t *buffer);
/*
 * hb_buffer_cluster_level_t:
 *
 * Selects how much cluster merging is applied to a buffer.  The value is
 * stored per buffer and is read back through hb_buffer_get_cluster_level().
 */
typedef enum {
/* Default level.  The shaper's hb_form_clusters() pass and the Hangul
 * shaper's per-syllable merge_out_clusters() calls run only at this level. */
HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES = 0,
/* hb_ensure_native_direction() merges the clusters of every range it
 * reverses only at this level. */
HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS = 1,
/* hb_buffer_t::merge_clusters_impl() and hb_buffer_t::merge_out_clusters()
 * return immediately at this level, so they merge nothing. */
HB_BUFFER_CLUSTER_LEVEL_CHARACTERS = 2,
/* Alias for the level a buffer starts with (see hb_buffer_get_empty()). */
HB_BUFFER_CLUSTER_LEVEL_DEFAULT = HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES
} hb_buffer_cluster_level_t;
/* Stores cluster_level on buffer; ignored when buffer is inert. */
void
hb_buffer_set_cluster_level (hb_buffer_t *buffer,
hb_buffer_cluster_level_t cluster_level);
/* Returns the cluster level currently stored on buffer. */
hb_buffer_cluster_level_t
hb_buffer_get_cluster_level (hb_buffer_t *buffer);
#define HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT 0xFFFDu #define HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT 0xFFFDu

View File

@ -209,14 +209,9 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
hb_glyph_info_t tone = info[end]; hb_glyph_info_t tone = info[end];
memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t)); memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t));
info[start] = tone; info[start] = tone;
}
/* Merge clusters across the (possibly reordered) syllable+tone.
* We want to merge even in the zero-width tone mark case here,
* so that clustering behavior isn't dependent on how the tone mark
* is handled by the font.
*/
buffer->merge_out_clusters (start, end + 1); buffer->merge_out_clusters (start, end + 1);
} }
}
else else
{ {
/* No valid syllable as base for tone mark; try to insert dotted circle. */ /* No valid syllable as base for tone mark; try to insert dotted circle. */
@ -296,6 +291,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
} }
else else
end = start + 2; end = start + 2;
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
buffer->merge_out_clusters (start, end); buffer->merge_out_clusters (start, end);
continue; continue;
} }
@ -368,6 +364,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
info[i++].hangul_shaping_feature() = VJMO; info[i++].hangul_shaping_feature() = VJMO;
if (i < end) if (i < end)
info[i++].hangul_shaping_feature() = TJMO; info[i++].hangul_shaping_feature() = TJMO;
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
buffer->merge_out_clusters (start, end); buffer->merge_out_clusters (start, end);
continue; continue;
} }

View File

@ -264,6 +264,9 @@ hb_insert_dotted_circle (hb_buffer_t *buffer, hb_font_t *font)
static void static void
hb_form_clusters (hb_buffer_t *buffer) hb_form_clusters (hb_buffer_t *buffer)
{ {
if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
return;
/* Loop duplicated in hb_ensure_native_direction(). */ /* Loop duplicated in hb_ensure_native_direction(). */
unsigned int base = 0; unsigned int base = 0;
unsigned int count = buffer->len; unsigned int count = buffer->len;
@ -301,10 +304,14 @@ hb_ensure_native_direction (hb_buffer_t *buffer)
if (likely (!HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&info[i])))) if (likely (!HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&info[i]))))
{ {
buffer->reverse_range (base, i); buffer->reverse_range (base, i);
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
buffer->merge_clusters (base, i);
base = i; base = i;
} }
} }
buffer->reverse_range (base, count); buffer->reverse_range (base, count);
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
buffer->merge_clusters (base, count);
buffer->reverse (); buffer->reverse ();

View File

@ -291,6 +291,7 @@ shape_options_t::add_options (option_parser_t *parser)
{"eot", 0, 0, G_OPTION_ARG_NONE, &this->eot, "Treat text as end-of-paragraph", NULL}, {"eot", 0, 0, G_OPTION_ARG_NONE, &this->eot, "Treat text as end-of-paragraph", NULL},
{"preserve-default-ignorables",0, 0, G_OPTION_ARG_NONE, &this->preserve_default_ignorables, "Preserve Default-Ignorable characters", NULL}, {"preserve-default-ignorables",0, 0, G_OPTION_ARG_NONE, &this->preserve_default_ignorables, "Preserve Default-Ignorable characters", NULL},
{"utf8-clusters", 0, 0, G_OPTION_ARG_NONE, &this->utf8_clusters, "Use UTF8 byte indices, not char indices", NULL}, {"utf8-clusters", 0, 0, G_OPTION_ARG_NONE, &this->utf8_clusters, "Use UTF8 byte indices, not char indices", NULL},
{"cluster-level", 0, 0, G_OPTION_ARG_INT, &this->cluster_level, "Cluster merging level (default: 0)", "0/1/2"},
{"normalize-glyphs",0, 0, G_OPTION_ARG_NONE, &this->normalize_glyphs, "Rearrange glyph clusters in nominal order", NULL}, {"normalize-glyphs",0, 0, G_OPTION_ARG_NONE, &this->normalize_glyphs, "Rearrange glyph clusters in nominal order", NULL},
{"num-iterations", 0, 0, G_OPTION_ARG_INT, &this->num_iterations, "Run shaper N times (default: 1)", "N"}, {"num-iterations", 0, 0, G_OPTION_ARG_INT, &this->num_iterations, "Run shaper N times (default: 1)", "N"},
{NULL} {NULL}

View File

@ -180,6 +180,7 @@ struct shape_options_t : option_group_t
num_features = 0; num_features = 0;
shapers = NULL; shapers = NULL;
utf8_clusters = false; utf8_clusters = false;
cluster_level = HB_BUFFER_CLUSTER_LEVEL_DEFAULT;
normalize_glyphs = false; normalize_glyphs = false;
num_iterations = 1; num_iterations = 1;
@ -202,6 +203,7 @@ struct shape_options_t : option_group_t
(bot ? HB_BUFFER_FLAG_BOT : 0) | (bot ? HB_BUFFER_FLAG_BOT : 0) |
(eot ? HB_BUFFER_FLAG_EOT : 0) | (eot ? HB_BUFFER_FLAG_EOT : 0) |
(preserve_default_ignorables ? HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES : 0))); (preserve_default_ignorables ? HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES : 0)));
hb_buffer_set_cluster_level (buffer, cluster_level);
hb_buffer_guess_segment_properties (buffer); hb_buffer_guess_segment_properties (buffer);
} }
@ -265,6 +267,7 @@ struct shape_options_t : option_group_t
unsigned int num_features; unsigned int num_features;
char **shapers; char **shapers;
hb_bool_t utf8_clusters; hb_bool_t utf8_clusters;
hb_buffer_cluster_level_t cluster_level;
hb_bool_t normalize_glyphs; hb_bool_t normalize_glyphs;
unsigned int num_iterations; unsigned int num_iterations;
}; };