[util] Make clusters work with char offset instead of UTF-8 offset
This means the --features range indices now refer to character positions instead of byte positions. The same applies to the cluster values reported by hb-shape. An option for byte indices will be added later.
This commit is contained in:
parent
0f68f4a0b5
commit
d530024168
|
@ -404,8 +404,11 @@ shape_options_t::add_options (option_parser_t *parser)
|
|||
" Comma-separated list of font features to apply to text\n"
|
||||
"\n"
|
||||
" Features can be enabled or disabled, either globally or limited to\n"
|
||||
" specific byte ranges. The format is Python-esque. Here is how it all\n"
|
||||
" works:\n"
|
||||
" specific character ranges. The range indices refer to the positions\n"
|
||||
" between Unicode characters. The position before the first character\n"
|
||||
" is 0, and the position after the first character is 1, and so on.\n"
|
||||
"\n"
|
||||
" The format is Python-esque. Here is how it all works:\n"
|
||||
"\n"
|
||||
" Syntax: Value: Start: End:\n"
|
||||
"\n"
|
||||
|
|
|
@ -153,6 +153,18 @@ struct shape_options_t : option_group_t
|
|||
hb_font_t *font, hb_buffer_t *buffer) {
|
||||
hb_buffer_reset (buffer);
|
||||
hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len);
|
||||
|
||||
/* Reset cluster values to refer to Unicode character index
|
||||
* instead of UTF-8 index.
|
||||
* TODO: Add an option for this. */
|
||||
unsigned int num_glyphs = hb_buffer_get_length (buffer);
|
||||
hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL);
|
||||
for (unsigned int i = 0; i < num_glyphs; i++)
|
||||
{
|
||||
info->cluster = i;
|
||||
info++;
|
||||
}
|
||||
|
||||
setup_buffer (buffer);
|
||||
return hb_shape_full (font, buffer, features, num_features, NULL, shapers);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue