From 40bd7e9a1cf422b17f15d0f66547bde9098e6ef3 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 2 May 2016 14:47:45 +0200 Subject: [PATCH] [unsafe-to-break] Add UNSAFE_TO_BREAK flag Not all shapers code is updated to set this properly. GSUB and Arabic shaper are updated. GPOS and other shapers are NOT. Fixes https://github.com/behdad/harfbuzz/issues/224 --- src/hb-buffer-private.hh | 26 ++++++----- src/hb-buffer-serialize.cc | 24 +++++++--- src/hb-buffer.cc | 67 ++++++++++++++++++++++++++++ src/hb-buffer.h | 10 ++++- src/hb-ot-layout-gsub-table.hh | 7 ++- src/hb-ot-layout-gsubgpos-private.hh | 27 +++++++---- src/hb-ot-map.cc | 2 + src/hb-ot-shape-complex-arabic.cc | 3 ++ util/hb-shape.cc | 2 + util/options.cc | 1 + util/options.hh | 2 + 11 files changed, 142 insertions(+), 29 deletions(-) diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh index bca308da2..e598f9e1a 100644 --- a/src/hb-buffer-private.hh +++ b/src/hb-buffer-private.hh @@ -232,25 +232,31 @@ struct hb_buffer_t { for (unsigned int j = 0; j < len; j++) info[j].mask |= mask; } - HB_INTERNAL void set_masks (hb_mask_t value, - hb_mask_t mask, - unsigned int cluster_start, - unsigned int cluster_end); + HB_INTERNAL void set_masks (hb_mask_t value, hb_mask_t mask, + unsigned int cluster_start, unsigned int cluster_end); - HB_INTERNAL void merge_clusters (unsigned int start, - unsigned int end) + inline void merge_clusters (unsigned int start, unsigned int end) { if (end - start < 2) return; merge_clusters_impl (start, end); } - HB_INTERNAL void merge_clusters_impl (unsigned int start, - unsigned int end); - HB_INTERNAL void merge_out_clusters (unsigned int start, - unsigned int end); + HB_INTERNAL void merge_clusters_impl (unsigned int start, unsigned int end); + HB_INTERNAL void merge_out_clusters (unsigned int start, unsigned int end); /* Merge clusters for deleting current glyph, and skip it. */ HB_INTERNAL void delete_glyph (void); + inline void unsafe_to_break (unsigned int start, + unsigned int end) + { + if (end - start < 2) + return; + unsafe_to_break_impl (start, end); + } + HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end); + HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end); + + /* Internal methods */ HB_INTERNAL bool enlarge (unsigned int size); diff --git a/src/hb-buffer-serialize.cc b/src/hb-buffer-serialize.cc index 85696c589..f4a89ac26 100644 --- a/src/hb-buffer-serialize.cc +++ b/src/hb-buffer-serialize.cc @@ -145,10 +145,16 @@ _hb_buffer_serialize_glyphs_json (hb_buffer_t *buffer, if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS)) { - p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"dx\":%d,\"dy\":%d", - pos[i].x_offset, pos[i].y_offset); - p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"ax\":%d,\"ay\":%d", - pos[i].x_advance, pos[i].y_advance); + p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"dx\":%d,\"dy\":%d", + pos[i].x_offset, pos[i].y_offset)); + p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"ax\":%d,\"ay\":%d", + pos[i].x_advance, pos[i].y_advance)); + } + + if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS) + { + if (info[i].mask &HB_GLYPH_FLAG_DEFINED) + p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"fl\":%u", info[i].mask &HB_GLYPH_FLAG_DEFINED)); } if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS) @@ -156,9 +162,9 @@ _hb_buffer_serialize_glyphs_json (hb_buffer_t *buffer, hb_glyph_extents_t extents; hb_font_get_glyph_extents(font, info[i].codepoint, &extents); p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"xb\":%d,\"yb\":%d", - extents.x_bearing, extents.y_bearing)); + extents.x_bearing, extents.y_bearing)); p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"w\":%d,\"h\":%d", - extents.width, extents.height)); + extents.width, extents.height)); } *p++ = '}'; @@ -226,6 +232,12 @@ _hb_buffer_serialize_glyphs_text (hb_buffer_t *buffer, p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",%d", pos[i].y_advance)); } + if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS) + { + if (info[i].mask &HB_GLYPH_FLAG_DEFINED) + p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "#%X", info[i].mask &HB_GLYPH_FLAG_DEFINED)); + } + if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS) { hb_glyph_extents_t extents; diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 3777f7f8f..afec26ff0 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -637,6 +637,73 @@ done: skip_glyph (); } +static inline void +unsafe_to_break_two_infos (hb_glyph_info_t &info1, hb_glyph_info_t &info2) +{ + if (info1.cluster == info2.cluster) + return; + hb_glyph_info_t &unsafe = info1.cluster > info2.cluster ? info1 : info2; + unsafe.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; +} +static void +unsafe_to_break_infos_monotone (hb_glyph_info_t *info, unsigned int start, unsigned int end) +{ + for (unsigned int i = start + 1; i < end; i++) + unsafe_to_break_two_infos (info[i - 1], info[i]); +} +static unsigned int +infos_min_cluster (const hb_glyph_info_t *info, unsigned int start, unsigned int end, + unsigned int cluster) +{ + for (unsigned int i = start; i < end; i++) + cluster = MIN (cluster, info[i].cluster); + return cluster; +} +static void +infos_unsafe_to_break_cluster (hb_glyph_info_t *info, unsigned int start, unsigned int end, + unsigned int cluster) +{ + for (unsigned int i = start; i < end; i++) + if (cluster != info[i].cluster) + info[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; +} +void +hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end) +{ + if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS) + { + unsigned int cluster = (unsigned int) -1; + cluster = infos_min_cluster (info, start, end, cluster); + infos_unsafe_to_break_cluster (info, start, end, cluster); + return; + } + + /* The case of monotone clusters can be done faster. */ + unsafe_to_break_infos_monotone (info, start, end); +} +void +hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) +{ + assert (start <= out_len); + assert (idx <= end); + + if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS) + { + unsigned int cluster = (unsigned int) -1; + cluster = infos_min_cluster (out_info, start, out_len, cluster); + cluster = infos_min_cluster (info, idx, end, cluster); + infos_unsafe_to_break_cluster (out_info, start, out_len, cluster); + infos_unsafe_to_break_cluster (info, idx, end, cluster); + return; + } + + /* The case of monotone clusters can be done faster. */ + unsafe_to_break_infos_monotone (out_info, start, out_len); + if (start < out_len && idx < end) + unsafe_to_break_two_infos (out_info[out_len - 1], info[idx]); + unsafe_to_break_infos_monotone (info, idx, end); +} + void hb_buffer_t::guess_segment_properties (void) { diff --git a/src/hb-buffer.h b/src/hb-buffer.h index bf289c19b..995191a9d 100644 --- a/src/hb-buffer.h +++ b/src/hb-buffer.h @@ -63,7 +63,7 @@ HB_BEGIN_DECLS */ typedef struct hb_glyph_info_t { hb_codepoint_t codepoint; - hb_mask_t mask; + hb_mask_t mask; /* Holds hb_glyph_flags_t after hb_shape() */ uint32_t cluster; /*< private >*/ @@ -71,6 +71,11 @@ typedef struct hb_glyph_info_t { hb_var_int_t var2; } hb_glyph_info_t; +typedef enum { /*< flags >*/ + HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000002, + HB_GLYPH_FLAG_DEFINED = 0x00000002 /* OR of all defined flags */ +} hb_glyph_flags_t; + /** * hb_glyph_position_t: * @x_advance: how much the line advances after drawing this glyph when setting @@ -403,7 +408,8 @@ typedef enum { /*< flags >*/ HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS = 0x00000001u, HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS = 0x00000002u, HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES = 0x00000004u, - HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS = 0x00000008u + HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS = 0x00000008u, + HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS = 0x00000010u } hb_buffer_serialize_flags_t; /** diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh index 66fcb3f3a..85be7e7ea 100644 --- a/src/hb-ot-layout-gsub-table.hh +++ b/src/hb-ot-layout-gsub-table.hh @@ -1014,14 +1014,17 @@ struct ReverseChainSingleSubstFormat1 const OffsetArrayOf &lookahead = StructAfter > (backtrack); const ArrayOf &substitute = StructAfter > (lookahead); + unsigned int start_index = 0, end_index = 0; if (match_backtrack (c, backtrack.len, (USHORT *) backtrack.array, - match_coverage, this) && + match_coverage, this, + &start_index) && match_lookahead (c, lookahead.len, (USHORT *) lookahead.array, match_coverage, this, - 1)) + 1, &end_index)) { + c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); c->replace_glyph_inplace (substitute[index]); /* Note: We DON'T decrease buffer->idx. The main loop does it * for us. This is useful for preventing surprises if someone diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh index 2235d3a0a..472628a39 100644 --- a/src/hb-ot-layout-gsubgpos-private.hh +++ b/src/hb-ot-layout-gsubgpos-private.hh @@ -891,7 +891,8 @@ static inline bool match_backtrack (hb_apply_context_t *c, unsigned int count, const USHORT backtrack[], match_func_t match_func, - const void *match_data) + const void *match_data, + unsigned int *match_start) { TRACE_APPLY (NULL); @@ -903,6 +904,8 @@ static inline bool match_backtrack (hb_apply_context_t *c, if (!skippy_iter.prev ()) return_trace (false); + *match_start = skippy_iter.idx; + return_trace (true); } @@ -911,7 +914,8 @@ static inline bool match_lookahead (hb_apply_context_t *c, const USHORT lookahead[], match_func_t match_func, const void *match_data, - unsigned int offset) + unsigned int offset, + unsigned int *end_index) { TRACE_APPLY (NULL); @@ -923,6 +927,8 @@ static inline bool match_lookahead (hb_apply_context_t *c, if (!skippy_iter.next ()) return_trace (false); + *end_index = skippy_iter.idx + 1; + return_trace (true); } @@ -1145,10 +1151,11 @@ static inline bool context_apply_lookup (hb_apply_context_t *c, inputCount, input, lookup_context.funcs.match, lookup_context.match_data, &match_length, match_positions) - && apply_lookup (c, + && (c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length), + apply_lookup (c, inputCount, match_positions, lookupCount, lookupRecord, - match_length); + match_length)); } struct Rule @@ -1666,7 +1673,7 @@ static inline bool chain_context_apply_lookup (hb_apply_context_t *c, const LookupRecord lookupRecord[], ChainContextApplyLookupContext &lookup_context) { - unsigned int match_length = 0; + unsigned int start_index = 0, match_length = 0, end_index = 0; unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; return match_input (c, inputCount, input, @@ -1674,15 +1681,17 @@ static inline bool chain_context_apply_lookup (hb_apply_context_t *c, &match_length, match_positions) && match_backtrack (c, backtrackCount, backtrack, - lookup_context.funcs.match, lookup_context.match_data[0]) + lookup_context.funcs.match, lookup_context.match_data[0], + &start_index) && match_lookahead (c, lookaheadCount, lookahead, lookup_context.funcs.match, lookup_context.match_data[2], - match_length) - && apply_lookup (c, + match_length, &end_index) + && (c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index), + apply_lookup (c, inputCount, match_positions, lookupCount, lookupRecord, - match_length); + match_length)); } struct ChainRule diff --git a/src/hb-ot-map.cc b/src/hb-ot-map.cc index 014e4430a..174fd529f 100644 --- a/src/hb-ot-map.cc +++ b/src/hb-ot-map.cc @@ -191,6 +191,8 @@ hb_ot_map_builder_t::compile (hb_ot_map_t &m, /* Allocate bits now */ unsigned int next_bit = 1; + next_bit++; /* Allocate for HB_GLYPH_FLAG_UNSAFE_TO_BREAK */ + for (unsigned int i = 0; i < feature_infos.len; i++) { const feature_info_t *info = &feature_infos[i]; diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index 5dbbcd9af..ed7b3f2dd 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -321,7 +321,10 @@ arabic_joining (hb_buffer_t *buffer) const arabic_state_table_entry *entry = &arabic_state_table[state][this_type]; if (entry->prev_action != NONE && prev != (unsigned int) -1) + { info[prev].arabic_shaping_action() = entry->prev_action; + buffer->unsafe_to_break (prev, i + 1); + } info[i].arabic_shaping_action() = entry->curr_action; diff --git a/util/hb-shape.cc b/util/hb-shape.cc index 8a27da9e9..6adfbadd7 100644 --- a/util/hb-shape.cc +++ b/util/hb-shape.cc @@ -74,6 +74,8 @@ struct output_buffer_t flags |= HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS; if (format.show_extents) flags |= HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS; + if (format.show_flags) + flags |= HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS; format_flags = (hb_buffer_serialize_flags_t) flags; if (format.trace) diff --git a/util/options.cc b/util/options.cc index f4c111194..2aba6d405 100644 --- a/util/options.cc +++ b/util/options.cc @@ -781,6 +781,7 @@ format_options_t::add_options (option_parser_t *parser) {"no-clusters", 0, G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &this->show_clusters, "Do not output cluster indices", NULL}, {"show-extents", 0, 0, G_OPTION_ARG_NONE, &this->show_extents, "Output glyph extents", NULL}, + {"show-flags", 0, 0, G_OPTION_ARG_NONE, &this->show_flags, "Output glyph flags", NULL}, {"trace", 0, 0, G_OPTION_ARG_NONE, &this->trace, "Output interim shaping results", NULL}, {NULL} }; diff --git a/util/options.hh b/util/options.hh index 4b528c2ba..521263d5d 100644 --- a/util/options.hh +++ b/util/options.hh @@ -437,6 +437,7 @@ struct format_options_t : option_group_t show_unicode = false; show_line_num = false; show_extents = false; + show_flags = false; trace = false; add_options (parser); @@ -479,6 +480,7 @@ struct format_options_t : option_group_t hb_bool_t show_unicode; hb_bool_t show_line_num; hb_bool_t show_extents; + hb_bool_t show_flags; hb_bool_t trace; };