From 3122c2cdc45a964efedad8953a2df67205c3e3a8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 4 Dec 2021 19:50:33 -0800 Subject: [PATCH] [buffer] Add HB_GLYPH_FLAG_UNSAFE_TO_CONCAT Fixes https://github.com/harfbuzz/harfbuzz/issues/1463 --- src/hb-buffer.cc | 10 ++--- src/hb-buffer.h | 76 ++++++++++++++++++++++++++++++------ src/hb-buffer.hh | 33 ++++++++++------ src/hb-ot-layout-gsubgpos.hh | 39 +++++++++++++++--- src/hb-ot-shape.cc | 8 +--- 5 files changed, 124 insertions(+), 42 deletions(-) diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index c673077bd..6d7c3182c 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -573,14 +573,14 @@ done: } void -hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end) +hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end, hb_mask_t mask) { unsigned int cluster = UINT_MAX; cluster = _infos_find_min_cluster (info, start, end, cluster); - _unsafe_to_break_set_mask (info, start, end, cluster); + _unsafe_to_break_set_mask (info, start, end, cluster, mask); } void -hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) +hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, hb_mask_t mask) { if (!have_output) { @@ -595,8 +595,8 @@ hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int en cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); cluster = _infos_find_min_cluster (info, idx, end, cluster); - _unsafe_to_break_set_mask (out_info, start, out_len, cluster); - _unsafe_to_break_set_mask (info, idx, end, cluster); + _unsafe_to_break_set_mask (out_info, start, out_len, cluster, mask); + _unsafe_to_break_set_mask (info, idx, end, cluster, mask); } void diff --git a/src/hb-buffer.h b/src/hb-buffer.h index 2ba56d345..9fb218c0d 100644 --- a/src/hb-buffer.h +++ b/src/hb-buffer.h @@ -76,18 +76,67 @@ typedef struct hb_glyph_info_t { * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the * beginning of the cluster this glyph is part of, * then both sides need to be re-shaped, as the - * result might be different. On the flip side, - * it means that when this flag is not present, - * then it's safe to break the glyph-run at the - * beginning of this cluster, and the two sides - * represent the exact same result one would get - * if breaking input text at the beginning of - * this cluster and shaping the two sides - * separately. This can be used to optimize - * paragraph layout, by avoiding re-shaping - * of each line after line-breaking, or limiting - * the reshaping to a small piece around the - * breaking point only. + * result might be different. + * + * On the flip side, it means that when this + * flag is not present, then it is safe to break + * the glyph-run at the beginning of this + * cluster, and the two sides will represent the + * exact same result one would get if breaking + * input text at the beginning of this cluster + * and shaping the two sides separately. + * + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking. + * + * @HB_GLYPH_FLAG_UNSAFE_TO_CONCAT: Indicates that if input text is changed on one + * side of the beginning of the cluster this glyph + * is part of, then the shaping results for the + * other side might change. + * + * Note that the absence of this flag will NOT by + * itself mean that it IS safe to concat text. + * Only two pieces of text both of which clear of + * this flag can be concatenated safely. + * + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking, by limiting the + * reshaping to a small piece around the + * breaking positin only, even if the breaking + * position carries the + * #HB_GLYPH_FLAG_UNSAFE_TO_BREAK or when + * hyphenation or other text transformation + * happens at line-break position, in the following + * way: + * + * 1. Iterate back from the line-break position till + * the the first cluster start position that is + * NOT unsafe-to-concat, 2. shape the segment from + * there till the end of line, 3. check whether the + * resulting glyph-run also is clear of the + * unsafe-to-concat at its start-of-text position; + * if it is, just splice it into place and the line + * is shaped; If not, move on to a position further + * back that is clear of unsafe-to-concat and retry + * from there, and repeat. + * + * At the start of next line a similar algorithm can + * be implemented. A slight complication will arise, + * because while our buffer API has a way to + * return flags for position corresponding to + * start-of-text, there is currently no position + * corresponding to end-of-text. This limitation + * can be alleviated by shaping more text than needed + * and looking for unsafe-to-concat flag within text + * clusters. + * + * The #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag will + * always imply this flag. + * + * Since: REPLACEME + * * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags. * * Flags for #hb_glyph_info_t. @@ -96,8 +145,9 @@ typedef struct hb_glyph_info_t { */ typedef enum { /*< flags >*/ HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001, + HB_GLYPH_FLAG_UNSAFE_TO_CONCAT = 0x00000002, - HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */ + HB_GLYPH_FLAG_DEFINED = 0x00000003 /* OR of all defined flags */ } hb_glyph_flags_t; HB_EXTERN hb_glyph_flags_t diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index c8ee0d90f..9f441305f 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -67,8 +67,8 @@ enum hb_buffer_scratch_flags_t { HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u, HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, - HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, - HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u, + HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000010u, + HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS = 0x00000020u, /* Reserved for complex shapers' internal use. */ HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, @@ -392,8 +392,19 @@ struct hb_buffer_t return; unsafe_to_break_impl (start, end); } - HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end); - HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end); + void unsafe_to_concat (unsigned int start, + unsigned int end) + { + if (end - start < 2) + return; + unsafe_to_break_impl (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + } + HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end, + hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, + hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + void unsafe_to_concat_from_outbuffer (unsigned int start, unsigned int end) + { unsafe_to_break_from_outbuffer (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); } /* Internal methods */ @@ -484,12 +495,7 @@ struct hb_buffer_t set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0) { if (inf.cluster != cluster) - { - if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - else - inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - } + inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED); inf.cluster = cluster; } @@ -505,13 +511,14 @@ struct hb_buffer_t void _unsafe_to_break_set_mask (hb_glyph_info_t *infos, unsigned int start, unsigned int end, - unsigned int cluster) + unsigned int cluster, + hb_mask_t mask) { for (unsigned int i = start; i < end; i++) if (cluster != infos[i].cluster) { - scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK; - infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + infos[i].mask |= mask; } } diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh index cb16caebd..e1dca6da3 100644 --- a/src/hb-ot-layout-gsubgpos.hh +++ b/src/hb-ot-layout-gsubgpos.hh @@ -520,7 +520,7 @@ struct hb_ot_apply_context_t : may_skip (const hb_glyph_info_t &info) const { return matcher.may_skip (c, info); } - bool next () + bool next (unsigned *unsafe_to = nullptr) { assert (num_items > 0); while (idx + num_items < end) @@ -543,11 +543,17 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_to) + *unsafe_to = idx + 1; return false; + } } + if (unsafe_to) + *unsafe_to = end; return false; } - bool prev () + bool prev (unsigned *unsafe_from = nullptr) { assert (num_items > 0); while (idx > num_items - 1) @@ -570,8 +576,14 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_from) + *unsafe_from = hb_max (1u, idx) - 1u; return false; + } } + if (unsafe_from) + *unsafe_from = 0; return false; } @@ -1008,7 +1020,12 @@ static inline bool match_input (hb_ot_apply_context_t *c, match_positions[0] = buffer->idx; for (unsigned int i = 1; i < count; i++) { - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + c->buffer->unsafe_to_concat (c->buffer->idx, unsafe_to); + return_trace (false); + } match_positions[i] = skippy_iter.idx; @@ -1197,8 +1214,14 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c, skippy_iter.set_match_func (match_func, match_data, backtrack); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.prev ()) + { + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + c->buffer->unsafe_to_concat_from_outbuffer (unsafe_from, c->buffer->idx); return_trace (false); + } + } *match_start = skippy_iter.idx; @@ -1220,8 +1243,14 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c, skippy_iter.set_match_func (match_func, match_data, lookahead); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.next ()) + { + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + c->buffer->unsafe_to_concat (c->buffer->idx + offset, unsafe_to); return_trace (false); + } + } *end_index = skippy_iter.idx + 1; diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index e16377f97..4bd8aaf03 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -1120,7 +1120,7 @@ hb_propagate_flags (hb_buffer_t *buffer) /* Propagate cluster-level glyph flags to be the same on all cluster glyphs. * Simplifies using them. */ - if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK)) + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS)) return; hb_glyph_info_t *info = buffer->info; @@ -1129,11 +1129,7 @@ hb_propagate_flags (hb_buffer_t *buffer) { unsigned int mask = 0; for (unsigned int i = start; i < end; i++) - if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - { - mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - break; - } + mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED; if (mask) for (unsigned int i = start; i < end; i++) info[i].mask |= mask;