[buffer] Add HB_GLYPH_FLAG_UNSAFE_TO_CONCAT

Fixes https://github.com/harfbuzz/harfbuzz/issues/1463
This commit is contained in:
Behdad Esfahbod 2021-12-04 19:50:33 -08:00
parent a8b7f18804
commit 3122c2cdc4
5 changed files with 124 additions and 42 deletions

View File

@ -573,14 +573,14 @@ done:
} }
void void
hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end) hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end, hb_mask_t mask)
{ {
unsigned int cluster = UINT_MAX; unsigned int cluster = UINT_MAX;
cluster = _infos_find_min_cluster (info, start, end, cluster); cluster = _infos_find_min_cluster (info, start, end, cluster);
_unsafe_to_break_set_mask (info, start, end, cluster); _unsafe_to_break_set_mask (info, start, end, cluster, mask);
} }
void void
hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, hb_mask_t mask)
{ {
if (!have_output) if (!have_output)
{ {
@ -595,8 +595,8 @@ hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int en
cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); cluster = _infos_find_min_cluster (out_info, start, out_len, cluster);
cluster = _infos_find_min_cluster (info, idx, end, cluster); cluster = _infos_find_min_cluster (info, idx, end, cluster);
_unsafe_to_break_set_mask (out_info, start, out_len, cluster); _unsafe_to_break_set_mask (out_info, start, out_len, cluster, mask);
_unsafe_to_break_set_mask (info, idx, end, cluster); _unsafe_to_break_set_mask (info, idx, end, cluster, mask);
} }
void void

View File

@ -76,18 +76,67 @@ typedef struct hb_glyph_info_t {
* @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the
* beginning of the cluster this glyph is part of, * beginning of the cluster this glyph is part of,
* then both sides need to be re-shaped, as the * then both sides need to be re-shaped, as the
* result might be different. On the flip side, * result might be different.
* it means that when this flag is not present, *
* then it's safe to break the glyph-run at the * On the flip side, it means that when this
* beginning of this cluster, and the two sides * flag is not present, then it is safe to break
* represent the exact same result one would get * the glyph-run at the beginning of this
* if breaking input text at the beginning of * cluster, and the two sides will represent the
* this cluster and shaping the two sides * exact same result one would get if breaking
* separately. This can be used to optimize * input text at the beginning of this cluster
* paragraph layout, by avoiding re-shaping * and shaping the two sides separately.
* of each line after line-breaking, or limiting *
* the reshaping to a small piece around the * This can be used to optimize paragraph
* breaking point only. * layout, by avoiding re-shaping of each line
* after line-breaking.
*
* @HB_GLYPH_FLAG_UNSAFE_TO_CONCAT: Indicates that if input text is changed on one
* side of the beginning of the cluster this glyph
* is part of, then the shaping results for the
* other side might change.
*
* Note that the absence of this flag will NOT by
* itself mean that it IS safe to concat text.
* Only two pieces of text, both of which are clear
* of this flag, can be concatenated safely.
*
* This can be used to optimize paragraph
* layout, by avoiding re-shaping of each line
* after line-breaking, by limiting the
* reshaping to a small piece around the
* breaking position only, even if the breaking
* position carries the
* #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag, or when
* hyphenation or other text transformation
* happens at the line-break position, in the following
* way:
*
* 1. Iterate back from the line-break position till
* the first cluster start position that is
* NOT unsafe-to-concat, 2. shape the segment from
* there till the end of line, 3. check whether the
* resulting glyph-run is also clear of the
* unsafe-to-concat flag at its start-of-text position;
* if it is, just splice it into place and the line
* is shaped; if not, move on to a position further
* back that is clear of unsafe-to-concat and retry
* from there, and repeat.
*
* At the start of the next line a similar algorithm can
* be implemented. A slight complication will arise,
* because while our buffer API has a way to
* return flags for the position corresponding to
* start-of-text, there is currently no position
* corresponding to end-of-text. This limitation
* can be alleviated by shaping more text than needed
* and looking for the unsafe-to-concat flag within text
* clusters.
*
* The #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag will
* always imply this flag.
*
* Since: REPLACEME
*
* @HB_GLYPH_FLAG_DEFINED: All the currently defined flags. * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags.
* *
* Flags for #hb_glyph_info_t. * Flags for #hb_glyph_info_t.
@ -96,8 +145,9 @@ typedef struct hb_glyph_info_t {
*/ */
typedef enum { /*< flags >*/ typedef enum { /*< flags >*/
HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001, HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001,
HB_GLYPH_FLAG_UNSAFE_TO_CONCAT = 0x00000002,
HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */ HB_GLYPH_FLAG_DEFINED = 0x00000003 /* OR of all defined flags */
} hb_glyph_flags_t; } hb_glyph_flags_t;
HB_EXTERN hb_glyph_flags_t HB_EXTERN hb_glyph_flags_t

View File

@ -67,8 +67,8 @@ enum hb_buffer_scratch_flags_t {
HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u, HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u,
HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u,
HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u,
HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000010u,
HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u, HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS = 0x00000020u,
/* Reserved for complex shapers' internal use. */ /* Reserved for complex shapers' internal use. */
HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u,
@ -392,8 +392,19 @@ struct hb_buffer_t
return; return;
unsafe_to_break_impl (start, end); unsafe_to_break_impl (start, end);
} }
HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end); void unsafe_to_concat (unsigned int start,
HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end); unsigned int end)
{
if (end - start < 2)
return;
unsafe_to_break_impl (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT);
}
HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end,
hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT);
HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end,
hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT);
void unsafe_to_concat_from_outbuffer (unsigned int start, unsigned int end)
{ unsafe_to_break_from_outbuffer (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); }
/* Internal methods */ /* Internal methods */
@ -484,12 +495,7 @@ struct hb_buffer_t
set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0) set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0)
{ {
if (inf.cluster != cluster) if (inf.cluster != cluster)
{ inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED);
if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)
inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
else
inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
}
inf.cluster = cluster; inf.cluster = cluster;
} }
@ -505,13 +511,14 @@ struct hb_buffer_t
void void
_unsafe_to_break_set_mask (hb_glyph_info_t *infos, _unsafe_to_break_set_mask (hb_glyph_info_t *infos,
unsigned int start, unsigned int end, unsigned int start, unsigned int end,
unsigned int cluster) unsigned int cluster,
hb_mask_t mask)
{ {
for (unsigned int i = start; i < end; i++) for (unsigned int i = start; i < end; i++)
if (cluster != infos[i].cluster) if (cluster != infos[i].cluster)
{ {
scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK; scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS;
infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; infos[i].mask |= mask;
} }
} }

View File

@ -520,7 +520,7 @@ struct hb_ot_apply_context_t :
may_skip (const hb_glyph_info_t &info) const may_skip (const hb_glyph_info_t &info) const
{ return matcher.may_skip (c, info); } { return matcher.may_skip (c, info); }
bool next () bool next (unsigned *unsafe_to = nullptr)
{ {
assert (num_items > 0); assert (num_items > 0);
while (idx + num_items < end) while (idx + num_items < end)
@ -543,11 +543,17 @@ struct hb_ot_apply_context_t :
} }
if (skip == matcher_t::SKIP_NO) if (skip == matcher_t::SKIP_NO)
{
if (unsafe_to)
*unsafe_to = idx + 1;
return false; return false;
}
} }
if (unsafe_to)
*unsafe_to = end;
return false; return false;
} }
bool prev () bool prev (unsigned *unsafe_from = nullptr)
{ {
assert (num_items > 0); assert (num_items > 0);
while (idx > num_items - 1) while (idx > num_items - 1)
@ -570,8 +576,14 @@ struct hb_ot_apply_context_t :
} }
if (skip == matcher_t::SKIP_NO) if (skip == matcher_t::SKIP_NO)
{
if (unsafe_from)
*unsafe_from = hb_max (1u, idx) - 1u;
return false; return false;
}
} }
if (unsafe_from)
*unsafe_from = 0;
return false; return false;
} }
@ -1008,7 +1020,12 @@ static inline bool match_input (hb_ot_apply_context_t *c,
match_positions[0] = buffer->idx; match_positions[0] = buffer->idx;
for (unsigned int i = 1; i < count; i++) for (unsigned int i = 1; i < count; i++)
{ {
if (!skippy_iter.next ()) return_trace (false); unsigned unsafe_to;
if (!skippy_iter.next (&unsafe_to))
{
c->buffer->unsafe_to_concat (c->buffer->idx, unsafe_to);
return_trace (false);
}
match_positions[i] = skippy_iter.idx; match_positions[i] = skippy_iter.idx;
@ -1197,8 +1214,14 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c,
skippy_iter.set_match_func (match_func, match_data, backtrack); skippy_iter.set_match_func (match_func, match_data, backtrack);
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
if (!skippy_iter.prev ()) {
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
{
c->buffer->unsafe_to_concat_from_outbuffer (unsafe_from, c->buffer->idx);
return_trace (false); return_trace (false);
}
}
*match_start = skippy_iter.idx; *match_start = skippy_iter.idx;
@ -1220,8 +1243,14 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c,
skippy_iter.set_match_func (match_func, match_data, lookahead); skippy_iter.set_match_func (match_func, match_data, lookahead);
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
if (!skippy_iter.next ()) {
unsigned unsafe_to;
if (!skippy_iter.next (&unsafe_to))
{
c->buffer->unsafe_to_concat (c->buffer->idx + offset, unsafe_to);
return_trace (false); return_trace (false);
}
}
*end_index = skippy_iter.idx + 1; *end_index = skippy_iter.idx + 1;

View File

@ -1120,7 +1120,7 @@ hb_propagate_flags (hb_buffer_t *buffer)
/* Propagate cluster-level glyph flags to be the same on all cluster glyphs. /* Propagate cluster-level glyph flags to be the same on all cluster glyphs.
* Simplifies using them. */ * Simplifies using them. */
if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK)) if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS))
return; return;
hb_glyph_info_t *info = buffer->info; hb_glyph_info_t *info = buffer->info;
@ -1129,11 +1129,7 @@ hb_propagate_flags (hb_buffer_t *buffer)
{ {
unsigned int mask = 0; unsigned int mask = 0;
for (unsigned int i = start; i < end; i++) for (unsigned int i = start; i < end; i++)
if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED;
{
mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
break;
}
if (mask) if (mask)
for (unsigned int i = start; i < end; i++) for (unsigned int i = start; i < end; i++)
info[i].mask |= mask; info[i].mask |= mask;