Merge pull request #3297 from harfbuzz/unsafe-to-concat

Implement UNSAFE_TO_CONCAT flag
Behdad Esfahbod 2022-01-26 12:23:29 -08:00 committed by GitHub
commit e9cc5f6cdb
19 changed files with 588 additions and 159 deletions

View File

@ -572,33 +572,6 @@ done:
skip_glyph ();
}
void
hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end)
{
unsigned int cluster = UINT_MAX;
cluster = _infos_find_min_cluster (info, start, end, cluster);
_unsafe_to_break_set_mask (info, start, end, cluster);
}
void
hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end)
{
if (!have_output)
{
unsafe_to_break_impl (start, end);
return;
}
assert (start <= out_len);
assert (idx <= end);
unsigned int cluster = UINT_MAX;
cluster = _infos_find_min_cluster (out_info, start, out_len, cluster);
cluster = _infos_find_min_cluster (info, idx, end, cluster);
_unsafe_to_break_set_mask (out_info, start, out_len, cluster);
_unsafe_to_break_set_mask (info, idx, end, cluster);
}
void
hb_buffer_t::guess_segment_properties ()
{

View File

@ -76,18 +76,79 @@ typedef struct hb_glyph_info_t {
* @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the
* beginning of the cluster this glyph is part of,
* then both sides need to be re-shaped, as the
* result might be different. On the flip side,
* it means that when this flag is not present,
* then it's safe to break the glyph-run at the
* beginning of this cluster, and the two sides
* represent the exact same result one would get
* if breaking input text at the beginning of
* this cluster and shaping the two sides
* separately. This can be used to optimize
* paragraph layout, by avoiding re-shaping
* of each line after line-breaking, or limiting
* the reshaping to a small piece around the
* breaking point only.
* result might be different.
*
* On the flip side, it means that when this
* flag is not present, then it is safe to break
* the glyph-run at the beginning of this
* cluster, and the two sides will represent the
* exact same result one would get if breaking
* input text at the beginning of this cluster
* and shaping the two sides separately.
*
* This can be used to optimize paragraph
* layout, by avoiding re-shaping of each line
* after line-breaking.
*
* @HB_GLYPH_FLAG_UNSAFE_TO_CONCAT: Indicates that if input text is changed on one
* side of the beginning of the cluster this glyph
* is part of, then the shaping results for the
* other side might change.
*
* Note that the absence of this flag will NOT by
* itself mean that it IS safe to concat text.
* Only two pieces of text, both of which are clear of
* this flag, can be concatenated safely.
*
* This can be used to optimize paragraph
* layout, by avoiding re-shaping of each line
* after line-breaking, by limiting the
* reshaping to a small piece around the
* breaking position only, even if the breaking
* position carries the
* #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag, or when
* hyphenation or another text transformation
* happens at the line-break position, in the
* following way:
*
* 1. Iterate back from the line-break position
* until the first cluster start position that is
* NOT unsafe-to-concat, 2. shape the segment from
* there to the end of the line, 3. check whether
* the resulting glyph-run is also clear of the
* unsafe-to-concat flag at its start-of-text
* position; if it is, just splice it into place
* and the line is shaped; if not, move on to a
* position further back that is clear of
* unsafe-to-concat and retry from there, and
* repeat.
*
* At the start of the next line a similar algorithm
* can be implemented. That is: 1. Iterate forward
* from the line-break position until the first
* cluster start position that is NOT
* unsafe-to-concat, 2. shape the segment from the
* beginning of the line to that position, 3. check
* whether the resulting glyph-run is also clear of
* the unsafe-to-concat flag at its end-of-text
* position; if it is, just splice it into place and
* the beginning of the line is shaped; if not, move
* on to a position further forward that is clear of
* unsafe-to-concat and retry up to there, and repeat.
*
* A slight complication arises in the
* implementation of the algorithm above:
* while our buffer API has a way to
* return flags for the position corresponding to
* start-of-text, there is currently no position
* corresponding to end-of-text. This limitation
* can be alleviated by shaping more text than
* needed and looking for the unsafe-to-concat flag
* within the extra text's clusters.
*
* The #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag will
* always imply this flag.
*
* Since: REPLACEME
*
* @HB_GLYPH_FLAG_DEFINED: All the currently defined flags.
*
* Flags for #hb_glyph_info_t.
@ -96,8 +157,9 @@ typedef struct hb_glyph_info_t {
*/
typedef enum { /*< flags >*/
HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001,
HB_GLYPH_FLAG_UNSAFE_TO_CONCAT = 0x00000002,
HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */
HB_GLYPH_FLAG_DEFINED = 0x00000003 /* OR of all defined flags */
} hb_glyph_flags_t;
HB_EXTERN hb_glyph_flags_t

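To make the iterate-back step described in the #HB_GLYPH_FLAG_UNSAFE_TO_CONCAT documentation above more concrete, here is a minimal sketch (not part of this commit) of how a layout engine might locate the re-shaping start point for the tail of a line. It assumes an already-shaped paragraph with monotone, forward-ordered clusters; the helper name find_concat_safe_start and its calling convention are illustrative only, not HarfBuzz API.

#include <hb.h>

/* Given the glyph infos of an already-shaped paragraph and the cluster index
 * of an intended line break, walk backwards to the closest cluster start
 * before the break that is NOT flagged unsafe-to-concat.  Re-shaping of the
 * line's tail can start from the returned cluster. */
static unsigned
find_concat_safe_start (const hb_glyph_info_t *info, unsigned num_glyphs,
                        unsigned break_cluster)
{
  for (unsigned i = num_glyphs; i; i--)
  {
    unsigned j = i - 1;
    if (info[j].cluster >= break_cluster)
      continue; /* Not yet before the break position. */
    if (j && info[j].cluster == info[j - 1].cluster)
      continue; /* Not a cluster start. */
    if (!(hb_glyph_info_get_glyph_flags (&info[j]) &
          HB_GLYPH_FLAG_UNSAFE_TO_CONCAT))
      return info[j].cluster; /* Safe place to start re-shaping from. */
  }
  return 0; /* No safe point found; fall back to the paragraph start. */
}

The caller would then shape the text from the returned cluster to the line end (including any inserted hyphen), and splice the result in only if the first glyph of that run is also clear of HB_GLYPH_FLAG_UNSAFE_TO_CONCAT; otherwise it retries from an earlier safe position, as the documented steps describe.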
View File

@ -67,8 +67,8 @@ enum hb_buffer_scratch_flags_t {
HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u,
HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u,
HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u,
HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u,
HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u,
HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000010u,
HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS = 0x00000020u,
/* Reserved for complex shapers' internal use. */
HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u,
@ -385,15 +385,80 @@ struct hb_buffer_t
/* Merge clusters for deleting current glyph, and skip it. */
HB_INTERNAL void delete_glyph ();
void unsafe_to_break (unsigned int start,
unsigned int end)
void set_glyph_flags (hb_mask_t mask,
unsigned start = 0,
unsigned end = (unsigned) -1,
bool interior = false,
bool from_out_buffer = false)
{
if (end - start < 2)
end = hb_min (end, len);
if (interior && !from_out_buffer && end - start < 2)
return;
unsafe_to_break_impl (start, end);
scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS;
if (!from_out_buffer || !have_output)
{
if (!interior)
{
for (unsigned i = start; i < end; i++)
info[i].mask |= mask;
}
else
{
unsigned cluster = _infos_find_min_cluster (info, start, end);
_infos_set_glyph_flags (info, start, end, cluster, mask);
}
}
else
{
assert (start <= out_len);
assert (idx <= end);
if (!interior)
{
for (unsigned i = start; i < out_len; i++)
out_info[i].mask |= mask;
for (unsigned i = idx; i < end; i++)
info[i].mask |= mask;
}
else
{
unsigned cluster = _infos_find_min_cluster (info, idx, end);
cluster = _infos_find_min_cluster (out_info, start, out_len, cluster);
_infos_set_glyph_flags (out_info, start, out_len, cluster, mask);
_infos_set_glyph_flags (info, idx, end, cluster, mask);
}
}
}
void unsafe_to_break (unsigned int start = 0, unsigned int end = -1)
{
set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT,
start, end,
true);
}
void unsafe_to_concat (unsigned int start = 0, unsigned int end = -1)
{
set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT,
start, end,
true);
}
void unsafe_to_break_from_outbuffer (unsigned int start = 0, unsigned int end = -1)
{
set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT,
start, end,
true, true);
}
void unsafe_to_concat_from_outbuffer (unsigned int start = 0, unsigned int end = -1)
{
set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT,
start, end,
false, true);
}
HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end);
HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end);
/* Internal methods */
@ -484,36 +549,31 @@ struct hb_buffer_t
set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0)
{
if (inf.cluster != cluster)
{
if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)
inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
else
inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
}
inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED);
inf.cluster = cluster;
}
static unsigned
_infos_find_min_cluster (const hb_glyph_info_t *infos,
unsigned start, unsigned end,
unsigned cluster)
{
for (unsigned int i = start; i < end; i++)
cluster = hb_min (cluster, infos[i].cluster);
return cluster;
}
void
_unsafe_to_break_set_mask (hb_glyph_info_t *infos,
unsigned int start, unsigned int end,
unsigned int cluster)
_infos_set_glyph_flags (hb_glyph_info_t *infos,
unsigned int start, unsigned int end,
unsigned int cluster,
hb_mask_t mask)
{
for (unsigned int i = start; i < end; i++)
if (cluster != infos[i].cluster)
{
scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK;
infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS;
infos[i].mask |= mask;
}
}
static unsigned
_infos_find_min_cluster (const hb_glyph_info_t *infos,
unsigned start, unsigned end,
unsigned cluster = UINT_MAX)
{
for (unsigned int i = start; i < end; i++)
cluster = hb_min (cluster, infos[i].cluster);
return cluster;
}
void clear_glyph_flags (hb_mask_t mask = 0)
{

View File

@ -1213,7 +1213,8 @@ resize_and_retry:
}
}
buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK);
buffer->clear_glyph_flags ();
buffer->unsafe_to_break ();
#undef FAIL

View File

@ -762,7 +762,8 @@ retry_getglyphs:
if (isRightToLeft) hb_buffer_reverse (buffer);
buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK);
buffer->clear_glyph_flags ();
buffer->unsafe_to_break ();
delete [] clusterMap;
delete [] glyphIndices;

View File

@ -439,7 +439,8 @@ _hb_graphite2_shape (hb_shape_plan_t *shape_plan HB_UNUSED,
if (feats) gr_featureval_destroy (feats);
gr_seg_destroy (seg);
buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK);
buffer->clear_glyph_flags ();
buffer->unsafe_to_break ();
return true;
}

View File

@ -49,6 +49,10 @@ struct hb_kern_machine_t
hb_mask_t kern_mask,
bool scale = true) const
{
if (!buffer->message (font, "start kern"))
return;
buffer->unsafe_to_concat ();
OT::hb_ot_apply_context_t c (1, font, buffer);
c.set_lookup_mask (kern_mask);
c.set_lookup_props (OT::LookupFlag::IgnoreMarks);
@ -67,7 +71,8 @@ struct hb_kern_machine_t
}
skippy_iter.reset (idx, 1);
if (!skippy_iter.next ())
unsigned unsafe_to;
if (!skippy_iter.next (&unsafe_to))
{
idx++;
continue;
@ -125,6 +130,8 @@ struct hb_kern_machine_t
skip:
idx = skippy_iter.idx;
}
(void) buffer->message (font, "end kern");
}
const Driver &driver;

View File

@ -1235,6 +1235,7 @@ struct PairSet
buffer->idx = pos;
return_trace (true);
}
buffer->unsafe_to_concat (buffer->idx, pos + 1);
return_trace (false);
}
@ -1362,7 +1363,12 @@ struct PairPosFormat1
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
if (!skippy_iter.next ()) return_trace (false);
unsigned unsafe_to;
if (!skippy_iter.next (&unsafe_to))
{
buffer->unsafe_to_concat (buffer->idx, unsafe_to);
return_trace (false);
}
return_trace ((this+pairSet[index]).apply (c, valueFormat, skippy_iter.idx));
}
@ -1555,7 +1561,12 @@ struct PairPosFormat2
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
if (!skippy_iter.next ()) return_trace (false);
unsigned unsafe_to;
if (!skippy_iter.next (&unsafe_to))
{
buffer->unsafe_to_concat (buffer->idx, unsafe_to);
return_trace (false);
}
unsigned int len1 = valueFormat1.get_len ();
unsigned int len2 = valueFormat2.get_len ();
@ -1563,7 +1574,11 @@ struct PairPosFormat2
unsigned int klass1 = (this+classDef1).get_class (buffer->cur().codepoint);
unsigned int klass2 = (this+classDef2).get_class (buffer->info[skippy_iter.idx].codepoint);
if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) return_trace (false);
if (unlikely (klass1 >= class1Count || klass2 >= class2Count))
{
buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1);
return_trace (false);
}
const Value *v = &values[record_len * (klass1 * class2Count + klass2)];
@ -1630,8 +1645,10 @@ struct PairPosFormat2
success:
if (applied_first || applied_second)
buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1);
else
boring:
buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1);
buffer->idx = skippy_iter.idx;
if (len2)
@ -1861,10 +1878,19 @@ struct CursivePosFormat1
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
if (!skippy_iter.prev ()) return_trace (false);
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
return_trace (false);
}
const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)];
if (!prev_record.exitAnchor) return_trace (false);
if (!prev_record.exitAnchor)
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
return_trace (false);
}
unsigned int i = skippy_iter.idx;
unsigned int j = buffer->idx;
@ -2128,7 +2154,13 @@ struct MarkBasePosFormat1
skippy_iter.reset (buffer->idx, 1);
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
do {
if (!skippy_iter.prev ()) return_trace (false);
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
return_trace (false);
}
/* We only want to attach to the first of a MultipleSubst sequence.
* https://github.com/harfbuzz/harfbuzz/issues/740
* Reject others...
@ -2151,7 +2183,11 @@ struct MarkBasePosFormat1
//if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); }
unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint);
if (base_index == NOT_COVERED) return_trace (false);
if (base_index == NOT_COVERED)
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
return_trace (false);
}
return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx));
}
@ -2382,21 +2418,34 @@ struct MarkLigPosFormat1
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
if (!skippy_iter.prev ()) return_trace (false);
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
return_trace (false);
}
/* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */
//if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); }
unsigned int j = skippy_iter.idx;
unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint);
if (lig_index == NOT_COVERED) return_trace (false);
if (lig_index == NOT_COVERED)
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
return_trace (false);
}
const LigatureArray& lig_array = this+ligatureArray;
const LigatureAttach& lig_attach = lig_array[lig_index];
/* Find component to attach to */
unsigned int comp_count = lig_attach.rows;
if (unlikely (!comp_count)) return_trace (false);
if (unlikely (!comp_count))
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
return_trace (false);
}
/* We must now check whether the ligature ID of the current mark glyph
* is identical to the ligature ID of the found ligature. If yes, we
@ -2579,9 +2628,18 @@ struct MarkMarkPosFormat1
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
skippy_iter.set_lookup_props (c->lookup_props & ~LookupFlag::IgnoreFlags);
if (!skippy_iter.prev ()) return_trace (false);
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
return_trace (false);
}
if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) { return_trace (false); }
if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx]))
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
return_trace (false);
}
unsigned int j = skippy_iter.idx;
@ -2606,11 +2664,16 @@ struct MarkMarkPosFormat1
}
/* Didn't match. */
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
return_trace (false);
good:
unsigned int mark2_index = (this+mark2Coverage).get_coverage (buffer->info[j].codepoint);
if (mark2_index == NOT_COVERED) return_trace (false);
if (mark2_index == NOT_COVERED)
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
return_trace (false);
}
return_trace ((this+mark1Array).apply (c, mark1_index, mark2_index, this+mark2Array, classCount, j));
}

View File

@ -826,22 +826,25 @@ struct Ligature
unsigned int total_component_count = 0;
unsigned int match_length = 0;
unsigned int match_end = 0;
unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
if (likely (!match_input (c, count,
&component[1],
match_glyph,
nullptr,
&match_length,
&match_end,
match_positions,
&total_component_count)))
{
c->buffer->unsafe_to_concat (c->buffer->idx, match_end);
return_trace (false);
}
ligate_input (c,
count,
match_positions,
match_length,
match_end,
ligGlyph,
total_component_count);
@ -1296,7 +1299,7 @@ struct ReverseChainSingleSubstFormat1
match_lookahead (c,
lookahead.len, (HBUINT16 *) lookahead.arrayZ,
match_coverage, this,
1, &end_index))
c->buffer->idx + 1, &end_index))
{
c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index);
c->replace_glyph_inplace (substitute[index]);
@ -1305,8 +1308,11 @@ struct ReverseChainSingleSubstFormat1
* calls us through a Context lookup. */
return_trace (true);
}
return_trace (false);
else
{
c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index);
return_trace (false);
}
}
template<typename Iterator,

View File

@ -520,7 +520,7 @@ struct hb_ot_apply_context_t :
may_skip (const hb_glyph_info_t &info) const
{ return matcher.may_skip (c, info); }
bool next ()
bool next (unsigned *unsafe_to = nullptr)
{
assert (num_items > 0);
while (idx + num_items < end)
@ -543,11 +543,17 @@ struct hb_ot_apply_context_t :
}
if (skip == matcher_t::SKIP_NO)
{
if (unsafe_to)
*unsafe_to = idx + 1;
return false;
}
}
if (unsafe_to)
*unsafe_to = end;
return false;
}
bool prev ()
bool prev (unsigned *unsafe_from = nullptr)
{
assert (num_items > 0);
while (idx > num_items - 1)
@ -570,8 +576,14 @@ struct hb_ot_apply_context_t :
}
if (skip == matcher_t::SKIP_NO)
{
if (unsafe_from)
*unsafe_from = hb_max (1u, idx) - 1u;
return false;
}
}
if (unsafe_from)
*unsafe_from = 0;
return false;
}
@ -955,7 +967,7 @@ static inline bool match_input (hb_ot_apply_context_t *c,
const HBUINT16 input[], /* Array of input values--start with second glyph */
match_func_t match_func,
const void *match_data,
unsigned int *end_offset,
unsigned int *end_position,
unsigned int match_positions[HB_MAX_CONTEXT_LENGTH],
unsigned int *p_total_component_count = nullptr)
{
@ -1008,7 +1020,12 @@ static inline bool match_input (hb_ot_apply_context_t *c,
match_positions[0] = buffer->idx;
for (unsigned int i = 1; i < count; i++)
{
if (!skippy_iter.next ()) return_trace (false);
unsigned unsafe_to;
if (!skippy_iter.next (&unsafe_to))
{
*end_position = unsafe_to;
return_trace (false);
}
match_positions[i] = skippy_iter.idx;
@ -1062,7 +1079,7 @@ static inline bool match_input (hb_ot_apply_context_t *c,
total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->info[skippy_iter.idx]);
}
*end_offset = skippy_iter.idx - buffer->idx + 1;
*end_position = skippy_iter.idx + 1;
if (p_total_component_count)
*p_total_component_count = total_component_count;
@ -1072,7 +1089,7 @@ static inline bool match_input (hb_ot_apply_context_t *c,
static inline bool ligate_input (hb_ot_apply_context_t *c,
unsigned int count, /* Including the first glyph */
const unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */
unsigned int match_length,
unsigned int match_end,
hb_codepoint_t lig_glyph,
unsigned int total_component_count)
{
@ -1080,7 +1097,7 @@ static inline bool ligate_input (hb_ot_apply_context_t *c,
hb_buffer_t *buffer = c->buffer;
buffer->merge_clusters (buffer->idx, buffer->idx + match_length);
buffer->merge_clusters (buffer->idx, match_end);
/* - If a base and one or more marks ligate, consider that as a base, NOT
* ligature, such that all following marks can still attach to it.
@ -1197,11 +1214,16 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c,
skippy_iter.set_match_func (match_func, match_data, backtrack);
for (unsigned int i = 0; i < count; i++)
if (!skippy_iter.prev ())
{
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
{
*match_start = unsafe_from;
return_trace (false);
}
}
*match_start = skippy_iter.idx;
return_trace (true);
}
@ -1210,21 +1232,26 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c,
const HBUINT16 lookahead[],
match_func_t match_func,
const void *match_data,
unsigned int offset,
unsigned int start_index,
unsigned int *end_index)
{
TRACE_APPLY (nullptr);
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_context;
skippy_iter.reset (c->buffer->idx + offset - 1, count);
skippy_iter.reset (start_index - 1, count);
skippy_iter.set_match_func (match_func, match_data, lookahead);
for (unsigned int i = 0; i < count; i++)
if (!skippy_iter.next ())
{
unsigned unsafe_to;
if (!skippy_iter.next (&unsafe_to))
{
*end_index = unsafe_to;
return_trace (false);
}
}
*end_index = skippy_iter.idx + 1;
return_trace (true);
}
@ -1350,15 +1377,13 @@ static inline void recurse_lookups (context_t *c,
c->recurse (lookupRecord[i].lookupListIndex);
}
static inline bool apply_lookup (hb_ot_apply_context_t *c,
static inline void apply_lookup (hb_ot_apply_context_t *c,
unsigned int count, /* Including the first glyph */
unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */
unsigned int lookupCount,
const LookupRecord lookupRecord[], /* Array of LookupRecords--in design order */
unsigned int match_length)
unsigned int match_end)
{
TRACE_APPLY (nullptr);
hb_buffer_t *buffer = c->buffer;
int end;
@ -1366,7 +1391,7 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c,
* Adjust. */
{
unsigned int bl = buffer->backtrack_len ();
end = bl + match_length;
end = bl + match_end - buffer->idx;
int delta = bl - buffer->idx;
/* Convert positions to new indexing. */
@ -1468,8 +1493,6 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c,
}
(void) buffer->move_to (end);
return_trace (true);
}
@ -1557,17 +1580,25 @@ static inline bool context_apply_lookup (hb_ot_apply_context_t *c,
const LookupRecord lookupRecord[],
ContextApplyLookupContext &lookup_context)
{
unsigned int match_length = 0;
unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
return match_input (c,
inputCount, input,
lookup_context.funcs.match, lookup_context.match_data,
&match_length, match_positions)
&& (c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length),
apply_lookup (c,
inputCount, match_positions,
lookupCount, lookupRecord,
match_length));
unsigned match_end = 0;
unsigned match_positions[HB_MAX_CONTEXT_LENGTH];
if (match_input (c,
inputCount, input,
lookup_context.funcs.match, lookup_context.match_data,
&match_end, match_positions))
{
c->buffer->unsafe_to_break (c->buffer->idx, match_end);
apply_lookup (c,
inputCount, match_positions,
lookupCount, lookupRecord,
match_end);
return true;
}
else
{
c->buffer->unsafe_to_concat (c->buffer->idx, match_end);
return false;
}
}
struct Rule
@ -2459,25 +2490,38 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c,
const LookupRecord lookupRecord[],
ChainContextApplyLookupContext &lookup_context)
{
unsigned int start_index = 0, match_length = 0, end_index = 0;
unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
return match_input (c,
inputCount, input,
lookup_context.funcs.match, lookup_context.match_data[1],
&match_length, match_positions)
&& match_backtrack (c,
backtrackCount, backtrack,
lookup_context.funcs.match, lookup_context.match_data[0],
&start_index)
&& match_lookahead (c,
lookaheadCount, lookahead,
lookup_context.funcs.match, lookup_context.match_data[2],
match_length, &end_index)
&& (c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index),
apply_lookup (c,
inputCount, match_positions,
lookupCount, lookupRecord,
match_length));
unsigned end_index = c->buffer->idx;
unsigned match_end = 0;
unsigned match_positions[HB_MAX_CONTEXT_LENGTH];
if (!(match_input (c,
inputCount, input,
lookup_context.funcs.match, lookup_context.match_data[1],
&match_end, match_positions) && (end_index = match_end)
&& match_lookahead (c,
lookaheadCount, lookahead,
lookup_context.funcs.match, lookup_context.match_data[2],
match_end, &end_index)))
{
c->buffer->unsafe_to_concat (c->buffer->idx, end_index);
return false;
}
unsigned start_index = c->buffer->out_len;
if (!match_backtrack (c,
backtrackCount, backtrack,
lookup_context.funcs.match, lookup_context.match_data[0],
&start_index))
{
c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index);
return false;
}
c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index);
apply_lookup (c,
inputCount, match_positions,
lookupCount, lookupRecord,
match_end);
return true;
}
struct ChainRule

View File

@ -321,6 +321,20 @@ arabic_joining (hb_buffer_t *buffer)
info[prev].arabic_shaping_action() = entry->prev_action;
buffer->unsafe_to_break (prev, i + 1);
}
else
{
if (prev == UINT_MAX)
{
if (this_type >= JOINING_TYPE_R)
buffer->unsafe_to_concat_from_outbuffer (0, i + 1);
}
else
{
if (this_type >= JOINING_TYPE_R ||
(2 <= state && state <= 5) /* States that have a possible prev_action. */)
buffer->unsafe_to_concat (prev, i + 1);
}
}
info[i].arabic_shaping_action() = entry->curr_action;
@ -337,7 +351,14 @@ arabic_joining (hb_buffer_t *buffer)
const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
if (entry->prev_action != NONE && prev != UINT_MAX)
{
info[prev].arabic_shaping_action() = entry->prev_action;
buffer->unsafe_to_break (prev, buffer->len);
}
else if (2 <= state && state <= 5) /* States that have a possible prev_action. */
{
buffer->unsafe_to_concat (prev, buffer->len);
}
break;
}
}

View File

@ -446,6 +446,9 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan,
return;
#endif
if (!buffer->message (font, "start fallback mark"))
return;
_hb_buffer_assert_gsubgpos_vars (buffer);
unsigned int start = 0;
@ -457,6 +460,8 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan,
start = i;
}
position_cluster (plan, font, buffer, start, count, adjust_offsets_when_zeroing);
(void) buffer->message (font, "end fallback mark");
}
@ -492,6 +497,9 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan,
#endif
#ifndef HB_DISABLE_DEPRECATED
if (!buffer->message (font, "start fallback kern"))
return;
if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction) ?
!font->has_glyph_h_kerning_func () :
!font->has_glyph_v_kerning_func ())
@ -508,6 +516,8 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan,
if (reverse)
buffer->reverse ();
(void) buffer->message (font, "end fallback kern");
#endif
}

View File

@ -1120,7 +1120,7 @@ hb_propagate_flags (hb_buffer_t *buffer)
/* Propagate cluster-level glyph flags to be the same on all cluster glyphs.
* Simplifies using them. */
if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK))
if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS))
return;
hb_glyph_info_t *info = buffer->info;
@ -1129,11 +1129,7 @@ hb_propagate_flags (hb_buffer_t *buffer)
{
unsigned int mask = 0;
for (unsigned int i = start; i < end; i++)
if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)
{
mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
break;
}
mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED;
if (mask)
for (unsigned int i = start; i < end; i++)
info[i].mask |= mask;

View File

@ -878,7 +878,8 @@ retry:
if (backward)
hb_buffer_reverse (buffer);
buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK);
buffer->clear_glyph_flags ();
buffer->unsafe_to_break ();
/* Wow, done! */
return true;

View File

@ -59,6 +59,7 @@ TESTS = \
tests/tibetan-contractions-2.tests \
tests/tibetan-vowels.tests \
tests/tt-kern-gpos.tests \
tests/unsafe-to-concat.tests \
tests/use-indic3.tests \
tests/use-marchen.tests \
tests/use-syllable.tests \

View File

@ -59,6 +59,7 @@ in_house_tests_base = [
'tibetan-contractions-2.tests',
'tibetan-vowels.tests',
'tt-kern-gpos.tests',
'unsafe-to-concat.tests',
'use-indic3.tests',
'use-marchen.tests',
'use-syllable.tests',

View File

@ -0,0 +1 @@
../fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf;--show-flags;U+0628,U+200C,U+0628;[uni0628=1+993#2|uni0628=0+993#2]

View File

@ -129,7 +129,9 @@ struct shape_options_t
{
if (!verify_buffer_monotone (buffer, error))
return false;
if (!verify_buffer_safe_to_break (buffer, text_buffer, font, error))
if (!verify_buffer_unsafe_to_break (buffer, text_buffer, font, error))
return false;
if (!verify_buffer_unsafe_to_concat (buffer, text_buffer, font, error))
return false;
return true;
}
@ -158,17 +160,15 @@ struct shape_options_t
return true;
}
bool verify_buffer_safe_to_break (hb_buffer_t *buffer,
hb_buffer_t *text_buffer,
hb_font_t *font,
const char **error=nullptr)
bool verify_buffer_unsafe_to_break (hb_buffer_t *buffer,
hb_buffer_t *text_buffer,
hb_font_t *font,
const char **error=nullptr)
{
if (cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES &&
cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
{
/* Cannot perform this check without monotone clusters.
* Then again, unsafe-to-break flag is much harder to use without
* monotone clusters. */
/* Cannot perform this check without monotone clusters. */
return true;
}
@ -255,7 +255,7 @@ struct shape_options_t
if (diff)
{
if (error)
*error = "Safe-to-break test failed.";
*error = "unsafe-to-break test failed.";
ret = false;
/* Return the reconstructed result instead so it can be inspected. */
@ -269,6 +269,186 @@ struct shape_options_t
return ret;
}
bool verify_buffer_unsafe_to_concat (hb_buffer_t *buffer,
hb_buffer_t *text_buffer,
hb_font_t *font,
const char **error=nullptr)
{
if (cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES &&
cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
{
/* Cannot perform this check without monotone clusters. */
return true;
}
/* Check that shuffling up text before shaping at safe-to-concat points
* is indeed safe. */
/* This is what we do:
*
* 1. We shape text once. Then segment the text at all the safe-to-concat
* points;
*
* 2. Then we create two buffers, one containing all the even segments and
* one all the odd segments.
*
* 3. Because all these segments were safe-to-concat at both ends, we
* expect that concatenating them and shaping should NOT change the
* shaping results of each segment. As such, we expect that after
* shaping the two buffers, we still get cluster boundaries at the
* segment boundaries, and that those all are safe-to-concat points.
* Moreover, that there are NOT any safe-to-concat points within the
* segments.
*
* 4. Finally, we reconstruct the shaping results of the original text by
* simply interleaving the shaping results of the segments from the two
* buffers, and assert that the total shaping result is the same as
* the one from the original buffer in step 1.
*/
hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer),
hb_buffer_create_similar (buffer)};
hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer);
hb_segment_properties_t props;
hb_buffer_get_segment_properties (buffer, &props);
hb_buffer_set_segment_properties (fragments[0], &props);
hb_buffer_set_segment_properties (fragments[1], &props);
hb_buffer_set_segment_properties (reconstruction, &props);
unsigned num_glyphs;
hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs);
unsigned num_chars;
hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars);
bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer));
if (!forward)
hb_buffer_reverse (buffer);
/*
* Split text into segments and collect into to fragment streams.
*/
{
unsigned fragment_idx = 0;
unsigned start = 0;
unsigned text_start = 0;
unsigned text_end = 0;
for (unsigned end = 1; end < num_glyphs + 1; end++)
{
if (end < num_glyphs &&
(info[end].cluster == info[end-1].cluster ||
info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT))
continue;
/* Accumulate segment corresponding to glyphs start..end. */
if (end == num_glyphs)
text_end = num_chars;
else
{
unsigned cluster = info[end].cluster;
while (text_end < num_chars && text[text_end].cluster < cluster)
text_end++;
}
assert (text_start < text_end);
if (0)
printf("start %d end %d text start %d end %d\n", start, end, text_start, text_end);
#if 0
hb_buffer_flags_t flags = hb_buffer_get_flags (fragment);
if (0 < text_start)
flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT);
if (text_end < num_chars)
flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT);
hb_buffer_set_flags (fragment, flags);
#endif
hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end);
start = end;
text_start = text_end;
fragment_idx = 1 - fragment_idx;
}
}
bool ret = true;
hb_buffer_diff_flags_t diff;
/*
* Shape the two fragment streams.
*/
if (!hb_shape_full (font, fragments[0], features, num_features, shapers) ||
!hb_shape_full (font, fragments[1], features, num_features, shapers))
{
if (error)
*error = "All shapers failed while shaping fragments.";
ret = false;
goto out;
}
if (!forward)
{
hb_buffer_reverse (fragments[0]);
hb_buffer_reverse (fragments[1]);
}
/*
* Reconstruct results.
*/
{
unsigned fragment_idx = 0;
unsigned fragment_start[2] {0, 0};
unsigned fragment_num_glyphs[2];
hb_glyph_info_t *fragment_info[2];
for (unsigned i = 0; i < 2; i++)
fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]);
while (fragment_start[0] < fragment_num_glyphs[0] ||
fragment_start[1] < fragment_num_glyphs[1])
{
unsigned fragment_end = fragment_start[fragment_idx] + 1;
while (fragment_end < fragment_num_glyphs[fragment_idx] &&
(fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster ||
fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT))
fragment_end++;
hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end);
fragment_start[fragment_idx] = fragment_end;
fragment_idx = 1 - fragment_idx;
}
}
if (!forward)
{
hb_buffer_reverse (buffer);
hb_buffer_reverse (reconstruction);
}
/*
* Diff results.
*/
diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0);
if (diff)
{
if (error)
*error = "unsafe-to-concat test failed.";
ret = false;
/* Return the reconstructed result instead so it can be inspected. */
hb_buffer_set_length (buffer, 0);
hb_buffer_append (buffer, reconstruction, 0, -1);
}
out:
hb_buffer_destroy (reconstruction);
hb_buffer_destroy (fragments[0]);
hb_buffer_destroy (fragments[1]);
return ret;
}
void shape_closure (const char *text, int text_len,
hb_font_t *font, hb_buffer_t *buffer,
hb_set_t *glyphs)