diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index c673077bd..3e09260cf 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -572,33 +572,6 @@ done: skip_glyph (); } -void -hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end) -{ - unsigned int cluster = UINT_MAX; - cluster = _infos_find_min_cluster (info, start, end, cluster); - _unsafe_to_break_set_mask (info, start, end, cluster); -} -void -hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) -{ - if (!have_output) - { - unsafe_to_break_impl (start, end); - return; - } - - assert (start <= out_len); - assert (idx <= end); - - unsigned int cluster = UINT_MAX; - cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); - cluster = _infos_find_min_cluster (info, idx, end, cluster); - - _unsafe_to_break_set_mask (out_info, start, out_len, cluster); - _unsafe_to_break_set_mask (info, idx, end, cluster); -} - void hb_buffer_t::guess_segment_properties () { diff --git a/src/hb-buffer.h b/src/hb-buffer.h index 2ba56d345..d6b5a2229 100644 --- a/src/hb-buffer.h +++ b/src/hb-buffer.h @@ -76,18 +76,79 @@ typedef struct hb_glyph_info_t { * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the * beginning of the cluster this glyph is part of, * then both sides need to be re-shaped, as the - * result might be different. On the flip side, - * it means that when this flag is not present, - * then it's safe to break the glyph-run at the - * beginning of this cluster, and the two sides - * represent the exact same result one would get - * if breaking input text at the beginning of - * this cluster and shaping the two sides - * separately. This can be used to optimize - * paragraph layout, by avoiding re-shaping - * of each line after line-breaking, or limiting - * the reshaping to a small piece around the - * breaking point only. + * result might be different. + * + * On the flip side, it means that when this + * flag is not present, then it is safe to break + * the glyph-run at the beginning of this + * cluster, and the two sides will represent the + * exact same result one would get if breaking + * input text at the beginning of this cluster + * and shaping the two sides separately. + * + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking. + * + * @HB_GLYPH_FLAG_UNSAFE_TO_CONCAT: Indicates that if input text is changed on one + * side of the beginning of the cluster this glyph + * is part of, then the shaping results for the + * other side might change. + * + * Note that the absence of this flag will NOT by + * itself mean that it IS safe to concat text. + * Only two pieces of text both of which clear of + * this flag can be concatenated safely. + * + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking, by limiting the + * reshaping to a small piece around the + * breaking positin only, even if the breaking + * position carries the + * #HB_GLYPH_FLAG_UNSAFE_TO_BREAK or when + * hyphenation or other text transformation + * happens at line-break position, in the following + * way: + * + * 1. Iterate back from the line-break position + * until the first cluster start position that is + * NOT unsafe-to-concat, 2. shape the segment from + * there till the end of line, 3. check whether the + * resulting glyph-run also is clear of the + * unsafe-to-concat at its start-of-text position; + * if it is, just splice it into place and the line + * is shaped; If not, move on to a position further + * back that is clear of unsafe-to-concat and retry + * from there, and repeat. + * + * At the start of next line a similar algorithm can + * be implemented. That is: 1. Iterate forward from + * the line-break position untill the first cluster + * start position that is NOT unsafe-to-concat, 2. + * shape the segment from beginning of the line to + * that position, 3. check whether the resulting + * glyph-run also is clear of the unsafe-to-concat + * at its end-of-text position; if it is, just splice + * it into place and the beginning is shaped; If not, + * move on to a position further forward that is clear + * of unsafe-to-concat and retry up to there, and repeat. + * + * A slight complication will arise in the + * implementation of the algorithm above, + * because while our buffer API has a way to + * return flags for position corresponding to + * start-of-text, there is currently no position + * corresponding to end-of-text. This limitation + * can be alleviated by shaping more text than needed + * and looking for unsafe-to-concat flag within text + * clusters. + * + * The #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag will + * always imply this flag. + * + * Since: REPLACEME + * * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags. * * Flags for #hb_glyph_info_t. @@ -96,8 +157,9 @@ typedef struct hb_glyph_info_t { */ typedef enum { /*< flags >*/ HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001, + HB_GLYPH_FLAG_UNSAFE_TO_CONCAT = 0x00000002, - HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */ + HB_GLYPH_FLAG_DEFINED = 0x00000003 /* OR of all defined flags */ } hb_glyph_flags_t; HB_EXTERN hb_glyph_flags_t diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index c8ee0d90f..adf4aa2b6 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -67,8 +67,8 @@ enum hb_buffer_scratch_flags_t { HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u, HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, - HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, - HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u, + HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000010u, + HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS = 0x00000020u, /* Reserved for complex shapers' internal use. */ HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, @@ -385,15 +385,80 @@ struct hb_buffer_t /* Merge clusters for deleting current glyph, and skip it. */ HB_INTERNAL void delete_glyph (); - void unsafe_to_break (unsigned int start, - unsigned int end) + + void set_glyph_flags (hb_mask_t mask, + unsigned start = 0, + unsigned end = (unsigned) -1, + bool interior = false, + bool from_out_buffer = false) { - if (end - start < 2) + end = hb_min (end, len); + + if (interior && !from_out_buffer && end - start < 2) return; - unsafe_to_break_impl (start, end); + + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + + if (!from_out_buffer || !have_output) + { + if (!interior) + { + for (unsigned i = start; i < end; i++) + info[i].mask |= mask; + } + else + { + unsigned cluster = _infos_find_min_cluster (info, start, end); + _infos_set_glyph_flags (info, start, end, cluster, mask); + } + } + else + { + assert (start <= out_len); + assert (idx <= end); + + if (!interior) + { + for (unsigned i = start; i < out_len; i++) + out_info[i].mask |= mask; + for (unsigned i = idx; i < end; i++) + info[i].mask |= mask; + } + else + { + unsigned cluster = _infos_find_min_cluster (info, idx, end); + cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); + + _infos_set_glyph_flags (out_info, start, out_len, cluster, mask); + _infos_set_glyph_flags (info, idx, end, cluster, mask); + } + } + } + + void unsafe_to_break (unsigned int start = 0, unsigned int end = -1) + { + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + true); + } + void unsafe_to_concat (unsigned int start = 0, unsigned int end = -1) + { + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + true); + } + void unsafe_to_break_from_outbuffer (unsigned int start = 0, unsigned int end = -1) + { + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + true, true); + } + void unsafe_to_concat_from_outbuffer (unsigned int start = 0, unsigned int end = -1) + { + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + false, true); } - HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end); - HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end); /* Internal methods */ @@ -484,36 +549,31 @@ struct hb_buffer_t set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0) { if (inf.cluster != cluster) - { - if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - else - inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - } + inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED); inf.cluster = cluster; } - - static unsigned - _infos_find_min_cluster (const hb_glyph_info_t *infos, - unsigned start, unsigned end, - unsigned cluster) - { - for (unsigned int i = start; i < end; i++) - cluster = hb_min (cluster, infos[i].cluster); - return cluster; - } void - _unsafe_to_break_set_mask (hb_glyph_info_t *infos, - unsigned int start, unsigned int end, - unsigned int cluster) + _infos_set_glyph_flags (hb_glyph_info_t *infos, + unsigned int start, unsigned int end, + unsigned int cluster, + hb_mask_t mask) { for (unsigned int i = start; i < end; i++) if (cluster != infos[i].cluster) { - scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK; - infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + infos[i].mask |= mask; } } + static unsigned + _infos_find_min_cluster (const hb_glyph_info_t *infos, + unsigned start, unsigned end, + unsigned cluster = UINT_MAX) + { + for (unsigned int i = start; i < end; i++) + cluster = hb_min (cluster, infos[i].cluster); + return cluster; + } void clear_glyph_flags (hb_mask_t mask = 0) { diff --git a/src/hb-coretext.cc b/src/hb-coretext.cc index 24075e146..5f383064c 100644 --- a/src/hb-coretext.cc +++ b/src/hb-coretext.cc @@ -1213,7 +1213,8 @@ resize_and_retry: } } - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); #undef FAIL diff --git a/src/hb-directwrite.cc b/src/hb-directwrite.cc index dea87b8cd..f177ff31c 100644 --- a/src/hb-directwrite.cc +++ b/src/hb-directwrite.cc @@ -762,7 +762,8 @@ retry_getglyphs: if (isRightToLeft) hb_buffer_reverse (buffer); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); delete [] clusterMap; delete [] glyphIndices; diff --git a/src/hb-graphite2.cc b/src/hb-graphite2.cc index 42420ac0b..63dc18b46 100644 --- a/src/hb-graphite2.cc +++ b/src/hb-graphite2.cc @@ -439,7 +439,8 @@ _hb_graphite2_shape (hb_shape_plan_t *shape_plan HB_UNUSED, if (feats) gr_featureval_destroy (feats); gr_seg_destroy (seg); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); return true; } diff --git a/src/hb-kern.hh b/src/hb-kern.hh index 3f952fe7f..9ea945cae 100644 --- a/src/hb-kern.hh +++ b/src/hb-kern.hh @@ -49,6 +49,10 @@ struct hb_kern_machine_t hb_mask_t kern_mask, bool scale = true) const { + if (!buffer->message (font, "start kern")) + return; + + buffer->unsafe_to_concat (); OT::hb_ot_apply_context_t c (1, font, buffer); c.set_lookup_mask (kern_mask); c.set_lookup_props (OT::LookupFlag::IgnoreMarks); @@ -67,7 +71,8 @@ struct hb_kern_machine_t } skippy_iter.reset (idx, 1); - if (!skippy_iter.next ()) + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) { idx++; continue; @@ -125,6 +130,8 @@ struct hb_kern_machine_t skip: idx = skippy_iter.idx; } + + (void) buffer->message (font, "end kern"); } const Driver &driver; diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index be341fb44..e28c951f3 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1235,6 +1235,7 @@ struct PairSet buffer->idx = pos; return_trace (true); } + buffer->unsafe_to_concat (buffer->idx, pos + 1); return_trace (false); } @@ -1362,7 +1363,12 @@ struct PairPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + buffer->unsafe_to_concat (buffer->idx, unsafe_to); + return_trace (false); + } return_trace ((this+pairSet[index]).apply (c, valueFormat, skippy_iter.idx)); } @@ -1555,7 +1561,12 @@ struct PairPosFormat2 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + buffer->unsafe_to_concat (buffer->idx, unsafe_to); + return_trace (false); + } unsigned int len1 = valueFormat1.get_len (); unsigned int len2 = valueFormat2.get_len (); @@ -1563,7 +1574,11 @@ struct PairPosFormat2 unsigned int klass1 = (this+classDef1).get_class (buffer->cur().codepoint); unsigned int klass2 = (this+classDef2).get_class (buffer->info[skippy_iter.idx].codepoint); - if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) return_trace (false); + if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) + { + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); + return_trace (false); + } const Value *v = &values[record_len * (klass1 * class2Count + klass2)]; @@ -1630,8 +1645,10 @@ struct PairPosFormat2 success: if (applied_first || applied_second) buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1); - + else boring: + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); + buffer->idx = skippy_iter.idx; if (len2) @@ -1861,10 +1878,19 @@ struct CursivePosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)]; - if (!prev_record.exitAnchor) return_trace (false); + if (!prev_record.exitAnchor) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } unsigned int i = skippy_iter.idx; unsigned int j = buffer->idx; @@ -2128,7 +2154,13 @@ struct MarkBasePosFormat1 skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); do { - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } + /* We only want to attach to the first of a MultipleSubst sequence. * https://github.com/harfbuzz/harfbuzz/issues/740 * Reject others... @@ -2151,7 +2183,11 @@ struct MarkBasePosFormat1 //if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); } unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint); - if (base_index == NOT_COVERED) return_trace (false); + if (base_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx)); } @@ -2382,21 +2418,34 @@ struct MarkLigPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } /* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */ //if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); } unsigned int j = skippy_iter.idx; unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint); - if (lig_index == NOT_COVERED) return_trace (false); + if (lig_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } const LigatureArray& lig_array = this+ligatureArray; const LigatureAttach& lig_attach = lig_array[lig_index]; /* Find component to attach to */ unsigned int comp_count = lig_attach.rows; - if (unlikely (!comp_count)) return_trace (false); + if (unlikely (!comp_count)) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } /* We must now check whether the ligature ID of the current mark glyph * is identical to the ligature ID of the found ligature. If yes, we @@ -2579,9 +2628,18 @@ struct MarkMarkPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (c->lookup_props & ~LookupFlag::IgnoreFlags); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } - if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) { return_trace (false); } + if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } unsigned int j = skippy_iter.idx; @@ -2606,11 +2664,16 @@ struct MarkMarkPosFormat1 } /* Didn't match. */ + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); return_trace (false); good: unsigned int mark2_index = (this+mark2Coverage).get_coverage (buffer->info[j].codepoint); - if (mark2_index == NOT_COVERED) return_trace (false); + if (mark2_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } return_trace ((this+mark1Array).apply (c, mark1_index, mark2_index, this+mark2Array, classCount, j)); } diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh index edc89eb37..0b0bc547b 100644 --- a/src/hb-ot-layout-gsub-table.hh +++ b/src/hb-ot-layout-gsub-table.hh @@ -826,22 +826,25 @@ struct Ligature unsigned int total_component_count = 0; - unsigned int match_length = 0; + unsigned int match_end = 0; unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; if (likely (!match_input (c, count, &component[1], match_glyph, nullptr, - &match_length, + &match_end, match_positions, &total_component_count))) + { + c->buffer->unsafe_to_concat (c->buffer->idx, match_end); return_trace (false); + } ligate_input (c, count, match_positions, - match_length, + match_end, ligGlyph, total_component_count); @@ -1296,7 +1299,7 @@ struct ReverseChainSingleSubstFormat1 match_lookahead (c, lookahead.len, (HBUINT16 *) lookahead.arrayZ, match_coverage, this, - 1, &end_index)) + c->buffer->idx + 1, &end_index)) { c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); c->replace_glyph_inplace (substitute[index]); @@ -1305,8 +1308,11 @@ struct ReverseChainSingleSubstFormat1 * calls us through a Context lookup. */ return_trace (true); } - - return_trace (false); + else + { + c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); + return_trace (false); + } } template 0); while (idx + num_items < end) @@ -543,11 +543,17 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_to) + *unsafe_to = idx + 1; return false; + } } + if (unsafe_to) + *unsafe_to = end; return false; } - bool prev () + bool prev (unsigned *unsafe_from = nullptr) { assert (num_items > 0); while (idx > num_items - 1) @@ -570,8 +576,14 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_from) + *unsafe_from = hb_max (1u, idx) - 1u; return false; + } } + if (unsafe_from) + *unsafe_from = 0; return false; } @@ -955,7 +967,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, const HBUINT16 input[], /* Array of input values--start with second glyph */ match_func_t match_func, const void *match_data, - unsigned int *end_offset, + unsigned int *end_position, unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], unsigned int *p_total_component_count = nullptr) { @@ -1008,7 +1020,12 @@ static inline bool match_input (hb_ot_apply_context_t *c, match_positions[0] = buffer->idx; for (unsigned int i = 1; i < count; i++) { - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + *end_position = unsafe_to; + return_trace (false); + } match_positions[i] = skippy_iter.idx; @@ -1062,7 +1079,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->info[skippy_iter.idx]); } - *end_offset = skippy_iter.idx - buffer->idx + 1; + *end_position = skippy_iter.idx + 1; if (p_total_component_count) *p_total_component_count = total_component_count; @@ -1072,7 +1089,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, static inline bool ligate_input (hb_ot_apply_context_t *c, unsigned int count, /* Including the first glyph */ const unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ - unsigned int match_length, + unsigned int match_end, hb_codepoint_t lig_glyph, unsigned int total_component_count) { @@ -1080,7 +1097,7 @@ static inline bool ligate_input (hb_ot_apply_context_t *c, hb_buffer_t *buffer = c->buffer; - buffer->merge_clusters (buffer->idx, buffer->idx + match_length); + buffer->merge_clusters (buffer->idx, match_end); /* - If a base and one or more marks ligate, consider that as a base, NOT * ligature, such that all following marks can still attach to it. @@ -1197,11 +1214,16 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c, skippy_iter.set_match_func (match_func, match_data, backtrack); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.prev ()) + { + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + *match_start = unsafe_from; return_trace (false); + } + } *match_start = skippy_iter.idx; - return_trace (true); } @@ -1210,21 +1232,26 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c, const HBUINT16 lookahead[], match_func_t match_func, const void *match_data, - unsigned int offset, + unsigned int start_index, unsigned int *end_index) { TRACE_APPLY (nullptr); hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_context; - skippy_iter.reset (c->buffer->idx + offset - 1, count); + skippy_iter.reset (start_index - 1, count); skippy_iter.set_match_func (match_func, match_data, lookahead); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.next ()) + { + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + *end_index = unsafe_to; return_trace (false); + } + } *end_index = skippy_iter.idx + 1; - return_trace (true); } @@ -1350,15 +1377,13 @@ static inline void recurse_lookups (context_t *c, c->recurse (lookupRecord[i].lookupListIndex); } -static inline bool apply_lookup (hb_ot_apply_context_t *c, +static inline void apply_lookup (hb_ot_apply_context_t *c, unsigned int count, /* Including the first glyph */ unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ unsigned int lookupCount, const LookupRecord lookupRecord[], /* Array of LookupRecords--in design order */ - unsigned int match_length) + unsigned int match_end) { - TRACE_APPLY (nullptr); - hb_buffer_t *buffer = c->buffer; int end; @@ -1366,7 +1391,7 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c, * Adjust. */ { unsigned int bl = buffer->backtrack_len (); - end = bl + match_length; + end = bl + match_end - buffer->idx; int delta = bl - buffer->idx; /* Convert positions to new indexing. */ @@ -1468,8 +1493,6 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c, } (void) buffer->move_to (end); - - return_trace (true); } @@ -1557,17 +1580,25 @@ static inline bool context_apply_lookup (hb_ot_apply_context_t *c, const LookupRecord lookupRecord[], ContextApplyLookupContext &lookup_context) { - unsigned int match_length = 0; - unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; - return match_input (c, - inputCount, input, - lookup_context.funcs.match, lookup_context.match_data, - &match_length, match_positions) - && (c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length), - apply_lookup (c, - inputCount, match_positions, - lookupCount, lookupRecord, - match_length)); + unsigned match_end = 0; + unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; + if (match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data, + &match_end, match_positions)) + { + c->buffer->unsafe_to_break (c->buffer->idx, match_end); + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_end); + return true; + } + else + { + c->buffer->unsafe_to_concat (c->buffer->idx, match_end); + return false; + } } struct Rule @@ -2459,25 +2490,38 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, const LookupRecord lookupRecord[], ChainContextApplyLookupContext &lookup_context) { - unsigned int start_index = 0, match_length = 0, end_index = 0; - unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; - return match_input (c, - inputCount, input, - lookup_context.funcs.match, lookup_context.match_data[1], - &match_length, match_positions) - && match_backtrack (c, - backtrackCount, backtrack, - lookup_context.funcs.match, lookup_context.match_data[0], - &start_index) - && match_lookahead (c, - lookaheadCount, lookahead, - lookup_context.funcs.match, lookup_context.match_data[2], - match_length, &end_index) - && (c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index), - apply_lookup (c, - inputCount, match_positions, - lookupCount, lookupRecord, - match_length)); + unsigned end_index = c->buffer->idx; + unsigned match_end = 0; + unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; + if (!(match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data[1], + &match_end, match_positions) && (end_index = match_end) + && match_lookahead (c, + lookaheadCount, lookahead, + lookup_context.funcs.match, lookup_context.match_data[2], + match_end, &end_index))) + { + c->buffer->unsafe_to_concat (c->buffer->idx, end_index); + return false; + } + + unsigned start_index = c->buffer->out_len; + if (!match_backtrack (c, + backtrackCount, backtrack, + lookup_context.funcs.match, lookup_context.match_data[0], + &start_index)) + { + c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); + return false; + } + + c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_end); + return true; } struct ChainRule diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index 222c5d6b7..2298aa92f 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -321,6 +321,20 @@ arabic_joining (hb_buffer_t *buffer) info[prev].arabic_shaping_action() = entry->prev_action; buffer->unsafe_to_break (prev, i + 1); } + else + { + if (prev == UINT_MAX) + { + if (this_type >= JOINING_TYPE_R) + buffer->unsafe_to_concat_from_outbuffer (0, i + 1); + } + else + { + if (this_type >= JOINING_TYPE_R || + (2 <= state && state <= 5) /* States that have a possible prev_action. */) + buffer->unsafe_to_concat (prev, i + 1); + } + } info[i].arabic_shaping_action() = entry->curr_action; @@ -337,7 +351,14 @@ arabic_joining (hb_buffer_t *buffer) const arabic_state_table_entry *entry = &arabic_state_table[state][this_type]; if (entry->prev_action != NONE && prev != UINT_MAX) + { info[prev].arabic_shaping_action() = entry->prev_action; + buffer->unsafe_to_break (prev, buffer->len); + } + else if (2 <= state && state <= 5) /* States that have a possible prev_action. */ + { + buffer->unsafe_to_concat (prev, buffer->len); + } break; } } diff --git a/src/hb-ot-shape-fallback.cc b/src/hb-ot-shape-fallback.cc index 0fae26389..671f30327 100644 --- a/src/hb-ot-shape-fallback.cc +++ b/src/hb-ot-shape-fallback.cc @@ -446,6 +446,9 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, return; #endif + if (!buffer->message (font, "start fallback mark")) + return; + _hb_buffer_assert_gsubgpos_vars (buffer); unsigned int start = 0; @@ -457,6 +460,8 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, start = i; } position_cluster (plan, font, buffer, start, count, adjust_offsets_when_zeroing); + + (void) buffer->message (font, "end fallback mark"); } @@ -492,6 +497,9 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, #endif #ifndef HB_DISABLE_DEPRECATED + if (!buffer->message (font, "start fallback kern")) + return; + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction) ? !font->has_glyph_h_kerning_func () : !font->has_glyph_v_kerning_func ()) @@ -508,6 +516,8 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, if (reverse) buffer->reverse (); + + (void) buffer->message (font, "end fallback kern"); #endif } diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index e16377f97..4bd8aaf03 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -1120,7 +1120,7 @@ hb_propagate_flags (hb_buffer_t *buffer) /* Propagate cluster-level glyph flags to be the same on all cluster glyphs. * Simplifies using them. */ - if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK)) + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS)) return; hb_glyph_info_t *info = buffer->info; @@ -1129,11 +1129,7 @@ hb_propagate_flags (hb_buffer_t *buffer) { unsigned int mask = 0; for (unsigned int i = start; i < end; i++) - if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - { - mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - break; - } + mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED; if (mask) for (unsigned int i = start; i < end; i++) info[i].mask |= mask; diff --git a/src/hb-uniscribe.cc b/src/hb-uniscribe.cc index 0e5a114f7..50f71ce9c 100644 --- a/src/hb-uniscribe.cc +++ b/src/hb-uniscribe.cc @@ -878,7 +878,8 @@ retry: if (backward) hb_buffer_reverse (buffer); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); /* Wow, done! */ return true; diff --git a/test/shape/data/in-house/Makefile.sources b/test/shape/data/in-house/Makefile.sources index 01047d548..02c9e914b 100644 --- a/test/shape/data/in-house/Makefile.sources +++ b/test/shape/data/in-house/Makefile.sources @@ -59,6 +59,7 @@ TESTS = \ tests/tibetan-contractions-2.tests \ tests/tibetan-vowels.tests \ tests/tt-kern-gpos.tests \ + tests/unsafe-to-concat.tests \ tests/use-indic3.tests \ tests/use-marchen.tests \ tests/use-syllable.tests \ diff --git a/test/shape/data/in-house/fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf b/test/shape/data/in-house/fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf new file mode 100644 index 000000000..8508fbeeb Binary files /dev/null and b/test/shape/data/in-house/fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf differ diff --git a/test/shape/data/in-house/meson.build b/test/shape/data/in-house/meson.build index cda01d490..2da9dab14 100644 --- a/test/shape/data/in-house/meson.build +++ b/test/shape/data/in-house/meson.build @@ -59,6 +59,7 @@ in_house_tests_base = [ 'tibetan-contractions-2.tests', 'tibetan-vowels.tests', 'tt-kern-gpos.tests', + 'unsafe-to-concat.tests', 'use-indic3.tests', 'use-marchen.tests', 'use-syllable.tests', diff --git a/test/shape/data/in-house/tests/unsafe-to-concat.tests b/test/shape/data/in-house/tests/unsafe-to-concat.tests new file mode 100644 index 000000000..24906fa6d --- /dev/null +++ b/test/shape/data/in-house/tests/unsafe-to-concat.tests @@ -0,0 +1 @@ +../fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf;--show-flags;U+0628,U+200C,U+0628;[uni0628=1+993#2|uni0628=0+993#2] diff --git a/util/shape-options.hh b/util/shape-options.hh index b7eb846dc..7847fdb6b 100644 --- a/util/shape-options.hh +++ b/util/shape-options.hh @@ -129,7 +129,9 @@ struct shape_options_t { if (!verify_buffer_monotone (buffer, error)) return false; - if (!verify_buffer_safe_to_break (buffer, text_buffer, font, error)) + if (!verify_buffer_unsafe_to_break (buffer, text_buffer, font, error)) + return false; + if (!verify_buffer_unsafe_to_concat (buffer, text_buffer, font, error)) return false; return true; } @@ -158,17 +160,15 @@ struct shape_options_t return true; } - bool verify_buffer_safe_to_break (hb_buffer_t *buffer, - hb_buffer_t *text_buffer, - hb_font_t *font, - const char **error=nullptr) + bool verify_buffer_unsafe_to_break (hb_buffer_t *buffer, + hb_buffer_t *text_buffer, + hb_font_t *font, + const char **error=nullptr) { if (cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) { - /* Cannot perform this check without monotone clusters. - * Then again, unsafe-to-break flag is much harder to use without - * monotone clusters. */ + /* Cannot perform this check without monotone clusters. */ return true; } @@ -255,7 +255,7 @@ struct shape_options_t if (diff) { if (error) - *error = "Safe-to-break test failed."; + *error = "unsafe-to-break test failed."; ret = false; /* Return the reconstructed result instead so it can be inspected. */ @@ -269,6 +269,186 @@ struct shape_options_t return ret; } + bool verify_buffer_unsafe_to_concat (hb_buffer_t *buffer, + hb_buffer_t *text_buffer, + hb_font_t *font, + const char **error=nullptr) + { + if (cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && + cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) + { + /* Cannot perform this check without monotone clusters. */ + return true; + } + + /* Check that shuffling up text before shaping at safe-to-concat points + * is indeed safe. */ + + /* This is what we do: + * + * 1. We shape text once. Then segment the text at all the safe-to-concat + * points; + * + * 2. Then we create two buffers, one containing all the even segments and + * one all the odd segments. + * + * 3. Because all these segments were safe-to-concat at both ends, we + * expect that concatenating them and shaping should NOT change the + * shaping results of each segment. As such, we expect that after + * shaping the two buffers, we still get cluster boundaries at the + * segment boundaries, and that those all are safe-to-concat points. + * Moreover, that there are NOT any safe-to-concat points within the + * segments. + * + * 4. Finally, we reconstruct the shaping results of the original text by + * simply interleaving the shaping results of the segments from the two + * buffers, and assert that the total shaping results is the same as + * the one from original buffer in step 1. + */ + + hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer), + hb_buffer_create_similar (buffer)}; + hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); + hb_segment_properties_t props; + hb_buffer_get_segment_properties (buffer, &props); + hb_buffer_set_segment_properties (fragments[0], &props); + hb_buffer_set_segment_properties (fragments[1], &props); + hb_buffer_set_segment_properties (reconstruction, &props); + + unsigned num_glyphs; + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); + + unsigned num_chars; + hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); + + bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); + + if (!forward) + hb_buffer_reverse (buffer); + + /* + * Split text into segments and collect into to fragment streams. + */ + { + unsigned fragment_idx = 0; + unsigned start = 0; + unsigned text_start = 0; + unsigned text_end = 0; + for (unsigned end = 1; end < num_glyphs + 1; end++) + { + if (end < num_glyphs && + (info[end].cluster == info[end-1].cluster || + info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) + continue; + + /* Accumulate segment corresponding to glyphs start..end. */ + if (end == num_glyphs) + text_end = num_chars; + else + { + unsigned cluster = info[end].cluster; + while (text_end < num_chars && text[text_end].cluster < cluster) + text_end++; + } + assert (text_start < text_end); + + if (0) + printf("start %d end %d text start %d end %d\n", start, end, text_start, text_end); + +#if 0 + hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); + if (0 < text_start) + flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); + if (text_end < num_chars) + flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); + hb_buffer_set_flags (fragment, flags); +#endif + + hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); + + start = end; + text_start = text_end; + fragment_idx = 1 - fragment_idx; + } + } + + bool ret = true; + hb_buffer_diff_flags_t diff; + + /* + * Shape the two fragment streams. + */ + if (!hb_shape_full (font, fragments[0], features, num_features, shapers) || + !hb_shape_full (font, fragments[1], features, num_features, shapers)) + { + if (error) + *error = "All shapers failed while shaping fragments."; + ret = false; + goto out; + } + + if (!forward) + { + hb_buffer_reverse (fragments[0]); + hb_buffer_reverse (fragments[1]); + } + + /* + * Reconstruct results. + */ + { + unsigned fragment_idx = 0; + unsigned fragment_start[2] {0, 0}; + unsigned fragment_num_glyphs[2]; + hb_glyph_info_t *fragment_info[2]; + for (unsigned i = 0; i < 2; i++) + fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); + while (fragment_start[0] < fragment_num_glyphs[0] || + fragment_start[1] < fragment_num_glyphs[1]) + { + unsigned fragment_end = fragment_start[fragment_idx] + 1; + while (fragment_end < fragment_num_glyphs[fragment_idx] && + (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || + fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) + fragment_end++; + + hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); + + fragment_start[fragment_idx] = fragment_end; + fragment_idx = 1 - fragment_idx; + } + } + + if (!forward) + { + hb_buffer_reverse (buffer); + hb_buffer_reverse (reconstruction); + } + + /* + * Diff results. + */ + diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); + if (diff) + { + if (error) + *error = "unsafe-to-concat test failed."; + ret = false; + + /* Return the reconstructed result instead so it can be inspected. */ + hb_buffer_set_length (buffer, 0); + hb_buffer_append (buffer, reconstruction, 0, -1); + } + + + out: + hb_buffer_destroy (reconstruction); + hb_buffer_destroy (fragments[0]); + hb_buffer_destroy (fragments[1]); + + return ret; + } + void shape_closure (const char *text, int text_len, hb_font_t *font, hb_buffer_t *buffer, hb_set_t *glyphs)