From 3122c2cdc45a964efedad8953a2df67205c3e3a8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 4 Dec 2021 19:50:33 -0800 Subject: [PATCH 01/28] [buffer] Add HB_GLYPH_FLAG_UNSAFE_TO_CONCAT Fixes https://github.com/harfbuzz/harfbuzz/issues/1463 --- src/hb-buffer.cc | 10 ++--- src/hb-buffer.h | 76 ++++++++++++++++++++++++++++++------ src/hb-buffer.hh | 33 ++++++++++------ src/hb-ot-layout-gsubgpos.hh | 39 +++++++++++++++--- src/hb-ot-shape.cc | 8 +--- 5 files changed, 124 insertions(+), 42 deletions(-) diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index c673077bd..6d7c3182c 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -573,14 +573,14 @@ done: } void -hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end) +hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end, hb_mask_t mask) { unsigned int cluster = UINT_MAX; cluster = _infos_find_min_cluster (info, start, end, cluster); - _unsafe_to_break_set_mask (info, start, end, cluster); + _unsafe_to_break_set_mask (info, start, end, cluster, mask); } void -hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) +hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, hb_mask_t mask) { if (!have_output) { @@ -595,8 +595,8 @@ hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int en cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); cluster = _infos_find_min_cluster (info, idx, end, cluster); - _unsafe_to_break_set_mask (out_info, start, out_len, cluster); - _unsafe_to_break_set_mask (info, idx, end, cluster); + _unsafe_to_break_set_mask (out_info, start, out_len, cluster, mask); + _unsafe_to_break_set_mask (info, idx, end, cluster, mask); } void diff --git a/src/hb-buffer.h b/src/hb-buffer.h index 2ba56d345..9fb218c0d 100644 --- a/src/hb-buffer.h +++ b/src/hb-buffer.h @@ -76,18 +76,67 @@ typedef struct hb_glyph_info_t { * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the * beginning of the cluster this glyph is part of, * then both sides need to be re-shaped, as the - * result might be different. On the flip side, - * it means that when this flag is not present, - * then it's safe to break the glyph-run at the - * beginning of this cluster, and the two sides - * represent the exact same result one would get - * if breaking input text at the beginning of - * this cluster and shaping the two sides - * separately. This can be used to optimize - * paragraph layout, by avoiding re-shaping - * of each line after line-breaking, or limiting - * the reshaping to a small piece around the - * breaking point only. + * result might be different. + * + * On the flip side, it means that when this + * flag is not present, then it is safe to break + * the glyph-run at the beginning of this + * cluster, and the two sides will represent the + * exact same result one would get if breaking + * input text at the beginning of this cluster + * and shaping the two sides separately. + * + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking. + * + * @HB_GLYPH_FLAG_UNSAFE_TO_CONCAT: Indicates that if input text is changed on one + * side of the beginning of the cluster this glyph + * is part of, then the shaping results for the + * other side might change. + * + * Note that the absence of this flag will NOT by + * itself mean that it IS safe to concat text. + * Only two pieces of text both of which clear of + * this flag can be concatenated safely. + * + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking, by limiting the + * reshaping to a small piece around the + * breaking positin only, even if the breaking + * position carries the + * #HB_GLYPH_FLAG_UNSAFE_TO_BREAK or when + * hyphenation or other text transformation + * happens at line-break position, in the following + * way: + * + * 1. Iterate back from the line-break position till + * the the first cluster start position that is + * NOT unsafe-to-concat, 2. shape the segment from + * there till the end of line, 3. check whether the + * resulting glyph-run also is clear of the + * unsafe-to-concat at its start-of-text position; + * if it is, just splice it into place and the line + * is shaped; If not, move on to a position further + * back that is clear of unsafe-to-concat and retry + * from there, and repeat. + * + * At the start of next line a similar algorithm can + * be implemented. A slight complication will arise, + * because while our buffer API has a way to + * return flags for position corresponding to + * start-of-text, there is currently no position + * corresponding to end-of-text. This limitation + * can be alleviated by shaping more text than needed + * and looking for unsafe-to-concat flag within text + * clusters. + * + * The #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag will + * always imply this flag. + * + * Since: REPLACEME + * * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags. * * Flags for #hb_glyph_info_t. @@ -96,8 +145,9 @@ typedef struct hb_glyph_info_t { */ typedef enum { /*< flags >*/ HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001, + HB_GLYPH_FLAG_UNSAFE_TO_CONCAT = 0x00000002, - HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */ + HB_GLYPH_FLAG_DEFINED = 0x00000003 /* OR of all defined flags */ } hb_glyph_flags_t; HB_EXTERN hb_glyph_flags_t diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index c8ee0d90f..9f441305f 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -67,8 +67,8 @@ enum hb_buffer_scratch_flags_t { HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u, HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, - HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, - HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u, + HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000010u, + HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS = 0x00000020u, /* Reserved for complex shapers' internal use. */ HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, @@ -392,8 +392,19 @@ struct hb_buffer_t return; unsafe_to_break_impl (start, end); } - HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end); - HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end); + void unsafe_to_concat (unsigned int start, + unsigned int end) + { + if (end - start < 2) + return; + unsafe_to_break_impl (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + } + HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end, + hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, + hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + void unsafe_to_concat_from_outbuffer (unsigned int start, unsigned int end) + { unsafe_to_break_from_outbuffer (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); } /* Internal methods */ @@ -484,12 +495,7 @@ struct hb_buffer_t set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0) { if (inf.cluster != cluster) - { - if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - else - inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - } + inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED); inf.cluster = cluster; } @@ -505,13 +511,14 @@ struct hb_buffer_t void _unsafe_to_break_set_mask (hb_glyph_info_t *infos, unsigned int start, unsigned int end, - unsigned int cluster) + unsigned int cluster, + hb_mask_t mask) { for (unsigned int i = start; i < end; i++) if (cluster != infos[i].cluster) { - scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK; - infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + infos[i].mask |= mask; } } diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh index cb16caebd..e1dca6da3 100644 --- a/src/hb-ot-layout-gsubgpos.hh +++ b/src/hb-ot-layout-gsubgpos.hh @@ -520,7 +520,7 @@ struct hb_ot_apply_context_t : may_skip (const hb_glyph_info_t &info) const { return matcher.may_skip (c, info); } - bool next () + bool next (unsigned *unsafe_to = nullptr) { assert (num_items > 0); while (idx + num_items < end) @@ -543,11 +543,17 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_to) + *unsafe_to = idx + 1; return false; + } } + if (unsafe_to) + *unsafe_to = end; return false; } - bool prev () + bool prev (unsigned *unsafe_from = nullptr) { assert (num_items > 0); while (idx > num_items - 1) @@ -570,8 +576,14 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_from) + *unsafe_from = hb_max (1u, idx) - 1u; return false; + } } + if (unsafe_from) + *unsafe_from = 0; return false; } @@ -1008,7 +1020,12 @@ static inline bool match_input (hb_ot_apply_context_t *c, match_positions[0] = buffer->idx; for (unsigned int i = 1; i < count; i++) { - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + c->buffer->unsafe_to_concat (c->buffer->idx, unsafe_to); + return_trace (false); + } match_positions[i] = skippy_iter.idx; @@ -1197,8 +1214,14 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c, skippy_iter.set_match_func (match_func, match_data, backtrack); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.prev ()) + { + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + c->buffer->unsafe_to_concat_from_outbuffer (unsafe_from, c->buffer->idx); return_trace (false); + } + } *match_start = skippy_iter.idx; @@ -1220,8 +1243,14 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c, skippy_iter.set_match_func (match_func, match_data, lookahead); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.next ()) + { + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + c->buffer->unsafe_to_concat (c->buffer->idx + offset, unsafe_to); return_trace (false); + } + } *end_index = skippy_iter.idx + 1; diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index e16377f97..4bd8aaf03 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -1120,7 +1120,7 @@ hb_propagate_flags (hb_buffer_t *buffer) /* Propagate cluster-level glyph flags to be the same on all cluster glyphs. * Simplifies using them. */ - if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK)) + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS)) return; hb_glyph_info_t *info = buffer->info; @@ -1129,11 +1129,7 @@ hb_propagate_flags (hb_buffer_t *buffer) { unsigned int mask = 0; for (unsigned int i = start; i < end; i++) - if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - { - mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - break; - } + mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED; if (mask) for (unsigned int i = start; i < end; i++) info[i].mask |= mask; From 36b1561715737ff6608bf2eb6c21b64348abb226 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 19 Nov 2021 14:10:34 -0700 Subject: [PATCH 02/28] Implement hb-shape --verify unsafe-to-concat flag 15 tests failing. Those look like legit places that unsafe-to-concat needs more implementation. --- util/shape-options.hh | 198 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 189 insertions(+), 9 deletions(-) diff --git a/util/shape-options.hh b/util/shape-options.hh index b7eb846dc..7847fdb6b 100644 --- a/util/shape-options.hh +++ b/util/shape-options.hh @@ -129,7 +129,9 @@ struct shape_options_t { if (!verify_buffer_monotone (buffer, error)) return false; - if (!verify_buffer_safe_to_break (buffer, text_buffer, font, error)) + if (!verify_buffer_unsafe_to_break (buffer, text_buffer, font, error)) + return false; + if (!verify_buffer_unsafe_to_concat (buffer, text_buffer, font, error)) return false; return true; } @@ -158,17 +160,15 @@ struct shape_options_t return true; } - bool verify_buffer_safe_to_break (hb_buffer_t *buffer, - hb_buffer_t *text_buffer, - hb_font_t *font, - const char **error=nullptr) + bool verify_buffer_unsafe_to_break (hb_buffer_t *buffer, + hb_buffer_t *text_buffer, + hb_font_t *font, + const char **error=nullptr) { if (cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) { - /* Cannot perform this check without monotone clusters. - * Then again, unsafe-to-break flag is much harder to use without - * monotone clusters. */ + /* Cannot perform this check without monotone clusters. */ return true; } @@ -255,7 +255,7 @@ struct shape_options_t if (diff) { if (error) - *error = "Safe-to-break test failed."; + *error = "unsafe-to-break test failed."; ret = false; /* Return the reconstructed result instead so it can be inspected. */ @@ -269,6 +269,186 @@ struct shape_options_t return ret; } + bool verify_buffer_unsafe_to_concat (hb_buffer_t *buffer, + hb_buffer_t *text_buffer, + hb_font_t *font, + const char **error=nullptr) + { + if (cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && + cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) + { + /* Cannot perform this check without monotone clusters. */ + return true; + } + + /* Check that shuffling up text before shaping at safe-to-concat points + * is indeed safe. */ + + /* This is what we do: + * + * 1. We shape text once. Then segment the text at all the safe-to-concat + * points; + * + * 2. Then we create two buffers, one containing all the even segments and + * one all the odd segments. + * + * 3. Because all these segments were safe-to-concat at both ends, we + * expect that concatenating them and shaping should NOT change the + * shaping results of each segment. As such, we expect that after + * shaping the two buffers, we still get cluster boundaries at the + * segment boundaries, and that those all are safe-to-concat points. + * Moreover, that there are NOT any safe-to-concat points within the + * segments. + * + * 4. Finally, we reconstruct the shaping results of the original text by + * simply interleaving the shaping results of the segments from the two + * buffers, and assert that the total shaping results is the same as + * the one from original buffer in step 1. + */ + + hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer), + hb_buffer_create_similar (buffer)}; + hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); + hb_segment_properties_t props; + hb_buffer_get_segment_properties (buffer, &props); + hb_buffer_set_segment_properties (fragments[0], &props); + hb_buffer_set_segment_properties (fragments[1], &props); + hb_buffer_set_segment_properties (reconstruction, &props); + + unsigned num_glyphs; + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); + + unsigned num_chars; + hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); + + bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); + + if (!forward) + hb_buffer_reverse (buffer); + + /* + * Split text into segments and collect into to fragment streams. + */ + { + unsigned fragment_idx = 0; + unsigned start = 0; + unsigned text_start = 0; + unsigned text_end = 0; + for (unsigned end = 1; end < num_glyphs + 1; end++) + { + if (end < num_glyphs && + (info[end].cluster == info[end-1].cluster || + info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) + continue; + + /* Accumulate segment corresponding to glyphs start..end. */ + if (end == num_glyphs) + text_end = num_chars; + else + { + unsigned cluster = info[end].cluster; + while (text_end < num_chars && text[text_end].cluster < cluster) + text_end++; + } + assert (text_start < text_end); + + if (0) + printf("start %d end %d text start %d end %d\n", start, end, text_start, text_end); + +#if 0 + hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); + if (0 < text_start) + flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); + if (text_end < num_chars) + flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); + hb_buffer_set_flags (fragment, flags); +#endif + + hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); + + start = end; + text_start = text_end; + fragment_idx = 1 - fragment_idx; + } + } + + bool ret = true; + hb_buffer_diff_flags_t diff; + + /* + * Shape the two fragment streams. + */ + if (!hb_shape_full (font, fragments[0], features, num_features, shapers) || + !hb_shape_full (font, fragments[1], features, num_features, shapers)) + { + if (error) + *error = "All shapers failed while shaping fragments."; + ret = false; + goto out; + } + + if (!forward) + { + hb_buffer_reverse (fragments[0]); + hb_buffer_reverse (fragments[1]); + } + + /* + * Reconstruct results. + */ + { + unsigned fragment_idx = 0; + unsigned fragment_start[2] {0, 0}; + unsigned fragment_num_glyphs[2]; + hb_glyph_info_t *fragment_info[2]; + for (unsigned i = 0; i < 2; i++) + fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); + while (fragment_start[0] < fragment_num_glyphs[0] || + fragment_start[1] < fragment_num_glyphs[1]) + { + unsigned fragment_end = fragment_start[fragment_idx] + 1; + while (fragment_end < fragment_num_glyphs[fragment_idx] && + (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || + fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) + fragment_end++; + + hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); + + fragment_start[fragment_idx] = fragment_end; + fragment_idx = 1 - fragment_idx; + } + } + + if (!forward) + { + hb_buffer_reverse (buffer); + hb_buffer_reverse (reconstruction); + } + + /* + * Diff results. + */ + diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); + if (diff) + { + if (error) + *error = "unsafe-to-concat test failed."; + ret = false; + + /* Return the reconstructed result instead so it can be inspected. */ + hb_buffer_set_length (buffer, 0); + hb_buffer_append (buffer, reconstruction, 0, -1); + } + + + out: + hb_buffer_destroy (reconstruction); + hb_buffer_destroy (fragments[0]); + hb_buffer_destroy (fragments[1]); + + return ret; + } + void shape_closure (const char *text, int text_len, hb_font_t *font, hb_buffer_t *buffer, hb_set_t *glyphs) From e1cbd4539f392034899353f55daffa32e6d62c87 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 20 Nov 2021 11:46:48 -0700 Subject: [PATCH 03/28] [unsafe-to-concat] Add annotations to GPOS and kern Failures from 13 to 14. --- src/hb-kern.hh | 4 +++- src/hb-ot-layout-gpos-table.hh | 43 +++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/hb-kern.hh b/src/hb-kern.hh index 3f952fe7f..4e1a35bbc 100644 --- a/src/hb-kern.hh +++ b/src/hb-kern.hh @@ -67,8 +67,10 @@ struct hb_kern_machine_t } skippy_iter.reset (idx, 1); - if (!skippy_iter.next ()) + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) { + buffer->unsafe_to_concat (idx, unsafe_to); idx++; continue; } diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index be341fb44..8dd4cf4de 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1362,7 +1362,12 @@ struct PairPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + buffer->unsafe_to_concat (buffer->idx, unsafe_to); + return_trace (false); + } return_trace ((this+pairSet[index]).apply (c, valueFormat, skippy_iter.idx)); } @@ -1555,7 +1560,12 @@ struct PairPosFormat2 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + buffer->unsafe_to_concat (buffer->idx, unsafe_to); + return_trace (false); + } unsigned int len1 = valueFormat1.get_len (); unsigned int len2 = valueFormat2.get_len (); @@ -1861,7 +1871,12 @@ struct CursivePosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + return_trace (false); + } const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)]; if (!prev_record.exitAnchor) return_trace (false); @@ -2128,7 +2143,13 @@ struct MarkBasePosFormat1 skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); do { - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + return_trace (false); + } + /* We only want to attach to the first of a MultipleSubst sequence. * https://github.com/harfbuzz/harfbuzz/issues/740 * Reject others... @@ -2382,7 +2403,12 @@ struct MarkLigPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + return_trace (false); + } /* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */ //if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); } @@ -2579,7 +2605,12 @@ struct MarkMarkPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (c->lookup_props & ~LookupFlag::IgnoreFlags); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + return_trace (false); + } if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) { return_trace (false); } From 596bc7e939d927b4b211cdd847fbb1208789b999 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 20 Nov 2021 12:02:47 -0700 Subject: [PATCH 04/28] [unsafe-to-concat] Add to GPOS kerning --- src/hb-ot-layout-gpos-table.hh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index 8dd4cf4de..4f876d680 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1235,6 +1235,7 @@ struct PairSet buffer->idx = pos; return_trace (true); } + buffer->unsafe_to_concat (buffer->idx, pos + 1); return_trace (false); } @@ -1640,6 +1641,8 @@ struct PairPosFormat2 success: if (applied_first || applied_second) buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1); + else + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); boring: From 78481b32c0a14f0ee1c4baec4d5208b385be0b2e Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 21 Nov 2021 16:50:34 -0700 Subject: [PATCH 05/28] [gsubgpos] Combine input/backtrack/lookahead unsafe-to-concat I feel like this is correct logic. Still have to prove. Errors unchanged at 10. --- src/hb-ot-layout-gpos-table.hh | 2 +- src/hb-ot-layout-gsub-table.hh | 15 +++--- src/hb-ot-layout-gsubgpos.hh | 93 +++++++++++++++++++--------------- 3 files changed, 62 insertions(+), 48 deletions(-) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index 4f876d680..bd1577fbb 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1642,7 +1642,7 @@ struct PairPosFormat2 if (applied_first || applied_second) buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1); else - buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); boring: diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh index edc89eb37..e5f13bd1f 100644 --- a/src/hb-ot-layout-gsub-table.hh +++ b/src/hb-ot-layout-gsub-table.hh @@ -826,14 +826,14 @@ struct Ligature unsigned int total_component_count = 0; - unsigned int match_length = 0; + unsigned int match_end = 0; unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; if (likely (!match_input (c, count, &component[1], match_glyph, nullptr, - &match_length, + &match_end, match_positions, &total_component_count))) return_trace (false); @@ -841,7 +841,7 @@ struct Ligature ligate_input (c, count, match_positions, - match_length, + match_end, ligGlyph, total_component_count); @@ -1296,7 +1296,7 @@ struct ReverseChainSingleSubstFormat1 match_lookahead (c, lookahead.len, (HBUINT16 *) lookahead.arrayZ, match_coverage, this, - 1, &end_index)) + c->buffer->idx + 1, &end_index)) { c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); c->replace_glyph_inplace (substitute[index]); @@ -1305,8 +1305,11 @@ struct ReverseChainSingleSubstFormat1 * calls us through a Context lookup. */ return_trace (true); } - - return_trace (false); + else + { + c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); + return_trace (false); + } } templatebuffer->unsafe_to_concat (c->buffer->idx, unsafe_to); + *end_position = unsafe_to; return_trace (false); } @@ -1079,7 +1079,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->info[skippy_iter.idx]); } - *end_offset = skippy_iter.idx - buffer->idx + 1; + *end_position = skippy_iter.idx + 1; if (p_total_component_count) *p_total_component_count = total_component_count; @@ -1089,7 +1089,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, static inline bool ligate_input (hb_ot_apply_context_t *c, unsigned int count, /* Including the first glyph */ const unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ - unsigned int match_length, + unsigned int match_end, hb_codepoint_t lig_glyph, unsigned int total_component_count) { @@ -1097,7 +1097,7 @@ static inline bool ligate_input (hb_ot_apply_context_t *c, hb_buffer_t *buffer = c->buffer; - buffer->merge_clusters (buffer->idx, buffer->idx + match_length); + buffer->merge_clusters (buffer->idx, match_end); /* - If a base and one or more marks ligate, consider that as a base, NOT * ligature, such that all following marks can still attach to it. @@ -1218,13 +1218,12 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c, unsigned unsafe_from; if (!skippy_iter.prev (&unsafe_from)) { - c->buffer->unsafe_to_concat_from_outbuffer (unsafe_from, c->buffer->idx); + *match_start = unsafe_from; return_trace (false); } } *match_start = skippy_iter.idx; - return_trace (true); } @@ -1233,13 +1232,13 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c, const HBUINT16 lookahead[], match_func_t match_func, const void *match_data, - unsigned int offset, + unsigned int start_index, unsigned int *end_index) { TRACE_APPLY (nullptr); hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_context; - skippy_iter.reset (c->buffer->idx + offset - 1, count); + skippy_iter.reset (start_index - 1, count); skippy_iter.set_match_func (match_func, match_data, lookahead); for (unsigned int i = 0; i < count; i++) @@ -1247,13 +1246,12 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c, unsigned unsafe_to; if (!skippy_iter.next (&unsafe_to)) { - c->buffer->unsafe_to_concat (c->buffer->idx + offset, unsafe_to); + *end_index = unsafe_to; return_trace (false); } } *end_index = skippy_iter.idx + 1; - return_trace (true); } @@ -1379,15 +1377,13 @@ static inline void recurse_lookups (context_t *c, c->recurse (lookupRecord[i].lookupListIndex); } -static inline bool apply_lookup (hb_ot_apply_context_t *c, +static inline void apply_lookup (hb_ot_apply_context_t *c, unsigned int count, /* Including the first glyph */ unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ unsigned int lookupCount, const LookupRecord lookupRecord[], /* Array of LookupRecords--in design order */ - unsigned int match_length) + unsigned int match_end) { - TRACE_APPLY (nullptr); - hb_buffer_t *buffer = c->buffer; int end; @@ -1395,7 +1391,7 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c, * Adjust. */ { unsigned int bl = buffer->backtrack_len (); - end = bl + match_length; + end = bl + match_end - buffer->idx; int delta = bl - buffer->idx; /* Convert positions to new indexing. */ @@ -1497,8 +1493,6 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c, } (void) buffer->move_to (end); - - return_trace (true); } @@ -1586,17 +1580,25 @@ static inline bool context_apply_lookup (hb_ot_apply_context_t *c, const LookupRecord lookupRecord[], ContextApplyLookupContext &lookup_context) { - unsigned int match_length = 0; - unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; - return match_input (c, - inputCount, input, - lookup_context.funcs.match, lookup_context.match_data, - &match_length, match_positions) - && (c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length), - apply_lookup (c, - inputCount, match_positions, - lookupCount, lookupRecord, - match_length)); + unsigned match_end = 0; + unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; + if (match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data, + &match_end, match_positions)) + { + c->buffer->unsafe_to_break (c->buffer->idx, match_end); + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_end); + return true; + } + else + { + c->buffer->unsafe_to_concat (c->buffer->idx, match_end); + return false; + } } struct Rule @@ -2488,12 +2490,13 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, const LookupRecord lookupRecord[], ChainContextApplyLookupContext &lookup_context) { - unsigned int start_index = 0, match_length = 0, end_index = 0; - unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; - return match_input (c, - inputCount, input, - lookup_context.funcs.match, lookup_context.match_data[1], - &match_length, match_positions) + unsigned start_index = c->buffer->out_len, end_index = c->buffer->idx; + unsigned match_end = 0; + unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; + if (match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data[1], + &match_end, match_positions) && (end_index = match_end) && match_backtrack (c, backtrackCount, backtrack, lookup_context.funcs.match, lookup_context.match_data[0], @@ -2501,12 +2504,20 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, && match_lookahead (c, lookaheadCount, lookahead, lookup_context.funcs.match, lookup_context.match_data[2], - match_length, &end_index) - && (c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index), - apply_lookup (c, - inputCount, match_positions, - lookupCount, lookupRecord, - match_length)); + match_end, &end_index)) + { + c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_end); + return true; + } + else + { + c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); + return false; + } } struct ChainRule From 56d081955c768a4ed55354fe57577cb10706fb81 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 4 Dec 2021 19:59:55 -0800 Subject: [PATCH 06/28] [buffer] Rename _unsafe_to_break_set_mask to _infos_set_glyph_flags --- src/hb-buffer.cc | 6 +++--- src/hb-buffer.hh | 27 +++++++++++++-------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 6d7c3182c..a067b3dc4 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -577,7 +577,7 @@ hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end, hb_mask { unsigned int cluster = UINT_MAX; cluster = _infos_find_min_cluster (info, start, end, cluster); - _unsafe_to_break_set_mask (info, start, end, cluster, mask); + _infos_set_glyph_flags (info, start, end, cluster, mask); } void hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, hb_mask_t mask) @@ -595,8 +595,8 @@ hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int en cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); cluster = _infos_find_min_cluster (info, idx, end, cluster); - _unsafe_to_break_set_mask (out_info, start, out_len, cluster, mask); - _unsafe_to_break_set_mask (info, idx, end, cluster, mask); + _infos_set_glyph_flags (out_info, start, out_len, cluster, mask); + _infos_set_glyph_flags (info, idx, end, cluster, mask); } void diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index 9f441305f..b7e2ce271 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -498,7 +498,19 @@ struct hb_buffer_t inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED); inf.cluster = cluster; } - + void + _infos_set_glyph_flags (hb_glyph_info_t *infos, + unsigned int start, unsigned int end, + unsigned int cluster, + hb_mask_t mask) + { + for (unsigned int i = start; i < end; i++) + if (cluster != infos[i].cluster) + { + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + infos[i].mask |= mask; + } + } static unsigned _infos_find_min_cluster (const hb_glyph_info_t *infos, unsigned start, unsigned end, @@ -508,19 +520,6 @@ struct hb_buffer_t cluster = hb_min (cluster, infos[i].cluster); return cluster; } - void - _unsafe_to_break_set_mask (hb_glyph_info_t *infos, - unsigned int start, unsigned int end, - unsigned int cluster, - hb_mask_t mask) - { - for (unsigned int i = start; i < end; i++) - if (cluster != infos[i].cluster) - { - scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; - infos[i].mask |= mask; - } - } void clear_glyph_flags (hb_mask_t mask = 0) { From f91ce56e08ed9acdfaf5dfe994d950195dd10881 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 4 Dec 2021 20:07:05 -0800 Subject: [PATCH 07/28] [buffer] Add default cluster value in find_min_cluster --- src/hb-buffer.cc | 6 ++---- src/hb-buffer.hh | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index a067b3dc4..e66d1a785 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -575,8 +575,7 @@ done: void hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end, hb_mask_t mask) { - unsigned int cluster = UINT_MAX; - cluster = _infos_find_min_cluster (info, start, end, cluster); + unsigned cluster = _infos_find_min_cluster (info, start, end); _infos_set_glyph_flags (info, start, end, cluster, mask); } void @@ -591,9 +590,8 @@ hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int en assert (start <= out_len); assert (idx <= end); - unsigned int cluster = UINT_MAX; + unsigned cluster = _infos_find_min_cluster (info, idx, end); cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); - cluster = _infos_find_min_cluster (info, idx, end, cluster); _infos_set_glyph_flags (out_info, start, out_len, cluster, mask); _infos_set_glyph_flags (info, idx, end, cluster, mask); diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index b7e2ce271..ea71c8524 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -514,7 +514,7 @@ struct hb_buffer_t static unsigned _infos_find_min_cluster (const hb_glyph_info_t *infos, unsigned start, unsigned end, - unsigned cluster) + unsigned cluster = UINT_MAX) { for (unsigned int i = start; i < end; i++) cluster = hb_min (cluster, infos[i].cluster); From d98a0fc88e8bcf7993c92425212cd6c57a632a01 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 4 Dec 2021 20:43:27 -0800 Subject: [PATCH 08/28] [buffer] Consolidate glyph-flags implementation --- src/hb-buffer.cc | 25 ---------------- src/hb-buffer.hh | 76 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 62 insertions(+), 39 deletions(-) diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index e66d1a785..3e09260cf 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -572,31 +572,6 @@ done: skip_glyph (); } -void -hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end, hb_mask_t mask) -{ - unsigned cluster = _infos_find_min_cluster (info, start, end); - _infos_set_glyph_flags (info, start, end, cluster, mask); -} -void -hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, hb_mask_t mask) -{ - if (!have_output) - { - unsafe_to_break_impl (start, end); - return; - } - - assert (start <= out_len); - assert (idx <= end); - - unsigned cluster = _infos_find_min_cluster (info, idx, end); - cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); - - _infos_set_glyph_flags (out_info, start, out_len, cluster, mask); - _infos_set_glyph_flags (info, idx, end, cluster, mask); -} - void hb_buffer_t::guess_segment_properties () { diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index ea71c8524..942da91e1 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -385,26 +385,74 @@ struct hb_buffer_t /* Merge clusters for deleting current glyph, and skip it. */ HB_INTERNAL void delete_glyph (); - void unsafe_to_break (unsigned int start, - unsigned int end) + + void set_glyph_flags (unsigned start, + unsigned end, + hb_mask_t mask, + bool interior = false, + bool from_out_buffer = false) { - if (end - start < 2) + if (interior && !from_out_buffer && end - start < 2) return; - unsafe_to_break_impl (start, end); + + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + + if (!from_out_buffer || !have_output) + { + if (!interior) + { + for (unsigned i = start; i < end; i++) + info[i].mask |= mask; + } + else + { + unsigned cluster = _infos_find_min_cluster (info, start, end); + _infos_set_glyph_flags (info, start, end, cluster, mask); + } + } + else + { + assert (start <= out_len); + assert (idx <= end); + + if (!interior) + { + for (unsigned i = start; i < out_len; i++) + out_info[i].mask |= mask; + for (unsigned i = idx; i < end; i++) + info[i].mask |= mask; + } + else + { + unsigned cluster = _infos_find_min_cluster (info, idx, end); + cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); + + _infos_set_glyph_flags (out_info, start, out_len, cluster, mask); + _infos_set_glyph_flags (info, idx, end, cluster, mask); + } + } } - void unsafe_to_concat (unsigned int start, - unsigned int end) + + void unsafe_to_break (unsigned int start, unsigned int end) { - if (end - start < 2) - return; - unsafe_to_break_impl (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + true); + } + void unsafe_to_concat (unsigned int start, unsigned int end) + { + set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + true); + } + void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) + { + set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + true, true); } - HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end, - hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); - HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, - hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); void unsafe_to_concat_from_outbuffer (unsigned int start, unsigned int end) - { unsafe_to_break_from_outbuffer (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); } + { + set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + true, true); + } /* Internal methods */ From 60006d368770982c6a0d3bf06eb937773343cf5b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 20 Jan 2022 15:29:28 -0700 Subject: [PATCH 09/28] [gsubgpos] Adjust chaining unsafe-to-concat application Fixes three tests. --- src/hb-ot-layout-gsubgpos.hh | 44 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh index 9c48b0ee8..8ee114d5b 100644 --- a/src/hb-ot-layout-gsubgpos.hh +++ b/src/hb-ot-layout-gsubgpos.hh @@ -2490,34 +2490,38 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, const LookupRecord lookupRecord[], ChainContextApplyLookupContext &lookup_context) { - unsigned start_index = c->buffer->out_len, end_index = c->buffer->idx; + unsigned end_index = c->buffer->idx; unsigned match_end = 0; unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; - if (match_input (c, - inputCount, input, - lookup_context.funcs.match, lookup_context.match_data[1], - &match_end, match_positions) && (end_index = match_end) - && match_backtrack (c, - backtrackCount, backtrack, - lookup_context.funcs.match, lookup_context.match_data[0], - &start_index) - && match_lookahead (c, - lookaheadCount, lookahead, - lookup_context.funcs.match, lookup_context.match_data[2], - match_end, &end_index)) + if (!(match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data[1], + &match_end, match_positions) && (end_index = match_end) + && match_lookahead (c, + lookaheadCount, lookahead, + lookup_context.funcs.match, lookup_context.match_data[2], + match_end, &end_index))) { - c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); - apply_lookup (c, - inputCount, match_positions, - lookupCount, lookupRecord, - match_end); - return true; + c->buffer->unsafe_to_concat (c->buffer->idx, end_index); + return false; } - else + + unsigned start_index = c->buffer->out_len; + if (!match_backtrack (c, + backtrackCount, backtrack, + lookup_context.funcs.match, lookup_context.match_data[0], + &start_index)) { c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); return false; } + + c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_end); + return true; } struct ChainRule From c0058892bec52e4f0346b1139ebb206c03e094e2 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 20 Jan 2022 15:51:04 -0700 Subject: [PATCH 10/28] [unsafe-to-concat] Mark entire buffer unsafe-to-concat if kerx format2 --- src/hb-aat-layout-kerx-table.hh | 1 + src/hb-buffer.hh | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/hb-aat-layout-kerx-table.hh b/src/hb-aat-layout-kerx-table.hh index 0354b47d5..346024f2b 100644 --- a/src/hb-aat-layout-kerx-table.hh +++ b/src/hb-aat-layout-kerx-table.hh @@ -406,6 +406,7 @@ struct KerxSubTableFormat2 accelerator_t accel (*this, c); hb_kern_machine_t machine (accel, header.coverage & header.CrossStream); machine.kern (c->font, c->buffer, c->plan->kern_mask); + c->buffer->set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); return_trace (true); } diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index 942da91e1..c0d2e46f1 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -386,12 +386,15 @@ struct hb_buffer_t HB_INTERNAL void delete_glyph (); - void set_glyph_flags (unsigned start, - unsigned end, - hb_mask_t mask, + void set_glyph_flags (hb_mask_t mask, + unsigned start = 0, + unsigned end = (unsigned) -1, bool interior = false, bool from_out_buffer = false) { + if (end == (unsigned) -1) + end = len; + if (interior && !from_out_buffer && end - start < 2) return; @@ -435,22 +438,26 @@ struct hb_buffer_t void unsafe_to_break (unsigned int start, unsigned int end) { - set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, true); } void unsafe_to_concat (unsigned int start, unsigned int end) { - set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, true); } void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) { - set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, true, true); } void unsafe_to_concat_from_outbuffer (unsigned int start, unsigned int end) { - set_glyph_flags (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, true, true); } From 48c5f26199808f40251cdaef7494456e9f23acb9 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 21 Jan 2022 12:18:50 -0700 Subject: [PATCH 11/28] [unsafe-to-concat] Fix PairPos2 logic Test failures down from 8 to 6: 113/400 harfbuzz:shaping+in-house / mongolian-variation-selector FAIL 0.06s exit status 1 203/400 harfbuzz:shaping+aots / gpos3 FAIL 0.06s exit status 1 204/400 harfbuzz:shaping+aots / gpos4_lookupflag FAIL 0.06s exit status 1 260/400 harfbuzz:shaping+aots / gsub4_1_multiple_ligatures FAIL 0.06s exit status 1 264/400 harfbuzz:shaping+aots / lookupflag_ignore_attach FAIL 0.06s exit status 1 266/400 harfbuzz:shaping+aots / lookupflag_ignore_combination FAIL 0.06s exit status 1 --- src/hb-ot-layout-gpos-table.hh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index bd1577fbb..af91f2aaf 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1574,7 +1574,11 @@ struct PairPosFormat2 unsigned int klass1 = (this+classDef1).get_class (buffer->cur().codepoint); unsigned int klass2 = (this+classDef2).get_class (buffer->info[skippy_iter.idx].codepoint); - if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) return_trace (false); + if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) + { + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); + return_trace (false); + } const Value *v = &values[record_len * (klass1 * class2Count + klass2)]; @@ -1642,9 +1646,9 @@ struct PairPosFormat2 if (applied_first || applied_second) buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1); else - buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); - boring: + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); + buffer->idx = skippy_iter.idx; if (len2) From 235c3a129581e96701fe055341e56699766df5fa Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 21 Jan 2022 15:17:40 -0700 Subject: [PATCH 12/28] [unsafe-to-concat] Adjust Arabic joining logic Test failures done one more. Fixed the mongolian-variation-selector test. Another test case: $ util/hb-shape NotoSansArabic-Regular.ttf -u 628,200c,628 --show-flags --verify --- src/hb-ot-shape-complex-arabic.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index 222c5d6b7..6c656197e 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -321,6 +321,10 @@ arabic_joining (hb_buffer_t *buffer) info[prev].arabic_shaping_action() = entry->prev_action; buffer->unsafe_to_break (prev, i + 1); } + else if (2 <= state && state <= 5) /* States that have a possible prev_action. */ + { + buffer->unsafe_to_concat (prev, i + 1); + } info[i].arabic_shaping_action() = entry->curr_action; From 4f04baef17bf5b150c1594f6e80604974e6e95e4 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 21 Jan 2022 18:26:54 -0700 Subject: [PATCH 13/28] [unsafe-to-concat] Further adjust Arabic joining logic at boundary --- src/hb-ot-shape-complex-arabic.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index 6c656197e..5eec4af37 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -341,7 +341,14 @@ arabic_joining (hb_buffer_t *buffer) const arabic_state_table_entry *entry = &arabic_state_table[state][this_type]; if (entry->prev_action != NONE && prev != UINT_MAX) + { info[prev].arabic_shaping_action() = entry->prev_action; + buffer->unsafe_to_break (prev, buffer->len); + } + else if (2 <= state && state <= 5) /* States that have a possible prev_action. */ + { + buffer->unsafe_to_concat (prev, buffer->len); + } break; } } From ea1b32c8c198da4475941f459b16dc6d7e28148a Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 21 Jan 2022 18:58:33 -0700 Subject: [PATCH 14/28] [unsafe-to-concat] Adjust "interior"ness of "from_out_buffer" --- src/hb-buffer.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index c0d2e46f1..4a567b3e1 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -458,7 +458,7 @@ struct hb_buffer_t { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, start, end, - true, true); + false, true); } From 11bdd7a020d3e99c0ff43f34cf1724a95713b463 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 21 Jan 2022 18:59:06 -0700 Subject: [PATCH 15/28] [unsafe-to-concat] Adjust CursivePos Doesn't fix the test yet. --- src/hb-ot-layout-gpos-table.hh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index af91f2aaf..c24ccdd98 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1886,7 +1886,11 @@ struct CursivePosFormat1 } const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)]; - if (!prev_record.exitAnchor) return_trace (false); + if (!prev_record.exitAnchor) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx); + return_trace (false); + } unsigned int i = skippy_iter.idx; unsigned int j = buffer->idx; From 909e34f68a969275bc9b14c63e03d5d131823d91 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 09:44:13 -0700 Subject: [PATCH 16/28] [unsafe-to-concat] Adjust Arabic-joining start boundary condition more --- src/hb-ot-shape-complex-arabic.cc | 14 ++++++++++++-- test/shape/data/in-house/Makefile.sources | 1 + .../34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf | Bin 0 -> 1904 bytes test/shape/data/in-house/meson.build | 1 + .../data/in-house/tests/unsafe-to-concat.tests | 1 + 5 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 test/shape/data/in-house/fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf create mode 100644 test/shape/data/in-house/tests/unsafe-to-concat.tests diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index 5eec4af37..2298aa92f 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -321,9 +321,19 @@ arabic_joining (hb_buffer_t *buffer) info[prev].arabic_shaping_action() = entry->prev_action; buffer->unsafe_to_break (prev, i + 1); } - else if (2 <= state && state <= 5) /* States that have a possible prev_action. */ + else { - buffer->unsafe_to_concat (prev, i + 1); + if (prev == UINT_MAX) + { + if (this_type >= JOINING_TYPE_R) + buffer->unsafe_to_concat_from_outbuffer (0, i + 1); + } + else + { + if (this_type >= JOINING_TYPE_R || + (2 <= state && state <= 5) /* States that have a possible prev_action. */) + buffer->unsafe_to_concat (prev, i + 1); + } } info[i].arabic_shaping_action() = entry->curr_action; diff --git a/test/shape/data/in-house/Makefile.sources b/test/shape/data/in-house/Makefile.sources index 01047d548..02c9e914b 100644 --- a/test/shape/data/in-house/Makefile.sources +++ b/test/shape/data/in-house/Makefile.sources @@ -59,6 +59,7 @@ TESTS = \ tests/tibetan-contractions-2.tests \ tests/tibetan-vowels.tests \ tests/tt-kern-gpos.tests \ + tests/unsafe-to-concat.tests \ tests/use-indic3.tests \ tests/use-marchen.tests \ tests/use-syllable.tests \ diff --git a/test/shape/data/in-house/fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf b/test/shape/data/in-house/fonts/34da9aab7bee86c4dfc3b85e423435822fdf4b62.ttf new file mode 100644 index 0000000000000000000000000000000000000000..8508fbeeb5fd773bfe3cf2b2170f2f6c74100b23 GIT binary patch literal 1904 zcma)6U2GIp6#nk)%xu~IY$?0jvVSg1yCs$4cDr3@`6;y9t_h`EyQmG()hS(QW7{q5 zLV*M&(I^rhFeYli2jYv87)*RHCYosCgFg9SV#J8Q(L|z>_+Wyu?fBi9U8sPG_h#mv z^WE=!=ggfm0|B5Gr{TazCOt5SEr7wJbbxd;2wOJgVLe?0w6fa6vAj~|>d=6b|$m+0T1zvJ-qi9?H@SMQ^J6%b341!E$4;PB7fuP2t8 zBoKMQ`yu@j{pQJ;((xu0-_pOI{^sf8K|`rmP63M4Ynm~R&!N}x9{nryBeTX#VZ)`K z44@{d|K?nAp)}?T%mNzo;k;=gY_>bz7B49==tP{*#Ch|YjjM|yN8DM)pSEzAd+@cz zuOhBPyJwkfqT+cGAbX9C=9eMhH`6#}zJY3>F&gViraBWouSXy9hC*IXDD*+d;|Yg7 zo)Al89ly#t5M*r@cWa}rdp$m1XCl>=jJ0m{F|r; zlUsV%=)RrXyz7F#u_Ge4a^qj=+^y+kOJktg9cXB*uMZ@*@9ve)1M_crPH+R>t%;QV z3}J;yTpd0BjlKtJ8|$CYMr&GHVhi2j$mmFTVt+YKo+Q>=ad$0HceTaWH`EU6?uH$^ zWL4Rry6~_z>b`fh&m9Tja7CyxvSh1LjGF-TPA^(_r$DeRGev#CxrH?<8>o>1I z`o7t`d^PCQvUGJQ5@kn@A@BGBVRZmuQOooX6MO~!J205nWezRB90r0(vw6t~I&Z5F ztXOr7h;z`We3sh4UV9aJB=9}aYE4l!RZ-N_@`GJ9GTCfK?AA9$YU~_ZUip#RyCp>& z6BoHeq^?!DJO>X?L!OTZDmHP|xe6#!LH54A{`J*&_SulY;lu$MrONMNl9z1@Do>Ve z1$8)6wjJoh%Vk?d99PP=2EV8(+d4LjDEd*v98O>!Q#eem5)~7S=s*|REh|Agf(&^@ zGNw_WH^kX*{kirKnI9>;i}Aaj_}C0-HaU|^QJ5$xf!yBrcmMjS3X(s>WmLr)h@7>BCC^UP^qAz^nbFJ zd((W4@pl{ia|uN_geo+l3#X_OYlKTwJCDvz#Z!rGavdD#j+fVt@|ti>6iefU>Efa> zZ&OnPHC9M!V9TMYS>vWNH9J+h>C6-+rX)QPkN2|}b{IRv9!<0br|dh4%P&)2f_Hhj z_}>geqz8xgqlzpJ*_qa`_uKFY6l=fEm^a3$Tw%#oYC51A#&ijOSs@~_G{j_0*laPR tm>^5u?Ys&#T!`|$?<(U}-09&Zd)87cGbJ9io Date: Sat, 22 Jan 2022 10:16:23 -0700 Subject: [PATCH 17/28] [unsafe-to-concat] Adjust GPOS lookbacks Fixes Cursive aots test, fails a couple new aots ones. --- src/hb-ot-layout-gpos-table.hh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index c24ccdd98..e91f416d3 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1881,14 +1881,14 @@ struct CursivePosFormat1 unsigned unsafe_from; if (!skippy_iter.prev (&unsafe_from)) { - buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); return_trace (false); } const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)]; if (!prev_record.exitAnchor) { - buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx); + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); return_trace (false); } @@ -2157,7 +2157,7 @@ struct MarkBasePosFormat1 unsigned unsafe_from; if (!skippy_iter.prev (&unsafe_from)) { - buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); return_trace (false); } @@ -2417,7 +2417,7 @@ struct MarkLigPosFormat1 unsigned unsafe_from; if (!skippy_iter.prev (&unsafe_from)) { - buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); return_trace (false); } @@ -2619,7 +2619,7 @@ struct MarkMarkPosFormat1 unsigned unsafe_from; if (!skippy_iter.prev (&unsafe_from)) { - buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx); + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); return_trace (false); } From b443898cce2251ca2aaf332c13886577ab8db416 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 10:24:59 -0700 Subject: [PATCH 18/28] [unsafe-to-concat] Adjust MarkBasePos A couple more aots tests down. Four failing: 209/401 harfbuzz:shaping+aots / gpos6 FAIL 0.06s exit status 1 261/401 harfbuzz:shaping+aots / gsub4_1_multiple_ligatures FAIL 0.07s exit status 1 265/401 harfbuzz:shaping+aots / lookupflag_ignore_attach FAIL 0.07s exit status 1 267/401 harfbuzz:shaping+aots / lookupflag_ignore_combination FAIL 0.07s exit status 1 --- src/hb-ot-layout-gpos-table.hh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index e91f416d3..2b2125f45 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -2183,7 +2183,11 @@ struct MarkBasePosFormat1 //if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); } unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint); - if (base_index == NOT_COVERED) return_trace (false); + if (base_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx)); } From a575992057bcfab2ee3a720ad321c907cf20ff22 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 10:30:39 -0700 Subject: [PATCH 19/28] [unsafe-to-concat] Mark LigatureSubst Failures down to two: 209/401 harfbuzz:shaping+aots / gpos6 FAIL 0.06s exit status 1 265/401 harfbuzz:shaping+aots / lookupflag_ignore_attach FAIL 0.06s exit status 1 --- src/hb-ot-layout-gsub-table.hh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh index e5f13bd1f..0b0bc547b 100644 --- a/src/hb-ot-layout-gsub-table.hh +++ b/src/hb-ot-layout-gsub-table.hh @@ -836,7 +836,10 @@ struct Ligature &match_end, match_positions, &total_component_count))) + { + c->buffer->unsafe_to_concat (c->buffer->idx, match_end); return_trace (false); + } ligate_input (c, count, From 6a7d6d4b64dd0eff7d0e7191ad30268a2502db5f Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 10:33:35 -0700 Subject: [PATCH 20/28] [unsafe-to-concat] More annotations for MarkLigaturePos --- src/hb-ot-layout-gpos-table.hh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index 2b2125f45..87c2fef22 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -2430,14 +2430,22 @@ struct MarkLigPosFormat1 unsigned int j = skippy_iter.idx; unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint); - if (lig_index == NOT_COVERED) return_trace (false); + if (lig_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } const LigatureArray& lig_array = this+ligatureArray; const LigatureAttach& lig_attach = lig_array[lig_index]; /* Find component to attach to */ unsigned int comp_count = lig_attach.rows; - if (unlikely (!comp_count)) return_trace (false); + if (unlikely (!comp_count)) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } /* We must now check whether the ligature ID of the current mark glyph * is identical to the ligature ID of the found ligature. If yes, we From 8663eda4fde11d360cca15936e9d2ae07c357958 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 10:35:05 -0700 Subject: [PATCH 21/28] [unsafe-to-concat] More annotations for MarkMarkPos Failures down to one: 265/401 harfbuzz:shaping+aots / lookupflag_ignore_attach FAIL --- src/hb-ot-layout-gpos-table.hh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index 87c2fef22..e28c951f3 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -2635,7 +2635,11 @@ struct MarkMarkPosFormat1 return_trace (false); } - if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) { return_trace (false); } + if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } unsigned int j = skippy_iter.idx; @@ -2660,11 +2664,16 @@ struct MarkMarkPosFormat1 } /* Didn't match. */ + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); return_trace (false); good: unsigned int mark2_index = (this+mark2Coverage).get_coverage (buffer->info[j].codepoint); - if (mark2_index == NOT_COVERED) return_trace (false); + if (mark2_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } return_trace ((this+mark1Array).apply (c, mark1_index, mark2_index, this+mark2Array, classCount, j)); } From 14d43d12359ba14e4bb95c24903be70808f66738 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 10:46:18 -0700 Subject: [PATCH 22/28] [unsafe-to-concat] Adjust end conditions --- src/hb-buffer.hh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index 4a567b3e1..b92c87693 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -392,8 +392,7 @@ struct hb_buffer_t bool interior = false, bool from_out_buffer = false) { - if (end == (unsigned) -1) - end = len; + end = hb_min (end, len); if (interior && !from_out_buffer && end - start < 2) return; From bcdfedbc09c308965239c4b7a750a9cc20618542 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 11:19:05 -0700 Subject: [PATCH 23/28] [unsafe-to-concat] Mark as unsafe in kern machine Fixes that last test. Yay! --- src/hb-aat-layout-kerx-table.hh | 1 - src/hb-kern.hh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hb-aat-layout-kerx-table.hh b/src/hb-aat-layout-kerx-table.hh index 346024f2b..0354b47d5 100644 --- a/src/hb-aat-layout-kerx-table.hh +++ b/src/hb-aat-layout-kerx-table.hh @@ -406,7 +406,6 @@ struct KerxSubTableFormat2 accelerator_t accel (*this, c); hb_kern_machine_t machine (accel, header.coverage & header.CrossStream); machine.kern (c->font, c->buffer, c->plan->kern_mask); - c->buffer->set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); return_trace (true); } diff --git a/src/hb-kern.hh b/src/hb-kern.hh index 4e1a35bbc..debfbb1ce 100644 --- a/src/hb-kern.hh +++ b/src/hb-kern.hh @@ -49,6 +49,7 @@ struct hb_kern_machine_t hb_mask_t kern_mask, bool scale = true) const { + buffer->set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); OT::hb_ot_apply_context_t c (1, font, buffer); c.set_lookup_mask (kern_mask); c.set_lookup_props (OT::LookupFlag::IgnoreMarks); @@ -70,7 +71,6 @@ struct hb_kern_machine_t unsigned unsafe_to; if (!skippy_iter.next (&unsafe_to)) { - buffer->unsafe_to_concat (idx, unsafe_to); idx++; continue; } From 374a6f186d7094370ffd6aed80688203c8f5a067 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 11:29:00 -0700 Subject: [PATCH 24/28] [fallback-shape] Add buffer trace log --- src/hb-kern.hh | 5 +++++ src/hb-ot-shape-fallback.cc | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/hb-kern.hh b/src/hb-kern.hh index debfbb1ce..943631932 100644 --- a/src/hb-kern.hh +++ b/src/hb-kern.hh @@ -49,6 +49,9 @@ struct hb_kern_machine_t hb_mask_t kern_mask, bool scale = true) const { + if (!buffer->message (font, "start kern")) + return; + buffer->set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); OT::hb_ot_apply_context_t c (1, font, buffer); c.set_lookup_mask (kern_mask); @@ -127,6 +130,8 @@ struct hb_kern_machine_t skip: idx = skippy_iter.idx; } + + (void) buffer->message (font, "end kern"); } const Driver &driver; diff --git a/src/hb-ot-shape-fallback.cc b/src/hb-ot-shape-fallback.cc index 0fae26389..671f30327 100644 --- a/src/hb-ot-shape-fallback.cc +++ b/src/hb-ot-shape-fallback.cc @@ -446,6 +446,9 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, return; #endif + if (!buffer->message (font, "start fallback mark")) + return; + _hb_buffer_assert_gsubgpos_vars (buffer); unsigned int start = 0; @@ -457,6 +460,8 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, start = i; } position_cluster (plan, font, buffer, start, count, adjust_offsets_when_zeroing); + + (void) buffer->message (font, "end fallback mark"); } @@ -492,6 +497,9 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, #endif #ifndef HB_DISABLE_DEPRECATED + if (!buffer->message (font, "start fallback kern")) + return; + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction) ? !font->has_glyph_h_kerning_func () : !font->has_glyph_v_kerning_func ()) @@ -508,6 +516,8 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, if (reverse) buffer->reverse (); + + (void) buffer->message (font, "end fallback kern"); #endif } From 6e345f709d4888ab10ed22afffb6661d695e052f Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 11:40:37 -0700 Subject: [PATCH 25/28] Cosmetic --- src/hb-buffer.hh | 16 ++++++++-------- src/hb-kern.hh | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index b92c87693..879802134 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -435,28 +435,28 @@ struct hb_buffer_t } } - void unsafe_to_break (unsigned int start, unsigned int end) + void unsafe_to_break (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start, end, + start = start, end = end, true); } - void unsafe_to_concat (unsigned int start, unsigned int end) + void unsafe_to_concat (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start, end, + start = start, end = end, true); } - void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) + void unsafe_to_break_from_outbuffer (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start, end, + start = start, end = end, true, true); } - void unsafe_to_concat_from_outbuffer (unsigned int start, unsigned int end) + void unsafe_to_concat_from_outbuffer (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start, end, + start = start, end = end, false, true); } diff --git a/src/hb-kern.hh b/src/hb-kern.hh index 943631932..9ea945cae 100644 --- a/src/hb-kern.hh +++ b/src/hb-kern.hh @@ -52,7 +52,7 @@ struct hb_kern_machine_t if (!buffer->message (font, "start kern")) return; - buffer->set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); + buffer->unsafe_to_concat (); OT::hb_ot_apply_context_t c (1, font, buffer); c.set_lookup_mask (kern_mask); c.set_lookup_props (OT::LookupFlag::IgnoreMarks); From 20031ddbb814d46f8c988242d2ee0bd9b198dbe2 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 11:41:30 -0700 Subject: [PATCH 26/28] [unsafe-to-concat] Mark in all other shapers unsafe_to_break() implies unsafe-to-concat; but setting the flag manually wasn't. --- src/hb-coretext.cc | 3 ++- src/hb-directwrite.cc | 3 ++- src/hb-graphite2.cc | 3 ++- src/hb-uniscribe.cc | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/hb-coretext.cc b/src/hb-coretext.cc index 24075e146..5f383064c 100644 --- a/src/hb-coretext.cc +++ b/src/hb-coretext.cc @@ -1213,7 +1213,8 @@ resize_and_retry: } } - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); #undef FAIL diff --git a/src/hb-directwrite.cc b/src/hb-directwrite.cc index dea87b8cd..f177ff31c 100644 --- a/src/hb-directwrite.cc +++ b/src/hb-directwrite.cc @@ -762,7 +762,8 @@ retry_getglyphs: if (isRightToLeft) hb_buffer_reverse (buffer); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); delete [] clusterMap; delete [] glyphIndices; diff --git a/src/hb-graphite2.cc b/src/hb-graphite2.cc index 42420ac0b..63dc18b46 100644 --- a/src/hb-graphite2.cc +++ b/src/hb-graphite2.cc @@ -439,7 +439,8 @@ _hb_graphite2_shape (hb_shape_plan_t *shape_plan HB_UNUSED, if (feats) gr_featureval_destroy (feats); gr_seg_destroy (seg); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); return true; } diff --git a/src/hb-uniscribe.cc b/src/hb-uniscribe.cc index 0e5a114f7..50f71ce9c 100644 --- a/src/hb-uniscribe.cc +++ b/src/hb-uniscribe.cc @@ -878,7 +878,8 @@ retry: if (backward) hb_buffer_reverse (buffer); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); /* Wow, done! */ return true; From 332460649268844bf93e147cc0b86a82c449a980 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 22 Jan 2022 15:46:13 -0700 Subject: [PATCH 27/28] [buffer] Oops --- src/hb-buffer.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index 879802134..adf4aa2b6 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -438,25 +438,25 @@ struct hb_buffer_t void unsafe_to_break (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start = start, end = end, + start, end, true); } void unsafe_to_concat (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start = start, end = end, + start, end, true); } void unsafe_to_break_from_outbuffer (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start = start, end = end, + start, end, true, true); } void unsafe_to_concat_from_outbuffer (unsigned int start = 0, unsigned int end = -1) { set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, - start = start, end = end, + start, end, false, true); } From 540af548dc3926c4b9db059c9b20297df0802671 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 25 Jan 2022 09:10:56 -0700 Subject: [PATCH 28/28] [unsafe-to-concat] Clarify documentation as per feedback Fixes https://github.com/harfbuzz/harfbuzz/pull/3297#discussion_r754395825 --- src/hb-buffer.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/hb-buffer.h b/src/hb-buffer.h index 9fb218c0d..d6b5a2229 100644 --- a/src/hb-buffer.h +++ b/src/hb-buffer.h @@ -111,8 +111,8 @@ typedef struct hb_glyph_info_t { * happens at line-break position, in the following * way: * - * 1. Iterate back from the line-break position till - * the the first cluster start position that is + * 1. Iterate back from the line-break position + * until the first cluster start position that is * NOT unsafe-to-concat, 2. shape the segment from * there till the end of line, 3. check whether the * resulting glyph-run also is clear of the @@ -123,7 +123,19 @@ typedef struct hb_glyph_info_t { * from there, and repeat. * * At the start of next line a similar algorithm can - * be implemented. A slight complication will arise, + * be implemented. That is: 1. Iterate forward from + * the line-break position untill the first cluster + * start position that is NOT unsafe-to-concat, 2. + * shape the segment from beginning of the line to + * that position, 3. check whether the resulting + * glyph-run also is clear of the unsafe-to-concat + * at its end-of-text position; if it is, just splice + * it into place and the beginning is shaped; If not, + * move on to a position further forward that is clear + * of unsafe-to-concat and retry up to there, and repeat. + * + * A slight complication will arise in the + * implementation of the algorithm above, * because while our buffer API has a way to * return flags for position corresponding to * start-of-text, there is currently no position