From 104dc85a2235cc14d2f40638c2f9fa00b39dc5a4 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 4 Jun 2022 06:56:35 -0600 Subject: [PATCH 1/6] [buffer] Add try_allocate for buffer variables --- src/hb-buffer.hh | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh index f1bba203a..89bd281a7 100644 --- a/src/hb-buffer.hh +++ b/src/hb-buffer.hh @@ -132,9 +132,7 @@ struct hb_buffer_t * Managed by enter / leave */ -#ifndef HB_NDEBUG uint8_t allocated_var_bits; -#endif uint8_t serial; hb_buffer_scratch_flags_t scratch_flags; /* Have space-fallback, etc. */ unsigned int max_len; /* Maximum allowed len. */ @@ -163,38 +161,40 @@ struct hb_buffer_t void allocate_var (unsigned int start, unsigned int count) { -#ifndef HB_NDEBUG unsigned int end = start + count; assert (end <= 8); unsigned int bits = (1u<<end) - (1u<<start); assert (0 == (allocated_var_bits & bits)); allocated_var_bits |= bits; -#endif } + bool try_allocate_var (unsigned int start, unsigned int count) + { + unsigned int end = start + count; + assert (end <= 8); + unsigned int bits = (1u<<end) - (1u<<start); + if (allocated_var_bits & bits) + return false; + allocated_var_bits |= bits; + return true; + } void deallocate_var (unsigned int start, unsigned int count) { -#ifndef HB_NDEBUG unsigned int end = start + count; assert (end <= 8); unsigned int bits = (1u<<end) - (1u<<start); assert (bits == (allocated_var_bits & bits)); allocated_var_bits &= ~bits; -#endif } void assert_var (unsigned int start, unsigned int count) { -#ifndef HB_NDEBUG unsigned int end = start + count; assert (end <= 8); unsigned int bits = (1u<<end) - (1u<<start); assert (bits == (allocated_var_bits & bits)); -#endif } void deallocate_var_all () { -#ifndef HB_NDEBUG allocated_var_bits = 0; -#endif } @@ -286,9 +286,10 @@ struct hb_buffer_t #define HB_BUFFER_XALLOCATE_VAR(b, func, var) \ b->func (offsetof (hb_glyph_info_t, var) - offsetof(hb_glyph_info_t, var1), \ sizeof (b->info[0].var)) -#define HB_BUFFER_ALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, allocate_var, var ()) -#define HB_BUFFER_DEALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, deallocate_var, var ()) -#define HB_BUFFER_ASSERT_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, assert_var, var ()) +#define HB_BUFFER_ALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, allocate_var, var ()) +#define HB_BUFFER_TRY_ALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, try_allocate_var, var ()) +#define HB_BUFFER_DEALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, deallocate_var, var ()) +#define HB_BUFFER_ASSERT_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, assert_var, var ()) #endif /* HB_BUFFER_HH */ From 15543f70e04e726639c1b50ace6bdaa9c3ab50b6 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 4 Jun 2022 10:55:50 -0600 Subject: [PATCH 2/6] [indic-like] Move allocation of syllable() buffer var to shapers that use it In Indic, we don't have a pause location to release the var. --- src/hb-ot-layout.cc | 1 - src/hb-ot-layout.hh | 3 --- src/hb-ot-shaper-indic.cc | 1 + src/hb-ot-shaper-khmer.cc | 3 ++- src/hb-ot-shaper-myanmar.cc | 2 ++ src/hb-ot-shaper-syllabic.cc | 8 ++++++++ src/hb-ot-shaper-syllabic.hh | 5 +++++ src/hb-ot-shaper-use.cc | 2 ++ 8 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/hb-ot-layout.cc b/src/hb-ot-layout.cc index 35e887ba3..299a71228 100644 --- a/src/hb-ot-layout.cc +++ b/src/hb-ot-layout.cc @@ -260,7 +260,6 @@ _hb_ot_layout_set_glyph_props (hb_font_t *font, { _hb_glyph_info_set_glyph_props (&buffer->info[i], gdef.get_glyph_props (buffer->info[i].codepoint)); _hb_glyph_info_clear_lig_props (&buffer->info[i]); - buffer->info[i].syllable() = 0; } } diff --git a/src/hb-ot-layout.hh b/src/hb-ot-layout.hh index 75bba0bc5..6395f0667 100644 --- a/src/hb-ot-layout.hh +++ b/src/hb-ot-layout.hh @@ -589,13 +589,11 @@ _hb_buffer_allocate_gsubgpos_vars (hb_buffer_t *buffer) { HB_BUFFER_ALLOCATE_VAR (buffer, glyph_props); HB_BUFFER_ALLOCATE_VAR (buffer, lig_props); - HB_BUFFER_ALLOCATE_VAR (buffer, syllable); } static inline void _hb_buffer_deallocate_gsubgpos_vars (hb_buffer_t *buffer) { - HB_BUFFER_DEALLOCATE_VAR (buffer, syllable); HB_BUFFER_DEALLOCATE_VAR (buffer, lig_props); HB_BUFFER_DEALLOCATE_VAR (buffer, glyph_props); } @@ -605,7 +603,6 @@ _hb_buffer_assert_gsubgpos_vars (hb_buffer_t *buffer) { HB_BUFFER_ASSERT_VAR (buffer, glyph_props); HB_BUFFER_ASSERT_VAR (buffer, lig_props); - HB_BUFFER_ASSERT_VAR (buffer, syllable); } /* Make sure no one directly touches our props...
*/ diff --git a/src/hb-ot-shaper-indic.cc b/src/hb-ot-shaper-indic.cc index 2c8f7c5f1..c239170ac 100644 --- a/src/hb-ot-shaper-indic.cc +++ b/src/hb-ot-shaper-indic.cc @@ -356,6 +356,7 @@ setup_syllables_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) { + HB_BUFFER_ALLOCATE_VAR (buffer, syllable); find_syllables_indic (buffer); foreach_syllable (buffer, start, end) buffer->unsafe_to_break (start, end); diff --git a/src/hb-ot-shaper-khmer.cc b/src/hb-ot-shaper-khmer.cc index 7aca1565b..a7c35ad10 100644 --- a/src/hb-ot-shaper-khmer.cc +++ b/src/hb-ot-shaper-khmer.cc @@ -115,7 +115,7 @@ collect_features_khmer (hb_ot_shape_planner_t *plan) map->add_feature (khmer_features[i]); /* https://github.com/harfbuzz/harfbuzz/issues/3531 */ - map->add_gsub_pause (nullptr); + map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var for (; i < KHMER_NUM_FEATURES; i++) map->add_feature (khmer_features[i]); @@ -187,6 +187,7 @@ setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) { + HB_BUFFER_ALLOCATE_VAR (buffer, syllable); find_syllables_khmer (buffer); foreach_syllable (buffer, start, end) buffer->unsafe_to_break (start, end); diff --git a/src/hb-ot-shaper-myanmar.cc b/src/hb-ot-shaper-myanmar.cc index 8cc373a73..ecb4cf1ab 100644 --- a/src/hb-ot-shaper-myanmar.cc +++ b/src/hb-ot-shaper-myanmar.cc @@ -92,6 +92,7 @@ collect_features_myanmar (hb_ot_shape_planner_t *plan) map->enable_feature (myanmar_basic_features[i], F_MANUAL_ZWJ | F_PER_SYLLABLE); map->add_gsub_pause (nullptr); } + map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var for (unsigned int i = 0; i < ARRAY_LENGTH (myanmar_other_features); i++) map->enable_feature (myanmar_other_features[i], F_MANUAL_ZWJ); @@ -118,6 +119,7 @@ setup_syllables_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) { + HB_BUFFER_ALLOCATE_VAR (buffer, syllable); find_syllables_myanmar (buffer); foreach_syllable (buffer, start, end) buffer->unsafe_to_break (start, end); diff --git a/src/hb-ot-shaper-syllabic.cc b/src/hb-ot-shaper-syllabic.cc index 686015d79..58d694279 100644 --- a/src/hb-ot-shaper-syllabic.cc +++ b/src/hb-ot-shaper-syllabic.cc @@ -99,5 +99,13 @@ hb_syllabic_insert_dotted_circles (hb_font_t *font, buffer->sync (); } +HB_INTERNAL void +hb_syllabic_clear_var (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + HB_BUFFER_DEALLOCATE_VAR (buffer, syllable); +} + #endif diff --git a/src/hb-ot-shaper-syllabic.hh b/src/hb-ot-shaper-syllabic.hh index a250967a7..e8a15bb48 100644 --- a/src/hb-ot-shaper-syllabic.hh +++ b/src/hb-ot-shaper-syllabic.hh @@ -38,5 +38,10 @@ hb_syllabic_insert_dotted_circles (hb_font_t *font, int repha_category = -1, int dottedcircle_position = -1); +HB_INTERNAL void +hb_syllabic_clear_var (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + #endif /* HB_OT_SHAPER_SYLLABIC_HH */ diff --git a/src/hb-ot-shaper-use.cc b/src/hb-ot-shaper-use.cc index 98f0f99e3..b4cdf6d39 100644 --- a/src/hb-ot-shaper-use.cc +++ b/src/hb-ot-shaper-use.cc @@ -133,6 +133,7 @@ collect_features_use (hb_ot_shape_planner_t *plan) map->enable_feature (use_basic_features[i], F_MANUAL_ZWJ | F_PER_SYLLABLE); map->add_gsub_pause (reorder_use); + map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var /* "Topographical 
features" */ for (unsigned int i = 0; i < ARRAY_LENGTH (use_topographical_features); i++) @@ -297,6 +298,7 @@ setup_syllables_use (const hb_ot_shape_plan_t *plan, hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) { + HB_BUFFER_ALLOCATE_VAR (buffer, syllable); find_syllables_use (buffer); foreach_syllable (buffer, start, end) buffer->unsafe_to_break (start, end); From b96622d15c5e22ae214e4184142d28ee609293a4 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 5 Jun 2022 02:45:41 -0600 Subject: [PATCH 3/6] [layout] Use a cache for main input ClassDef of (Chain)ContextLookupFormat2 This commit adds a per-lookup caching infrastructure to GSUB/GPOS, and uses it to cache the input ClassDef.get_class value for (Chain)ContextLookupFormat2. For fonts that make heavy use of class-based (format 2) context matching, this shows a good speedup. For NotoNastaliqUrdu, for example, I observe a 17% speedup. Unfortunately not many other lookups can use a cache like this :(. https://github.com/harfbuzz/harfbuzz/pull/3636 --- src/hb-ot-layout-common.hh | 13 ++ src/hb-ot-layout-gsubgpos.hh | 277 +++++++++++++++++++++++++++++------ src/hb-ot-layout.cc | 10 +- 3 files changed, 257 insertions(+), 43 deletions(-) diff --git a/src/hb-ot-layout-common.hh b/src/hb-ot-layout-common.hh index 015180778..d34380534 100644 --- a/src/hb-ot-layout-common.hh +++ b/src/hb-ot-layout-common.hh @@ -2001,6 +2001,8 @@ struct ClassDefFormat1 return_trace (c->check_struct (this) && classValue.sanitize (c)); } + unsigned cost () const { return 1; } + template <typename set_t> bool collect_coverage (set_t *glyphs) const { @@ -2237,6 +2239,8 @@ struct ClassDefFormat2 return_trace (rangeRecord.sanitize (c)); } + unsigned cost () const { return hb_bit_storage ((unsigned) rangeRecord.len); /* bsearch cost */ } + template <typename set_t> bool collect_coverage (set_t *glyphs) const { @@ -2477,6 +2481,15 @@ struct ClassDef } } + unsigned cost () const + { + switch (u.format) { + case 1: return u.format1.cost (); + case 2: return u.format2.cost (); + default:return 0u; + } + } + /* Might return false if array looks unsorted. * Used for faster rejection of corrupt data. */ template <typename set_t> diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh index 7f57adfb2..6f745d006 100644 --- a/src/hb-ot-layout-gsubgpos.hh +++ b/src/hb-ot-layout-gsubgpos.hh @@ -394,7 +394,6 @@ struct hb_collect_coverage_context_t : set_t *set; }; - struct hb_ot_apply_context_t : hb_dispatch_context_t<hb_ot_apply_context_t, bool, HB_DEBUG_APPLY> { @@ -410,7 +409,7 @@ struct hb_ot_apply_context_t : match_func (nullptr), match_data (nullptr) {} - typedef bool (*match_func_t) (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data); + typedef bool (*match_func_t) (hb_glyph_info_t &info, const HBUINT16 &value, const void *data); void set_ignore_zwnj (bool ignore_zwnj_) { ignore_zwnj = ignore_zwnj_; } void set_ignore_zwj (bool ignore_zwj_) { ignore_zwj = ignore_zwj_; } @@ -428,7 +427,7 @@ struct hb_ot_apply_context_t : MATCH_MAYBE }; - may_match_t may_match (const hb_glyph_info_t &info, + may_match_t may_match (hb_glyph_info_t &info, const HBUINT16 *glyph_data) const { if (!(info.mask & mask) || (syllable && syllable != info.syllable ())) return MATCH_NO; if (match_func) - return match_func (info.codepoint, *glyph_data, match_data) ? MATCH_YES : MATCH_NO; + return match_func (info, *glyph_data, match_data) ?
MATCH_YES : MATCH_NO; return MATCH_MAYBE; } @@ -524,7 +523,7 @@ struct hb_ot_apply_context_t : while (idx + num_items < end) { idx++; - const hb_glyph_info_t &info = c->buffer->info[idx]; + hb_glyph_info_t &info = c->buffer->info[idx]; matcher_t::may_skip_t skip = matcher.may_skip (c, info); if (unlikely (skip == matcher_t::SKIP_YES)) continue; @@ -557,7 +556,7 @@ struct hb_ot_apply_context_t : while (idx > num_items - 1) { idx--; - const hb_glyph_info_t &info = c->buffer->out_info[idx]; + hb_glyph_info_t &info = c->buffer->out_info[idx]; matcher_t::may_skip_t skip = matcher.may_skip (c, info); if (unlikely (skip == matcher_t::SKIP_YES)) continue; @@ -639,6 +638,7 @@ struct hb_ot_apply_context_t : bool per_syllable = false; bool random = false; uint32_t random_state = 1; + unsigned new_syllables = (unsigned) -1; hb_ot_apply_context_t (unsigned int table_index_, hb_font_t *font_, @@ -736,6 +736,9 @@ struct hb_ot_apply_context_t : bool ligature = false, bool component = false) const { + if (new_syllables != (unsigned) -1) + buffer->cur().syllable() = new_syllables; + unsigned int props = _hb_glyph_info_get_glyph_props (&buffer->cur()); props |= HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED; if (ligature) @@ -800,15 +803,60 @@ struct hb_accelerate_subtables_context_t : return typed_obj->apply (c); } + template <typename T> + static inline auto apply_cached_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<1>) HB_RETURN (bool, obj->apply_cached (c) ) + template <typename T> + static inline auto apply_cached_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<0>) HB_RETURN (bool, obj->apply (c) ) + template <typename Type> + static inline bool apply_cached_to (const void *obj, OT::hb_ot_apply_context_t *c) + { + const Type *typed_obj = (const Type *) obj; + return apply_cached_ (typed_obj, c, hb_prioritize); + } + + template <typename T> + static inline auto cache_enter_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<1>) HB_RETURN (bool, obj->cache_enter (c) ) + template <typename T> + static inline bool cache_enter_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<0>) { return false; } + template <typename Type> + static inline bool cache_enter_to (const void *obj, OT::hb_ot_apply_context_t *c) + { + const Type *typed_obj = (const Type *) obj; + return cache_enter_ (typed_obj, c, hb_prioritize); + } + + template <typename T> + static inline auto cache_leave_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<1>) HB_RETURN (void, obj->cache_leave (c) ) + template <typename T> + static inline void cache_leave_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<0>) {} + template <typename Type> + static inline void cache_leave_to (const void *obj, OT::hb_ot_apply_context_t *c) + { + const Type *typed_obj = (const Type *) obj; + return cache_leave_ (typed_obj, c, hb_prioritize); + } + typedef bool (*hb_apply_func_t) (const void *obj, OT::hb_ot_apply_context_t *c); + typedef bool (*hb_cache_enter_func_t) (const void *obj, OT::hb_ot_apply_context_t *c); + typedef void (*hb_cache_leave_func_t) (const void *obj, OT::hb_ot_apply_context_t *c); struct hb_applicable_t { + friend struct hb_accelerate_subtables_context_t; + friend struct hb_ot_layout_lookup_accelerator_t; template <typename T> - void init (const T &obj_, hb_apply_func_t apply_func_) + void init (const T &obj_, + hb_apply_func_t apply_func_, + hb_apply_func_t apply_cached_func_, + hb_cache_enter_func_t cache_enter_func_, + hb_cache_leave_func_t cache_leave_func_) { obj = &obj_; apply_func = apply_func_; + apply_cached_func = apply_cached_func_; + cache_enter_func = cache_enter_func_; + cache_leave_func = cache_leave_func_; digest.init (); obj_.get_coverage
().collect_coverage (&digest); } @@ -817,21 +865,59 @@ struct hb_accelerate_subtables_context_t : { return digest.may_have (c->buffer->cur().codepoint) && apply_func (obj, c); } + bool apply_cached (OT::hb_ot_apply_context_t *c) const + { + return digest.may_have (c->buffer->cur().codepoint) && apply_cached_func (obj, c); + } + + bool cache_enter (OT::hb_ot_apply_context_t *c) const + { + return cache_enter_func (obj, c); + } + void cache_leave (OT::hb_ot_apply_context_t *c) const + { + cache_leave_func (obj, c); + } private: const void *obj; hb_apply_func_t apply_func; + hb_apply_func_t apply_cached_func; + hb_cache_enter_func_t cache_enter_func; + hb_cache_leave_func_t cache_leave_func; hb_set_digest_t digest; }; typedef hb_vector_t<hb_applicable_t> array_t; + template <typename T> + auto cache_cost (const T &obj, hb_priority<1>) HB_AUTO_RETURN ( obj.cache_cost () ) + + template <typename T> + auto cache_cost (const T &obj, hb_priority<0>) HB_AUTO_RETURN ( 0u ) + /* Dispatch interface. */ template <typename T> return_t dispatch (const T &obj) { - hb_applicable_t *entry = array.push(); - entry->init (obj, apply_to); + hb_applicable_t entry; + + entry.init (obj, + apply_to, + apply_cached_to, + cache_enter_to, + cache_leave_to); + + array.push (entry); + + // Cache handling + unsigned cost = cache_cost (obj, hb_prioritize); + if (cost > cache_user_cost && !array.in_error ()) + { + cache_user_idx = array.length - 1; + cache_user_cost = cost; + } + return hb_empty_t (); } static return_t default_return_value () { return hb_empty_t (); } @@ -840,15 +926,15 @@ struct hb_accelerate_subtables_context_t : array (array_) {} array_t &array; + unsigned cache_user_idx = (unsigned) -1; + unsigned cache_user_cost = 0; }; - - typedef bool (*intersects_func_t) (const hb_set_t *glyphs, const HBUINT16 &value, const void *data); typedef void (*intersected_glyphs_func_t) (const hb_set_t *glyphs, const void *data, unsigned value, hb_set_t *intersected_glyphs); typedef void (*collect_glyphs_func_t) (hb_set_t *glyphs, const HBUINT16 &value, const void *data); -typedef bool (*match_func_t) (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data); +typedef bool (*match_func_t) (hb_glyph_info_t &info, const HBUINT16 &value, const void *data); struct ContextClosureFuncs { @@ -863,6 +949,10 @@ struct ContextApplyFuncs { match_func_t match; }; +struct ChainContextApplyFuncs +{ + match_func_t match[3]; +}; static inline bool intersects_glyph (const hb_set_t *glyphs, const HBUINT16 &value, const void *data HB_UNUSED) @@ -939,19 +1029,30 @@ static inline void collect_array (hb_collect_glyphs_context_t *c HB_UNUSED, } -static inline bool match_glyph (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data HB_UNUSED) +static inline bool match_glyph (hb_glyph_info_t &info, const HBUINT16 &value, const void *data HB_UNUSED) { - return glyph_id == value; + return info.codepoint == value; } -static inline bool match_class (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data) +static inline bool match_class (hb_glyph_info_t &info, const HBUINT16 &value, const void *data) { const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data); - return class_def.get_class (glyph_id) == value; + return class_def.get_class (info.codepoint) == value; } -static inline bool match_coverage (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data) +static inline bool match_class_cached (hb_glyph_info_t &info, const HBUINT16 &value, const void *data) +{ + unsigned klass = info.syllable(); + if (klass < 255) + return klass == value; + const ClassDef &class_def =
*reinterpret_cast<const ClassDef *>(data); + klass = class_def.get_class (info.codepoint); + if (likely (klass < 255)) + info.syllable() = klass; + return klass == value; +} +static inline bool match_coverage (hb_glyph_info_t &info, const HBUINT16 &value, const void *data) { const Offset16To<Coverage> &coverage = (const Offset16To<Coverage>&)value; - return (data+coverage).get_coverage (glyph_id) != NOT_COVERED; + return (data+coverage).get_coverage (info.codepoint) != NOT_COVERED; } static inline bool would_match_input (hb_would_apply_context_t *c, @@ -964,8 +1065,12 @@ static inline bool would_match_input (hb_would_apply_context_t *c, return false; for (unsigned int i = 1; i < count; i++) - if (likely (!match_func (c->glyphs[i], input[i - 1], match_data))) + { + hb_glyph_info_t info; + info.codepoint = c->glyphs[i]; + if (likely (!match_func (info, input[i - 1], match_data))) return false; + } return true; } @@ -2125,19 +2230,51 @@ struct ContextFormat2 const Coverage &get_coverage () const { return this+coverage; } - bool apply (hb_ot_apply_context_t *c) const + unsigned cache_cost () const + { + unsigned c = (this+classDef).cost () * ruleSet.len; + return c >= 4 ? c : 0; + } + bool cache_enter (hb_ot_apply_context_t *c) const + { + if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) + return false; + auto &info = c->buffer->info; + unsigned count = c->buffer->len; + for (unsigned i = 0; i < count; i++) + info[i].syllable() = 255; + c->new_syllables = 255; + return true; + } + void cache_leave (hb_ot_apply_context_t *c) const + { + c->new_syllables = (unsigned) -1; + HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + } + bool apply_cached (hb_ot_apply_context_t *c) const { return apply (c, true); } + + bool apply (hb_ot_apply_context_t *c, bool cached = false) const { TRACE_APPLY (this); unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); if (likely (index == NOT_COVERED)) return_trace (false); const ClassDef &class_def = this+classDef; - index = class_def.get_class (c->buffer->cur().codepoint); - const RuleSet &rule_set = this+ruleSet[index]; + struct ContextApplyLookupContext lookup_context = { - {match_class}, + {cached ? match_class_cached : match_class}, &class_def }; + + if (cached && c->buffer->cur().syllable() < 255) + index = c->buffer->cur().syllable (); + else + { + index = class_def.get_class (c->buffer->cur().codepoint); + if (cached && index < 255) + c->buffer->cur().syllable() = index; + } + const RuleSet &rule_set = this+ruleSet[index]; return_trace (rule_set.apply (c, lookup_context)); } @@ -2411,7 +2548,7 @@ struct ChainContextCollectGlyphsLookupContext struct ChainContextApplyLookupContext { - ContextApplyFuncs funcs; + ChainContextApplyFuncs funcs; const void *match_data[3]; }; @@ -2499,7 +2636,7 @@ static inline bool chain_context_would_apply_lookup (hb_would_apply_context_t *c return (c->zero_context ?
!backtrackCount && !lookaheadCount : true) && would_match_input (c, inputCount, input, - lookup_context.funcs.match, lookup_context.match_data[1]); + lookup_context.funcs.match[1], lookup_context.match_data[1]); } static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, @@ -2518,11 +2655,11 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; if (!(match_input (c, inputCount, input, - lookup_context.funcs.match, lookup_context.match_data[1], + lookup_context.funcs.match[1], lookup_context.match_data[1], &match_end, match_positions) && (end_index = match_end) && match_lookahead (c, lookaheadCount, lookahead, - lookup_context.funcs.match, lookup_context.match_data[2], + lookup_context.funcs.match[2], lookup_context.match_data[2], match_end, &end_index))) { c->buffer->unsafe_to_concat (c->buffer->idx, end_index); @@ -2532,7 +2669,7 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, unsigned start_index = c->buffer->out_len; if (!match_backtrack (c, backtrackCount, backtrack, - lookup_context.funcs.match, lookup_context.match_data[0], + lookup_context.funcs.match[0], lookup_context.match_data[0], &start_index)) { c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); @@ -2934,7 +3071,7 @@ struct ChainContextFormat1 { const ChainRuleSet &rule_set = this+ruleSet[(this+coverage).get_coverage (c->glyphs[0])]; struct ChainContextApplyLookupContext lookup_context = { - {match_glyph}, + {{match_glyph, match_glyph, match_glyph}}, {nullptr, nullptr, nullptr} }; return rule_set.would_apply (c, lookup_context); @@ -2950,7 +3087,7 @@ struct ChainContextFormat1 const ChainRuleSet &rule_set = this+ruleSet[index]; struct ChainContextApplyLookupContext lookup_context = { - {match_glyph}, + {{match_glyph, match_glyph, match_glyph}}, {nullptr, nullptr, nullptr} }; return_trace (rule_set.apply (c, lookup_context)); @@ -3134,7 +3271,7 @@ struct ChainContextFormat2 unsigned int index = input_class_def.get_class (c->glyphs[0]); const ChainRuleSet &rule_set = this+ruleSet[index]; struct ChainContextApplyLookupContext lookup_context = { - {match_class}, + {{match_class, match_class, match_class}}, {&backtrack_class_def, &input_class_def, &lookahead_class_def} @@ -3144,7 +3281,30 @@ struct ChainContextFormat2 const Coverage &get_coverage () const { return this+coverage; } - bool apply (hb_ot_apply_context_t *c) const + unsigned cache_cost () const + { + unsigned c = (this+inputClassDef).cost () * ruleSet.len; + return c >= 4 ? 
c : 0; + } + bool cache_enter (hb_ot_apply_context_t *c) const + { + if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) + return false; + auto &info = c->buffer->info; + unsigned count = c->buffer->len; + for (unsigned i = 0; i < count; i++) + info[i].syllable() = 255; + c->new_syllables = 255; + return true; + } + void cache_leave (hb_ot_apply_context_t *c) const + { + c->new_syllables = (unsigned) -1; + HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + } + bool apply_cached (hb_ot_apply_context_t *c) const { return apply (c, true); } + + bool apply (hb_ot_apply_context_t *c, bool cached = false) const { TRACE_APPLY (this); unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); @@ -3154,14 +3314,24 @@ struct ChainContextFormat2 const ClassDef &input_class_def = this+inputClassDef; const ClassDef &lookahead_class_def = this+lookaheadClassDef; - index = input_class_def.get_class (c->buffer->cur().codepoint); - const ChainRuleSet &rule_set = this+ruleSet[index]; struct ChainContextApplyLookupContext lookup_context = { - {match_class}, + {{cached && &backtrack_class_def == &input_class_def ? match_class_cached : match_class, + cached ? match_class_cached : match_class, + cached && &lookahead_class_def == &input_class_def ? match_class_cached : match_class}}, {&backtrack_class_def, &input_class_def, &lookahead_class_def} }; + + if (cached && c->buffer->cur().syllable() < 255) + index = c->buffer->cur().syllable (); + else + { + index = input_class_def.get_class (c->buffer->cur().codepoint); + if (cached && index < 255) + c->buffer->cur().syllable() = index; + } + const ChainRuleSet &rule_set = this+ruleSet[index]; return_trace (rule_set.apply (c, lookup_context)); } @@ -3359,7 +3529,7 @@ struct ChainContextFormat3 const Array16OfOffset16To<Coverage> &lookahead = StructAfter<Array16OfOffset16To<Coverage>> (input); const Array16Of<LookupRecord> &lookup = StructAfter<Array16Of<LookupRecord>> (lookahead); struct ChainContextApplyLookupContext lookup_context = { - {match_coverage}, + {{match_coverage, match_coverage, match_coverage}}, {this, this, this} }; return chain_context_would_apply_lookup (c, @@ -3386,7 +3556,7 @@ struct ChainContextFormat3 const Array16OfOffset16To<Coverage> &lookahead = StructAfter<Array16OfOffset16To<Coverage>> (input); const Array16Of<LookupRecord> &lookup = StructAfter<Array16Of<LookupRecord>> (lookahead); struct ChainContextApplyLookupContext lookup_context = { - {match_coverage}, + {{match_coverage, match_coverage, match_coverage}}, {this, this, this} }; return_trace (chain_context_apply_lookup (c, @@ -3625,23 +3795,48 @@ struct hb_ot_layout_lookup_accelerator_t subtables.init (); OT::hb_accelerate_subtables_context_t c_accelerate_subtables (subtables); lookup.dispatch (&c_accelerate_subtables); + cache_user_idx = c_accelerate_subtables.cache_user_idx; + for (unsigned i = 0; i < subtables.length; i++) + if (i != cache_user_idx) + subtables[i].apply_cached_func = subtables[i].apply_func; } void fini () { subtables.fini (); } bool may_have (hb_codepoint_t g) const { return digest.may_have (g); } - bool apply (hb_ot_apply_context_t *c) const + bool apply (hb_ot_apply_context_t *c, bool use_cache) const { - for (unsigned int i = 0; i < subtables.length; i++) - if (subtables[i].apply (c)) - return true; + if (use_cache) + { + for (unsigned int i = 0; i < subtables.length; i++) + if (subtables[i].apply_cached (c)) + return true; + } + else + { + for (unsigned int i = 0; i < subtables.length; i++) + if (subtables[i].apply (c)) + return true; + } return false; } + bool cache_enter (OT::hb_ot_apply_context_t *c) const + { + return cache_user_idx != (unsigned) -1 &&
subtables[cache_user_idx].cache_enter (c); } + void cache_leave (OT::hb_ot_apply_context_t *c) const + { + subtables[cache_user_idx].cache_leave (c); + } + + private: hb_set_digest_t digest; hb_accelerate_subtables_context_t::array_t subtables; + unsigned cache_user_idx = (unsigned) -1; }; struct GSUBGPOS diff --git a/src/hb-ot-layout.cc b/src/hb-ot-layout.cc index 299a71228..408a2266e 100644 --- a/src/hb-ot-layout.cc +++ b/src/hb-ot-layout.cc @@ -1826,6 +1826,8 @@ static inline bool apply_forward (OT::hb_ot_apply_context_t *c, const OT::hb_ot_layout_lookup_accelerator_t &accel) { + bool use_cache = accel.cache_enter (c); + bool ret = false; hb_buffer_t *buffer = c->buffer; while (buffer->idx < buffer->len && buffer->successful) @@ -1835,7 +1837,7 @@ apply_forward (OT::hb_ot_apply_context_t *c, (buffer->cur().mask & c->lookup_mask) && c->check_glyph_property (&buffer->cur(), c->lookup_props)) { - applied = accel.apply (c); + applied = accel.apply (c, use_cache); } if (applied) @@ -1843,6 +1845,10 @@ apply_forward (OT::hb_ot_apply_context_t *c, else (void) buffer->next_glyph (); } + + if (use_cache) + accel.cache_leave (c); + return ret; } @@ -1857,7 +1863,7 @@ apply_backward (OT::hb_ot_apply_context_t *c, if (accel.may_have (buffer->cur().codepoint) && (buffer->cur().mask & c->lookup_mask) && c->check_glyph_property (&buffer->cur(), c->lookup_props)) - ret |= accel.apply (c); + ret |= accel.apply (c, false); /* The reverse lookup doesn't "advance" cursor (for good reason). */ buffer->idx--; From d4c09e9a872967ebc2b9921ad1d267162e5ad569 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 7 Jun 2022 09:03:30 -0600 Subject: [PATCH 4/6] [gsubgpos] Remove apply_cached() entry point Just use a bool argument to apply() --- src/hb-ot-layout-gsubgpos.hh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh index 6f745d006..c8e7215e2 100644 --- a/src/hb-ot-layout-gsubgpos.hh +++ b/src/hb-ot-layout-gsubgpos.hh @@ -804,7 +804,7 @@ struct hb_accelerate_subtables_context_t : } template <typename T> - static inline auto apply_cached_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<1>) HB_RETURN (bool, obj->apply_cached (c) ) + static inline auto apply_cached_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<1>) HB_RETURN (bool, obj->apply (c, true) ) template <typename T> static inline auto apply_cached_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<0>) HB_RETURN (bool, obj->apply (c) ) template <typename Type> @@ -2251,7 +2251,6 @@ struct ContextFormat2 c->new_syllables = (unsigned) -1; HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); } - bool apply_cached (hb_ot_apply_context_t *c) const { return apply (c, true); } bool apply (hb_ot_apply_context_t *c, bool cached = false) const { @@ -3302,7 +3301,6 @@ struct ChainContextFormat2 c->new_syllables = (unsigned) -1; HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); } - bool apply_cached (hb_ot_apply_context_t *c) const { return apply (c, true); } bool apply (hb_ot_apply_context_t *c, bool cached = false) const { From 5963cf446907127c55fe0404e068c19ca7eb4490 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 7 Jun 2022 09:12:45 -0600 Subject: [PATCH 5/6] [gsubgpos] Merge cache_enter and cache_leave entry points Saves a pointer per subtable --- src/hb-ot-layout-gsubgpos.hh | 102 ++++++++++++++++------------------- 1 file changed, 47 insertions(+), 55 deletions(-) diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh index c8e7215e2..2674a3e3f 100644 ---
a/src/hb-ot-layout-gsubgpos.hh +++ b/src/hb-ot-layout-gsubgpos.hh @@ -815,30 +815,18 @@ struct hb_accelerate_subtables_context_t : } template <typename T> - static inline auto cache_enter_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<1>) HB_RETURN (bool, obj->cache_enter (c) ) + static inline auto cache_func_ (const T *obj, OT::hb_ot_apply_context_t *c, bool enter, hb_priority<1>) HB_RETURN (bool, obj->cache_func (c, enter) ) template <typename T> - static inline bool cache_enter_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<0>) { return false; } + static inline bool cache_func_ (const T *obj, OT::hb_ot_apply_context_t *c, bool enter, hb_priority<0>) { return false; } template <typename Type> - static inline bool cache_enter_to (const void *obj, OT::hb_ot_apply_context_t *c) + static inline bool cache_func_to (const void *obj, OT::hb_ot_apply_context_t *c, bool enter) { const Type *typed_obj = (const Type *) obj; - return cache_enter_ (typed_obj, c, hb_prioritize); - } - - template <typename T> - static inline auto cache_leave_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<1>) HB_RETURN (void, obj->cache_leave (c) ) - template <typename T> - static inline void cache_leave_ (const T *obj, OT::hb_ot_apply_context_t *c, hb_priority<0>) {} - template <typename Type> - static inline void cache_leave_to (const void *obj, OT::hb_ot_apply_context_t *c) - { - const Type *typed_obj = (const Type *) obj; - return cache_leave_ (typed_obj, c, hb_prioritize); + return cache_func_ (typed_obj, c, enter, hb_prioritize); } typedef bool (*hb_apply_func_t) (const void *obj, OT::hb_ot_apply_context_t *c); - typedef bool (*hb_cache_enter_func_t) (const void *obj, OT::hb_ot_apply_context_t *c); - typedef void (*hb_cache_leave_func_t) (const void *obj, OT::hb_ot_apply_context_t *c); + typedef bool (*hb_cache_func_t) (const void *obj, OT::hb_ot_apply_context_t *c, bool enter); struct hb_applicable_t { @@ -849,14 +837,12 @@ struct hb_accelerate_subtables_context_t : void init (const T &obj_, hb_apply_func_t apply_func_, hb_apply_func_t apply_cached_func_, - hb_cache_enter_func_t cache_enter_func_, - hb_cache_leave_func_t cache_leave_func_) + hb_cache_func_t cache_func_) { obj = &obj_; apply_func = apply_func_; apply_cached_func = apply_cached_func_; - cache_enter_func = cache_enter_func_; - cache_leave_func = cache_leave_func_; + cache_func = cache_func_; digest.init (); obj_.get_coverage ().collect_coverage (&digest); } @@ -872,19 +858,18 @@ struct hb_accelerate_subtables_context_t : bool cache_enter (OT::hb_ot_apply_context_t *c) const { - return cache_enter_func (obj, c); + return cache_func (obj, c, true); } void cache_leave (OT::hb_ot_apply_context_t *c) const { - cache_leave_func (obj, c); + cache_func (obj, c, false); } private: const void *obj; hb_apply_func_t apply_func; hb_apply_func_t apply_cached_func; - hb_cache_enter_func_t cache_enter_func; - hb_cache_leave_func_t cache_leave_func; + hb_cache_func_t cache_func; hb_set_digest_t digest; }; @@ -905,8 +890,7 @@ struct hb_accelerate_subtables_context_t : entry.init (obj, apply_to, apply_cached_to, - cache_enter_to, - cache_leave_to); + cache_func_to); array.push (entry); @@ -2235,21 +2219,25 @@ struct ContextFormat2 unsigned c = (this+classDef).cost () * ruleSet.len; return c >= 4 ? c : 0; } - bool cache_enter (hb_ot_apply_context_t *c) const + bool cache_func (hb_ot_apply_context_t *c, bool enter) const { - if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) - return false; - auto &info = c->buffer->info; - unsigned count = c->buffer->len; - for (unsigned i = 0; i < count; i++) - info[i].syllable() = 255; - c->new_syllables = 255; - return true; - } - void cache_leave (hb_ot_apply_context_t *c) const - { - c->new_syllables = (unsigned) -1; - HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + if (enter) + { + if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) + return false; + auto &info = c->buffer->info; + unsigned count = c->buffer->len; + for (unsigned i = 0; i < count; i++) + info[i].syllable() = 255; + c->new_syllables = 255; + return true; + } + else + { + c->new_syllables = (unsigned) -1; + HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + return true; + } } bool apply (hb_ot_apply_context_t *c, bool cached = false) const @@ -3285,21 +3273,25 @@ struct ChainContextFormat2 unsigned c = (this+inputClassDef).cost () * ruleSet.len; return c >= 4 ?
c : 0; } - bool cache_enter (hb_ot_apply_context_t *c) const + bool cache_func (hb_ot_apply_context_t *c, bool enter) const { - if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) - return false; - auto &info = c->buffer->info; - unsigned count = c->buffer->len; - for (unsigned i = 0; i < count; i++) - info[i].syllable() = 255; - c->new_syllables = 255; - return true; - } - void cache_leave (hb_ot_apply_context_t *c) const - { - c->new_syllables = (unsigned) -1; - HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + if (enter) + { + if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) + return false; + auto &info = c->buffer->info; + unsigned count = c->buffer->len; + for (unsigned i = 0; i < count; i++) + info[i].syllable() = 255; + c->new_syllables = 255; + return true; + } + else + { + c->new_syllables = (unsigned) -1; + HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + return true; + } } bool apply (hb_ot_apply_context_t *c, bool cached = false) const @@ -3285,21 +3273,25 @@ struct ChainContextFormat2 unsigned c = (this+inputClassDef).cost () * ruleSet.len; return c >= 4 ? c : 0; } - bool cache_enter (hb_ot_apply_context_t *c) const + bool cache_func (hb_ot_apply_context_t *c, bool enter) const { - if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) - return false; - auto &info = c->buffer->info; - unsigned count = c->buffer->len; - for (unsigned i = 0; i < count; i++) - info[i].syllable() = 255; - c->new_syllables = 255; - return true; - } - void cache_leave (hb_ot_apply_context_t *c) const - { - c->new_syllables = (unsigned) -1; - HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + if (enter) + { + if (!HB_BUFFER_TRY_ALLOCATE_VAR (c->buffer, syllable)) + return false; + auto &info = c->buffer->info; + unsigned count = c->buffer->len; + for (unsigned i = 0; i < count; i++) + info[i].syllable() = 255; + c->new_syllables = 255; + return true; + } + else + { + c->new_syllables = (unsigned) -1; + HB_BUFFER_DEALLOCATE_VAR (c->buffer, syllable); + return true; + } } bool apply (hb_ot_apply_context_t *c, bool cached = false) const From c8fb048f79964e0b6cdf9d322fc12c71328cfde8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 7 Jun 2022 09:20:27 -0600 Subject: [PATCH 6/6] [gsubgpos] Document caching --- src/hb-ot-layout-gsubgpos.hh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh index 2674a3e3f..21ddcf7fa 100644 --- a/src/hb-ot-layout-gsubgpos.hh +++ b/src/hb-ot-layout-gsubgpos.hh @@ -894,7 +894,14 @@ struct hb_accelerate_subtables_context_t : array.push (entry); - // Cache handling + /* Cache handling + * + * We allow one subtable from each lookup to use a cache. The assumption + * being that multiple subtables of the same lookup cannot use a cache + * because the resources they would use will collide. As such, we ask + * each subtable to tell us how much it costs (which a cache would avoid), + * and we allocate the cache opportunity to the costliest subtable. + */ unsigned cost = cache_cost (obj, hb_prioritize); if (cost > cache_user_cost && !array.in_error ()) {
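
A note for readers on the mechanism behind PATCH 1: hb_buffer_t guards its eight per-glyph var bytes with an 8-bit ownership mask, and try_allocate_var is simply the non-asserting variant of allocate_var. Below is a minimal standalone sketch of that scheme; the type var_allocator and its method names are hypothetical stand-ins, not the hb_buffer_t API:

    // Sketch of the 8-bit ownership mask behind allocate_var / try_allocate_var.
    // Each bit guards one byte of the per-glyph var space; try_allocate fails
    // instead of asserting when any requested byte is already owned.
    #include <cassert>
    #include <cstdint>

    struct var_allocator
    {
      uint8_t allocated_bits = 0;

      static uint8_t mask (unsigned start, unsigned count)
      {
        unsigned end = start + count;
        assert (end <= 8);
        return (uint8_t) ((1u << end) - (1u << start));
      }

      void allocate (unsigned start, unsigned count)
      {
        uint8_t bits = mask (start, count);
        assert (!(allocated_bits & bits)); // double allocation is a programming error
        allocated_bits |= bits;
      }

      bool try_allocate (unsigned start, unsigned count)
      {
        uint8_t bits = mask (start, count);
        if (allocated_bits & bits)
          return false;                    // taken; caller backs off gracefully
        allocated_bits |= bits;
        return true;
      }

      void deallocate (unsigned start, unsigned count)
      {
        uint8_t bits = mask (start, count);
        assert ((allocated_bits & bits) == bits);
        allocated_bits &= (uint8_t) ~bits;
      }
    };

The try_ variant is what lets PATCH 3's cache politely decline to start when another subsystem already owns the syllable() byte, rather than aborting.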
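Similarly, the core idea of PATCH 3 is that the per-glyph syllable() byte doubles as a one-byte memo for ClassDef::get_class while a (Chain)ContextFormat2 lookup runs: 255 means "not computed yet", and values 0..254 are memoized classes. A standalone sketch of the match_class_cached logic, with hypothetical names and a toy get_class standing in for the real binary search over ranges:

    // Sketch of the memoized class lookup; glyph_info / class_def are stand-ins.
    #include <cstdint>

    struct glyph_info
    {
      uint32_t codepoint;
      uint8_t  memo = 255;  // stand-in for syllable(); 255 == unknown
    };

    struct class_def
    {
      // Stand-in for ClassDef::get_class; real format-2 data does a bsearch.
      unsigned get_class (uint32_t gid) const { return gid % 7; }
    };

    static bool match_class_cached (glyph_info &info,
                                    unsigned value,
                                    const class_def &classes)
    {
      unsigned klass = info.memo;
      if (klass < 255)
        return klass == value;            // hit: one byte read
      klass = classes.get_class (info.codepoint);
      if (klass < 255)
        info.memo = (uint8_t) klass;      // memoize; classes >= 255 never cached
      return klass == value;
    }

This is also why cache_enter resets every glyph's byte to 255, and why the new_syllables override keeps glyphs inserted mid-lookup marked as unknown for as long as the cache is live.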
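Finally, the election policy that PATCH 6 documents can be stated compactly: each subtable reports a cache_cost (for a ClassDef, roughly the bsearch depth, hb_bit_storage of the range count, times the number of rule sets), and only the costliest subtable of a lookup gets the single cache slot, since two users would collide on the same syllable() bytes. A sketch with hypothetical names:

    // Sketch of the one-cache-slot-per-lookup election.
    #include <vector>

    struct subtable_info
    {
      unsigned cache_cost; // 0 == cannot benefit from a cache
    };

    static unsigned elect_cache_user (const std::vector<subtable_info> &subtables)
    {
      unsigned winner = (unsigned) -1;    // -1 == nobody uses the cache
      unsigned best_cost = 0;
      for (size_t i = 0; i < subtables.size (); i++)
        if (subtables[i].cache_cost > best_cost) // strictly greater: cost 0 never wins
        {
          winner = (unsigned) i;
          best_cost = subtables[i].cache_cost;
        }
      return winner;
    }

All other subtables of the lookup then have their apply_cached_func pointed back at the plain apply_func, so the cached path costs nothing for non-participants.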