From 7a004a7ac27da776b623c0892ebced3d12213c39 Mon Sep 17 00:00:00 2001
From: Garret Rieger
Date: Tue, 29 Nov 2022 00:47:55 +0000
Subject: [PATCH] [subset] Cache per subtable cmap unicode mappings.

---
 src/hb-ot-cmap-table.hh      | 108 +++++++++++++++++++++++++++++------
 src/hb-subset-accelerator.hh |  15 ++++-
 src/hb-subset.cc             |   5 ++
 3 files changed, 109 insertions(+), 19 deletions(-)

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 44f3e5f2a..0a3a82dc8 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -1474,15 +1474,54 @@ struct EncodingRecord
   DEFINE_SIZE_STATIC (8);
 };
 
+struct cmap;
+
 struct SubtableUnicodesCache {
 
  private:
+  hb_blob_ptr_t<cmap> base_blob;
   const char* base;
   hb_hashmap_t<unsigned, hb::unique_ptr<hb_set_t>> cached_unicodes;
 
  public:
+
+  static SubtableUnicodesCache* create (hb_blob_ptr_t<cmap> source_table)
+  {
+    SubtableUnicodesCache* cache =
+        (SubtableUnicodesCache*) hb_malloc (sizeof(SubtableUnicodesCache));
+    new (cache) SubtableUnicodesCache (source_table);
+    return cache;
+  }
+
+  static void destroy (void* value) {
+    if (!value) return;
+
+    SubtableUnicodesCache* cache = (SubtableUnicodesCache*) value;
+    cache->~SubtableUnicodesCache ();
+    hb_free (cache);
+  }
+
   SubtableUnicodesCache(const void* cmap_base)
-      : base ((const char *) cmap_base), cached_unicodes () {}
+      : base_blob(),
+        base ((const char*) cmap_base),
+        cached_unicodes ()
+  {}
+
+  SubtableUnicodesCache(hb_blob_ptr_t<cmap> base_blob_)
+      : base_blob(base_blob_),
+        base ((const char *) base_blob.get()),
+        cached_unicodes ()
+  {}
+
+  ~SubtableUnicodesCache()
+  {
+    base_blob.destroy ();
+  }
+
+  bool same_base(const void* other)
+  {
+    return other == (const void*) base;
+  }
 
   hb_set_t* set_for (const EncodingRecord* record)
   {
@@ -1491,7 +1530,7 @@ struct SubtableUnicodesCache {
       hb_set_t *s = hb_set_create ();
       if (unlikely (s->in_error ()))
         return hb_set_get_empty ();
-	
+
       (base+record->subtable).collect_unicodes (s);
 
       if (unlikely (!cached_unicodes.set ((unsigned) ((const char *) record - base), hb::unique_ptr<hb_set_t> {s})))
@@ -1523,13 +1562,30 @@ struct cmap
 {
   static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
 
+
+  static SubtableUnicodesCache* create_filled_cache(hb_blob_ptr_t<cmap> source_table) {
+    const cmap* cmap = source_table.get();
+    auto it =
+        + hb_iter (cmap->encodingRecord)
+        | hb_filter ([&](const EncodingRecord& _) {
+          return cmap::filter_encoding_records_for_subset (cmap, _);
+        })
+    ;
+
+    SubtableUnicodesCache* cache = SubtableUnicodesCache::create(source_table);
+    for (const EncodingRecord& _ : it)
+      cache->set_for(&_); // populate the cache for this encoding record.
+
+    return cache;
+  }
+
   template<typename Iterator, typename EncodingRecIter,
            hb_requires (hb_is_iterator (EncodingRecIter))>
   bool serialize (hb_serialize_context_t *c,
                   Iterator it,
                   EncodingRecIter encodingrec_iter,
                   const void *base,
-                  const hb_subset_plan_t *plan,
+                  hb_subset_plan_t *plan,
                   bool drop_format_4 = false)
   {
     if (unlikely (!c->extend_min ((*this)))) return false;
@@ -1538,7 +1594,14 @@ struct cmap
     unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0;
     auto snap = c->snapshot ();
 
-    SubtableUnicodesCache unicodes_cache (base);
+    SubtableUnicodesCache local_unicodes_cache (base);
+    SubtableUnicodesCache* unicodes_cache = &local_unicodes_cache;
+
+    if (plan->accelerator &&
+        plan->accelerator->cmap_cache &&
+        plan->accelerator->cmap_cache->same_base (base))
+      unicodes_cache = plan->accelerator->cmap_cache;
+
     for (const EncodingRecord& _ : encodingrec_iter)
     {
       if (c->in_error ())
@@ -1547,7 +1610,7 @@ struct cmap
       unsigned format = (base+_.subtable).u.format;
       if (format != 4 && format != 12 && format != 14) continue;
 
-      hb_set_t* unicodes_set = unicodes_cache.set_for (&_);
+      hb_set_t* unicodes_set = unicodes_cache->set_for (&_);
 
       if (!drop_format_4 && format == 4)
       {
@@ -1566,7 +1629,7 @@ struct cmap
 
       else if (format == 12)
       {
-        if (_can_drop (_, *unicodes_set, base, unicodes_cache, + it | hb_map (hb_first), encodingrec_iter)) continue;
+        if (_can_drop (_, *unicodes_set, base, *unicodes_cache, + it | hb_map (hb_first), encodingrec_iter)) continue;
         c->copy (_, + it | hb_filter (*unicodes_set, hb_first), 12u, base, plan, &format12objidx);
       }
       else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
@@ -1653,17 +1716,9 @@ struct cmap
 
     auto encodingrec_iter =
     + hb_iter (encodingRecord)
-    | hb_filter ([&] (const EncodingRecord& _)
-                {
-                  if ((_.platformID == 0 && _.encodingID == 3) ||
-                      (_.platformID == 0 && _.encodingID == 4) ||
-                      (_.platformID == 3 && _.encodingID == 1) ||
-                      (_.platformID == 3 && _.encodingID == 10) ||
-                      (this + _.subtable).u.format == 14)
-                    return true;
-
-                  return false;
-                })
+    | hb_filter ([&](const EncodingRecord& _) {
+      return cmap::filter_encoding_records_for_subset (this, _);
+    })
     ;
 
     if (unlikely (!encodingrec_iter.len ())) return_trace (false);
@@ -1692,7 +1747,11 @@ struct cmap
                  { return (_.second != HB_MAP_VALUE_INVALID); })
     ;
 
-    return_trace (cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan));
+    return_trace (cmap_prime->serialize (c->serializer,
+                                         it,
+                                         encodingrec_iter,
+                                         this,
+                                         c->plan));
   }
 
   const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
@@ -1928,6 +1987,19 @@ struct cmap
                   encodingRecord.sanitize (c, this));
   }
 
+ private:
+
+  static bool filter_encoding_records_for_subset(const cmap* cmap,
+                                                 const EncodingRecord& _)
+  {
+    return
+        (_.platformID == 0 && _.encodingID == 3) ||
+        (_.platformID == 0 && _.encodingID == 4) ||
+        (_.platformID == 3 && _.encodingID == 1) ||
+        (_.platformID == 3 && _.encodingID == 10) ||
+        (cmap + _.subtable).u.format == 14;
+  }
+
   protected:
   HBUINT16 version; /* Table version number (0). */
   SortedArray16Of<EncodingRecord>
diff --git a/src/hb-subset-accelerator.hh b/src/hb-subset-accelerator.hh
index 53fea0e77..3158ad76c 100644
--- a/src/hb-subset-accelerator.hh
+++ b/src/hb-subset-accelerator.hh
@@ -39,6 +39,10 @@ namespace CFF {
 struct cff_subset_accelerator_t;
 }
 
+namespace OT {
+struct SubtableUnicodesCache;
+};
+
 struct hb_subset_accelerator_t
 {
   static hb_user_data_key_t* user_data_key()
@@ -64,6 +68,9 @@ struct hb_subset_accelerator_t
     if (accel->cff_accelerator && accel->destroy_cff_accelerator)
       accel->destroy_cff_accelerator ((void*) accel->cff_accelerator);
 
+    if (accel->cmap_cache && accel->destroy_cmap_cache)
+      accel->destroy_cmap_cache ((void*) accel->cmap_cache);
+
     accel->~hb_subset_accelerator_t ();
     hb_free (accel);
   }
@@ -71,17 +78,23 @@ struct hb_subset_accelerator_t
   hb_subset_accelerator_t(const hb_map_t& unicode_to_gid_,
                           const hb_set_t& unicodes_)
       : unicode_to_gid(unicode_to_gid_), unicodes(unicodes_),
-        has_seac(false), cff_accelerator(nullptr), destroy_cff_accelerator(nullptr) {}
+        cmap_cache(nullptr), destroy_cmap_cache(nullptr),
+        has_seac(false), cff_accelerator(nullptr), destroy_cff_accelerator(nullptr)
+
+  {}
 
   // Generic
   const hb_map_t unicode_to_gid;
   const hb_set_t unicodes;
+  OT::SubtableUnicodesCache* cmap_cache;
+  hb_destroy_func_t destroy_cmap_cache;
 
   // CFF
   bool has_seac;
   CFF::cff_subset_accelerator_t* cff_accelerator;
   hb_destroy_func_t destroy_cff_accelerator;
 
+  // TODO(garretrieger): see if we can make the cff_accelerator and cmap_cache const
   // TODO(garretrieger): cumulative glyf checksum map
   // TODO(garretrieger): sanitized table cache.
 
diff --git a/src/hb-subset.cc b/src/hb-subset.cc
index 8f74725c6..f169314b9 100644
--- a/src/hb-subset.cc
+++ b/src/hb-subset.cc
@@ -514,6 +514,11 @@ static void _attach_accelerator_data (hb_subset_plan_t* plan,
     return;
   }
 
+  // Populate caches that need access to the final tables.
+  hb_blob_ptr_t<OT::cmap> cmap_ptr (hb_sanitize_context_t ().reference_table<OT::cmap> (face));
+  accel->cmap_cache = OT::cmap::create_filled_cache (cmap_ptr);
+  accel->destroy_cmap_cache = OT::SubtableUnicodesCache::destroy;
+
   if (!hb_face_set_user_data(face,
                              hb_subset_accelerator_t::user_data_key(),
                              accel,