diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 208c9f852..6b6393c8f 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -332,7 +332,6 @@ struct CmapSubtableFormat4 if (unlikely (!c->extend_min (this))) return; this->format = 4; - // TODO(grieger): does pre-alloc make this faster? hb_vector_t> cp_to_gid { format4_iter }; @@ -1664,13 +1663,7 @@ struct cmap if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false); auto it = - + hb_iter (c->plan->unicodes) - | hb_map ([&] (hb_codepoint_t _) - { - hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID; - c->plan->new_gid_for_codepoint (_, &new_gid); - return hb_pair_t (_, new_gid); - }) + + c->plan->unicode_to_new_gid_list.iter () | hb_filter ([&] (const hb_pair_t _) { return (_.second != HB_MAP_VALUE_INVALID); }) ; diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index 74b7e3977..4cff60e54 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -287,6 +287,14 @@ _remove_invalid_gids (hb_set_t *glyphs, } } +static inline int +_compare_cp_gid_pair (const void* a, + const void* b) +{ + return ((hb_pair_t*)a)->first - + ((hb_pair_t*)b)->first; +} + static void _populate_unicodes_to_retain (const hb_set_t *unicodes, const hb_set_t *glyphs, @@ -294,12 +302,13 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, { OT::cmap::accelerator_t cmap (plan->source); - constexpr static const int size_threshold = 4096; - + unsigned size_threshold = plan->source->get_num_glyphs (); if (glyphs->is_empty () && unicodes->get_population () < size_threshold) { - /* This is the fast path if it's anticipated that size of unicodes - * is << than the number of codepoints in the font. */ + // This approach to collection is faster, but can only be used if glyphs + // are not being explicitly added to the subset and the input unicodes set is + // not excessively large (e.g. an inverted set). 
+ plan->unicode_to_new_gid_list.alloc (unicodes->get_population ()); for (hb_codepoint_t cp : *unicodes) { hb_codepoint_t gid; @@ -310,12 +319,19 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, } plan->codepoint_to_glyph->set (cp, gid); + plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); } } else { + // This approach is slower, but can handle adding in glyphs to the subset and will match + // them with cmap entries. hb_map_t unicode_glyphid_map; cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map); + plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population () + + glyphs->get_population (), + unicode_glyphid_map.get_population ())); + for (hb_pair_t cp_gid : + unicode_glyphid_map.iter ()) @@ -324,8 +340,11 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, continue; plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second); + plan->unicode_to_new_gid_list.push (hb_pair (cp_gid.first, cp_gid.second)); } + plan->unicode_to_new_gid_list.qsort (_compare_cp_gid_pair); + /* Add gids which where requested, but not mapped in cmap */ // TODO(garretrieger): // Once https://github.com/harfbuzz/harfbuzz/issues/3169 @@ -338,8 +357,13 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, } } - + plan->codepoint_to_glyph->keys () | hb_sink (plan->unicodes); - + plan->codepoint_to_glyph->values () | hb_sink (plan->_glyphset_gsub); + for (unsigned i = 0; i < plan->unicode_to_new_gid_list.length; i++) + { + // Use raw array access for performance. 
+ hb_pair_t pair = plan->unicode_to_new_gid_list.arrayZ[i]; + plan->unicodes->add(pair.first); + plan->_glyphset_gsub->add(pair.second); + } } static void @@ -485,6 +509,9 @@ hb_subset_plan_create_or_fail (hb_face_t *face, plan->successful = true; plan->flags = input->flags; plan->unicodes = hb_set_create (); + + plan->unicode_to_new_gid_list.init (); + plan->name_ids = hb_set_copy (input->sets.name_ids); _nameid_closure (face, plan->name_ids); plan->name_languages = hb_set_copy (input->sets.name_languages); @@ -536,6 +563,14 @@ hb_subset_plan_create_or_fail (hb_face_t *face, plan->reverse_glyph_map, &plan->_num_output_glyphs); + // Now that we have the old-to-new gid map, update the unicode-to-new-gid list. + for (unsigned i = 0; i < plan->unicode_to_new_gid_list.length; i++) + { + // Use raw array access for performance. + plan->unicode_to_new_gid_list.arrayZ[i].second = + plan->glyph_map->get(plan->unicode_to_new_gid_list.arrayZ[i].second); + } + if (unlikely (plan->in_error ())) { hb_subset_plan_destroy (plan); return nullptr; @@ -558,6 +593,7 @@ hb_subset_plan_destroy (hb_subset_plan_t *plan) if (!hb_object_destroy (plan)) return; hb_set_destroy (plan->unicodes); + plan->unicode_to_new_gid_list.fini (); hb_set_destroy (plan->name_ids); hb_set_destroy (plan->name_languages); hb_set_destroy (plan->layout_features); diff --git a/src/hb-subset-plan.hh b/src/hb-subset-plan.hh index ab2c4c302..0f20d74db 100644 --- a/src/hb-subset-plan.hh +++ b/src/hb-subset-plan.hh @@ -44,6 +44,7 @@ struct hb_subset_plan_t // For each cp that we'd like to retain maps to the corresponding gid. hb_set_t *unicodes; + hb_vector_t> unicode_to_new_gid_list; // name_ids we would like to retain hb_set_t *name_ids;