Merge pull request #3561 from googlefonts/cmap_opt
[subset] Further cmap subsetting speed optimizations
This commit is contained in:
commit
052812b6ba
|
@ -332,7 +332,6 @@ struct CmapSubtableFormat4
|
||||||
if (unlikely (!c->extend_min (this))) return;
|
if (unlikely (!c->extend_min (this))) return;
|
||||||
this->format = 4;
|
this->format = 4;
|
||||||
|
|
||||||
// TODO(grieger): does pre-alloc make this faster?
|
|
||||||
hb_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> cp_to_gid {
|
hb_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> cp_to_gid {
|
||||||
format4_iter
|
format4_iter
|
||||||
};
|
};
|
||||||
|
@ -1664,13 +1663,7 @@ struct cmap
|
||||||
if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false);
|
if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false);
|
||||||
|
|
||||||
auto it =
|
auto it =
|
||||||
+ hb_iter (c->plan->unicodes)
|
+ c->plan->unicode_to_new_gid_list.iter ()
|
||||||
| hb_map ([&] (hb_codepoint_t _)
|
|
||||||
{
|
|
||||||
hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID;
|
|
||||||
c->plan->new_gid_for_codepoint (_, &new_gid);
|
|
||||||
return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid);
|
|
||||||
})
|
|
||||||
| hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
|
| hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
|
||||||
{ return (_.second != HB_MAP_VALUE_INVALID); })
|
{ return (_.second != HB_MAP_VALUE_INVALID); })
|
||||||
;
|
;
|
||||||
|
|
|
@ -287,6 +287,14 @@ _remove_invalid_gids (hb_set_t *glyphs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
_compare_cp_gid_pair (const void* a,
|
||||||
|
const void* b)
|
||||||
|
{
|
||||||
|
return ((hb_pair_t<hb_codepoint_t, hb_codepoint_t>*)a)->first -
|
||||||
|
((hb_pair_t<hb_codepoint_t, hb_codepoint_t>*)b)->first;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_populate_unicodes_to_retain (const hb_set_t *unicodes,
|
_populate_unicodes_to_retain (const hb_set_t *unicodes,
|
||||||
const hb_set_t *glyphs,
|
const hb_set_t *glyphs,
|
||||||
|
@ -294,12 +302,13 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
|
||||||
{
|
{
|
||||||
OT::cmap::accelerator_t cmap (plan->source);
|
OT::cmap::accelerator_t cmap (plan->source);
|
||||||
|
|
||||||
constexpr static const int size_threshold = 4096;
|
unsigned size_threshold = plan->source->get_num_glyphs ();
|
||||||
|
|
||||||
if (glyphs->is_empty () && unicodes->get_population () < size_threshold)
|
if (glyphs->is_empty () && unicodes->get_population () < size_threshold)
|
||||||
{
|
{
|
||||||
/* This is the fast path if it's anticipated that size of unicodes
|
// This approach to collection is faster, but can only be used if glyphs
|
||||||
* is << than the number of codepoints in the font. */
|
// are not being explicitly added to the subset and the input unicodes set is
|
||||||
|
// not excessively large (eg. an inverted set).
|
||||||
|
plan->unicode_to_new_gid_list.alloc (unicodes->get_population ());
|
||||||
for (hb_codepoint_t cp : *unicodes)
|
for (hb_codepoint_t cp : *unicodes)
|
||||||
{
|
{
|
||||||
hb_codepoint_t gid;
|
hb_codepoint_t gid;
|
||||||
|
@ -310,12 +319,19 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
|
||||||
}
|
}
|
||||||
|
|
||||||
plan->codepoint_to_glyph->set (cp, gid);
|
plan->codepoint_to_glyph->set (cp, gid);
|
||||||
|
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// This approach is slower, but can handle adding in glyphs to the subset and will match
|
||||||
|
// them with cmap entries.
|
||||||
hb_map_t unicode_glyphid_map;
|
hb_map_t unicode_glyphid_map;
|
||||||
cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map);
|
cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map);
|
||||||
|
plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population ()
|
||||||
|
+ glyphs->get_population (),
|
||||||
|
unicode_glyphid_map.get_population ()));
|
||||||
|
|
||||||
|
|
||||||
for (hb_pair_t<hb_codepoint_t, hb_codepoint_t> cp_gid :
|
for (hb_pair_t<hb_codepoint_t, hb_codepoint_t> cp_gid :
|
||||||
+ unicode_glyphid_map.iter ())
|
+ unicode_glyphid_map.iter ())
|
||||||
|
@ -324,8 +340,11 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second);
|
plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second);
|
||||||
|
plan->unicode_to_new_gid_list.push (hb_pair (cp_gid.first, cp_gid.second));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
plan->unicode_to_new_gid_list.qsort (_compare_cp_gid_pair);
|
||||||
|
|
||||||
/* Add gids which were requested, but not mapped in cmap */
|
/* Add gids which were requested, but not mapped in cmap */
|
||||||
// TODO(garretrieger):
|
// TODO(garretrieger):
|
||||||
// Once https://github.com/harfbuzz/harfbuzz/issues/3169
|
// Once https://github.com/harfbuzz/harfbuzz/issues/3169
|
||||||
|
@ -338,8 +357,13 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
+ plan->codepoint_to_glyph->keys () | hb_sink (plan->unicodes);
|
for (unsigned i = 0; i < plan->unicode_to_new_gid_list.length; i++)
|
||||||
+ plan->codepoint_to_glyph->values () | hb_sink (plan->_glyphset_gsub);
|
{
|
||||||
|
// Use raw array access for performance.
|
||||||
|
hb_pair_t<hb_codepoint_t, hb_codepoint_t> pair = plan->unicode_to_new_gid_list.arrayZ[i];
|
||||||
|
plan->unicodes->add(pair.first);
|
||||||
|
plan->_glyphset_gsub->add(pair.second);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -485,6 +509,9 @@ hb_subset_plan_create_or_fail (hb_face_t *face,
|
||||||
plan->successful = true;
|
plan->successful = true;
|
||||||
plan->flags = input->flags;
|
plan->flags = input->flags;
|
||||||
plan->unicodes = hb_set_create ();
|
plan->unicodes = hb_set_create ();
|
||||||
|
|
||||||
|
plan->unicode_to_new_gid_list.init ();
|
||||||
|
|
||||||
plan->name_ids = hb_set_copy (input->sets.name_ids);
|
plan->name_ids = hb_set_copy (input->sets.name_ids);
|
||||||
_nameid_closure (face, plan->name_ids);
|
_nameid_closure (face, plan->name_ids);
|
||||||
plan->name_languages = hb_set_copy (input->sets.name_languages);
|
plan->name_languages = hb_set_copy (input->sets.name_languages);
|
||||||
|
@ -536,6 +563,14 @@ hb_subset_plan_create_or_fail (hb_face_t *face,
|
||||||
plan->reverse_glyph_map,
|
plan->reverse_glyph_map,
|
||||||
&plan->_num_output_glyphs);
|
&plan->_num_output_glyphs);
|
||||||
|
|
||||||
|
// Now that we have old to new gid map update the unicode to new gid list.
|
||||||
|
for (unsigned i = 0; i < plan->unicode_to_new_gid_list.length; i++)
|
||||||
|
{
|
||||||
|
// Use raw array access for performance.
|
||||||
|
plan->unicode_to_new_gid_list.arrayZ[i].second =
|
||||||
|
plan->glyph_map->get(plan->unicode_to_new_gid_list.arrayZ[i].second);
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely (plan->in_error ())) {
|
if (unlikely (plan->in_error ())) {
|
||||||
hb_subset_plan_destroy (plan);
|
hb_subset_plan_destroy (plan);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -558,6 +593,7 @@ hb_subset_plan_destroy (hb_subset_plan_t *plan)
|
||||||
if (!hb_object_destroy (plan)) return;
|
if (!hb_object_destroy (plan)) return;
|
||||||
|
|
||||||
hb_set_destroy (plan->unicodes);
|
hb_set_destroy (plan->unicodes);
|
||||||
|
plan->unicode_to_new_gid_list.fini ();
|
||||||
hb_set_destroy (plan->name_ids);
|
hb_set_destroy (plan->name_ids);
|
||||||
hb_set_destroy (plan->name_languages);
|
hb_set_destroy (plan->name_languages);
|
||||||
hb_set_destroy (plan->layout_features);
|
hb_set_destroy (plan->layout_features);
|
||||||
|
|
|
@ -44,6 +44,7 @@ struct hb_subset_plan_t
|
||||||
|
|
||||||
// For each cp that we'd like to retain maps to the corresponding gid.
|
// For each cp that we'd like to retain maps to the corresponding gid.
|
||||||
hb_set_t *unicodes;
|
hb_set_t *unicodes;
|
||||||
|
hb_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> unicode_to_new_gid_list;
|
||||||
|
|
||||||
// name_ids we would like to retain
|
// name_ids we would like to retain
|
||||||
hb_set_t *name_ids;
|
hb_set_t *name_ids;
|
||||||
|
|
Loading…
Reference in New Issue