[subset] Remove switch to alternate unicode collection at large subset sizes.

Benchmarks show that the first path is always faster even at large subset sizes:

BM_subset_codepoints/subset_roboto/10_median                              +0.0324         +0.0325             0             0             0             0
BM_subset_codepoints/subset_roboto/64_median                              +0.0253         +0.0255             0             1             0             1
BM_subset_codepoints/subset_roboto/512_median                             +0.0126         +0.0128             1             1             1             1
BM_subset_codepoints/subset_roboto/4000_median                            +0.0500         +0.0491             6             7             6             7
BM_subset_codepoints/subset_amiri/10_median                               +0.0338         +0.0332             1             1             1             1
BM_subset_codepoints/subset_amiri/64_median                               +0.0238         +0.0234             1             1             1             1
BM_subset_codepoints/subset_amiri/512_median                              +0.0066         +0.0063             8             8             8             8
BM_subset_codepoints/subset_amiri/4000_median                             -0.0011         -0.0012            13            13            13            13
BM_subset_codepoints/subset_noto_nastaliq_urdu/10_median                  +0.0226         +0.0226             0             0             0             0
BM_subset_codepoints/subset_noto_nastaliq_urdu/64_median                  +0.0047         +0.0044            20            20            20            20
BM_subset_codepoints/subset_noto_nastaliq_urdu/512_median                 +0.0022         +0.0021           165           166           165           166
BM_subset_codepoints/subset_noto_nastaliq_urdu/1000_median                -0.0021         -0.0023           166           166           166           165
BM_subset_codepoints/subset_noto_devangari/10_median                      +0.0054         +0.0054             0             0             0             0
BM_subset_codepoints/subset_noto_devangari/64_median                      +0.0024         +0.0019             0             0             0             0
BM_subset_codepoints/subset_noto_devangari/512_median                     +0.0089         +0.0090             5             5             5             5
BM_subset_codepoints/subset_noto_devangari/1000_median                    -0.0028         -0.0019             5             5             5             5
BM_subset_codepoints/subset_mplus1p/10_median                             +0.0001         +0.0002             0             0             0             0
BM_subset_codepoints/subset_mplus1p/64_median                             +0.0073         +0.0075             1             1             1             1
BM_subset_codepoints/subset_mplus1p/512_median                            +0.0034         +0.0034             1             1             1             1
BM_subset_codepoints/subset_mplus1p/4096_median                           -0.1248         -0.1248             7             6             7             6
BM_subset_codepoints/subset_mplus1p/10000_median                          -0.0885         -0.0885            13            12            13            12
BM_subset_codepoints/subset_notocjk/10_median                             +0.0031         +0.0032             2             2             2             2
BM_subset_codepoints/subset_notocjk/64_median                             -0.0010         -0.0010             2             2             2             2
BM_subset_codepoints/subset_notocjk/512_median                            -0.0023         -0.0023             9             9             9             9
BM_subset_codepoints/subset_notocjk/4096_median                           -0.1725         -0.1726            28            23            28            23
BM_subset_codepoints/subset_notocjk/32768_median                          -0.0277         -0.0287           140           137           140           136
BM_subset_codepoints/subset_notocjk/100000_median                         -0.0929         -0.0926           162           147           162           147
This commit is contained in:
Garret Rieger 2022-05-03 22:40:56 +00:00
parent f0c04114bc
commit 7c7c01d28c
1 changed files with 5 additions and 5 deletions

View File

@ -302,12 +302,10 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
{
OT::cmap::accelerator_t cmap (plan->source);
constexpr static const int size_threshold = 4096;
if (glyphs->is_empty () && unicodes->get_population () < size_threshold)
if (glyphs->is_empty ())
{
/* This is the fast path if it's anticipated that size of unicodes
* is << than the number of codepoints in the font. */
// This is approach to collection is faster, but can only be used if glyphs
// are not being explicitly added to the subset.
plan->unicode_to_new_gid_list.alloc (unicodes->get_population ());
for (hb_codepoint_t cp : *unicodes)
{
@ -324,6 +322,8 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
}
else
{
// This approach is slower, but can handle adding in glyphs to the subset and will match
// them with cmap entries.
hb_map_t unicode_glyphid_map;
cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map);
plan->unicode_to_new_gid_list.alloc (unicode_glyphid_map.get_population ());