Merge pull request #3171 from googlefonts/unicode_glyph_invert

[subset] use inverted set for all unicodes.
This commit is contained in:
Behdad Esfahbod 2021-08-25 16:21:07 -06:00 committed by GitHub
commit 18b4aab652
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 120 additions and 50 deletions

View File

@ -240,7 +240,12 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
{
OT::cmap::accelerator_t cmap;
cmap.init (plan->source);
constexpr static const int size_threshold = 4000;
if (unicodes->get_population () < size_threshold && glyphs->is_empty ())
{
// This is the fast path, taken when the size of unicodes is anticipated
// to be much smaller than the number of codepoints in the font.
for (hb_codepoint_t cp : *unicodes)
{
hb_codepoint_t gid;
@ -249,33 +254,41 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
continue;
}
plan->unicodes->add (cp);
plan->codepoint_to_glyph->set (cp, gid);
plan->_glyphset_gsub->add (gid);
}
if (glyphs->is_empty ())
{
cmap.fini ();
return;
}
} else {
hb_map_t unicode_glyphid_map;
cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map);
cmap.fini ();
for (hb_pair_t<hb_codepoint_t, hb_codepoint_t> cp_gid :
+ unicode_glyphid_map.iter () | hb_filter (glyphs, hb_second))
+ unicode_glyphid_map.iter ())
{
plan->unicodes->add (cp_gid.first);
if (!unicodes->has (cp_gid.first) && !glyphs->has (cp_gid.second))
continue;
plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second);
}
// Add gids which were requested, but not mapped in cmap
// TODO(garretrieger): once https://github.com/harfbuzz/harfbuzz/issues/3169
// is implemented, this can be done with union and del_range
for (hb_codepoint_t gid : glyphs->iter ())
{
if (gid >= plan->source->get_num_glyphs ())
break;
plan->_glyphset_gsub->add (gid);
}
}
+ plan->codepoint_to_glyph->keys () | hb_sink (plan->unicodes);
+ plan->codepoint_to_glyph->values () | hb_sink (plan->_glyphset_gsub);
}
static void
_populate_gids_to_retain (hb_subset_plan_t* plan,
const hb_set_t *unicodes,
const hb_set_t *input_glyphs_to_retain,
bool close_over_gsub,
bool close_over_gpos,
bool close_over_gdef)
@ -292,7 +305,6 @@ _populate_gids_to_retain (hb_subset_plan_t* plan,
colr.init (plan->source);
plan->_glyphset_gsub->add (0); // Not-def
hb_set_union (plan->_glyphset_gsub, input_glyphs_to_retain);
_cmap_closure (plan->source, plan->unicodes, plan->_glyphset_gsub);
@ -477,8 +489,6 @@ hb_subset_plan_create (hb_face_t *face,
_populate_unicodes_to_retain (input->unicodes, input->glyphs, plan);
_populate_gids_to_retain (plan,
input->unicodes,
input->glyphs,
!input->drop_tables->has (HB_OT_TAG_GSUB),
!input->drop_tables->has (HB_OT_TAG_GPOS),
!input->drop_tables->has (HB_OT_TAG_GDEF));

View File

@ -131,13 +131,6 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
return true;
}
void
add_all_unicodes ()
{
hb_set_t *codepoints = hb_subset_input_unicode_set (input);
hb_face_collect_unicodes (face, codepoints);
}
void add_options ();
protected:
@ -165,8 +158,17 @@ parse_gids (const char *name G_GNUC_UNUSED,
GError **error)
{
subset_main_t *subset_main = (subset_main_t *) data;
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
hb_set_t *gids = hb_subset_input_glyph_set (subset_main->input);
if (0 == strcmp (arg, "*"))
{
hb_set_clear (gids);
if (!is_remove)
hb_set_invert (gids);
return true;
}
char *s = (char *) arg;
char *p;
@ -203,11 +205,17 @@ parse_gids (const char *name G_GNUC_UNUSED,
"Invalid glyph-index range %u-%u", start_code, end_code);
return false;
}
if (!is_remove)
hb_set_add_range (gids, start_code, end_code);
else
hb_set_del_range (gids, start_code, end_code);
}
else
{
if (!is_remove)
hb_set_add (gids, start_code);
else
hb_set_del (gids, start_code);
}
s = p;
@ -223,8 +231,17 @@ parse_glyphs (const char *name G_GNUC_UNUSED,
GError **error G_GNUC_UNUSED)
{
subset_main_t *subset_main = (subset_main_t *) data;
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
hb_set_t *gids = hb_subset_input_glyph_set (subset_main->input);
if (0 == strcmp (arg, "*"))
{
hb_set_clear (gids);
if (!is_remove)
hb_set_invert (gids);
return true;
}
const char *p = arg;
const char *p_end = arg + strlen (arg);
@ -248,7 +265,10 @@ parse_glyphs (const char *name G_GNUC_UNUSED,
return false;
}
if (!is_remove)
hb_set_add (gids, gid);
else
hb_set_del (gids, gid);
}
p = end + 1;
@ -265,20 +285,26 @@ parse_text (const char *name G_GNUC_UNUSED,
GError **error G_GNUC_UNUSED)
{
subset_main_t *subset_main = (subset_main_t *) data;
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
if (0 == strcmp (arg, "*"))
{
subset_main->add_all_unicodes ();
hb_set_clear (unicodes);
if (!is_remove)
hb_set_invert (unicodes);
return true;
}
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
for (gchar *c = (gchar *) arg;
*c;
c = g_utf8_find_next_char(c, nullptr))
{
gunichar cp = g_utf8_get_char(c);
if (!is_remove)
hb_set_add (unicodes, cp);
else
hb_set_del (unicodes, cp);
}
return true;
}
@ -290,16 +316,18 @@ parse_unicodes (const char *name G_GNUC_UNUSED,
GError **error)
{
subset_main_t *subset_main = (subset_main_t *) data;
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
if (0 == strcmp (arg, "*"))
{
subset_main->add_all_unicodes ();
hb_set_clear (unicodes);
if (!is_remove)
hb_set_invert (unicodes);
return true;
}
// XXX TODO Ranges
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
#define DELIMITERS "<+->{},;&#\\xXuUnNiI\n\t\v\f\r "
char *s = (char *) arg;
@ -338,11 +366,17 @@ parse_unicodes (const char *name G_GNUC_UNUSED,
"Invalid Unicode range %u-%u", start_code, end_code);
return false;
}
if (!is_remove)
hb_set_add_range (unicodes, start_code, end_code);
else
hb_set_del_range (unicodes, start_code, end_code);
}
else
{
if (!is_remove)
hb_set_add (unicodes, start_code);
else
hb_set_del (unicodes, start_code);
}
s = p;
@ -667,13 +701,39 @@ subset_main_t::add_options ()
GOptionEntry glyphset_entries[] =
{
{"gids", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids, "Specify glyph IDs or ranges to include in the subset", "list of glyph indices/ranges"},
{"gids", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids,
"Specify glyph IDs or ranges to include in the subset.\n"
" "
"Use --gids-=... to subtract codepoints from the current set.", "list of glyph indices/ranges or *"},
{"gids-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids, "Specify glyph IDs or ranges to remove from the subset", "list of glyph indices/ranges or *"},
{"gids+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids, "Specify glyph IDs or ranges to include in the subset", "list of glyph indices/ranges or *"},
{"gids-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_gids>, "Specify file to read glyph IDs or ranges from", "filename"},
{"glyphs", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to include in the subset", "list of glyph names"},
{"glyphs", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to include in the subset. Use --glyphs-=... to subtract glyphs from the current set.", "list of glyph names or *"},
{"glyphs+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to include in the subset", "list of glyph names"},
{"glyphs-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to remove from the subset", "list of glyph names"},
{"glyphs-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_glyphs>, "Specify file to read glyph names fromt", "filename"},
{"text", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to include in the subset", "string"},
{"text", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to include in the subset. Use --text-=... to subtract codepoints from the current set.", "string"},
{"text-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to remove from the subset", "string"},
{"text+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to include in the subset", "string"},
{"text-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_text, false>,"Specify file to read text from", "filename"},
{"unicodes", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to include in the subset", "list of hex numbers/ranges"},
{"unicodes", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes,
"Specify Unicode codepoints or ranges to include in the subset. Use * to include all codepoints.\n"
" "
"--unicodes-=... can be used to subtract codepoints "
"from the current set.\n"
" "
"For example: --unicodes=* --unicodes-=41,42,43 would create a subset with all codepoints\n"
" "
"except for 41, 42, 43.",
"list of hex numbers/ranges or *"},
{"unicodes-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to remove from the subset", "list of hex numbers/ranges or *"},
{"unicodes+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to include in the subset", "list of hex numbers/ranges or *"},
{"unicodes-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_unicodes>,"Specify file to read Unicode codepoints or ranges from", "filename"},
{nullptr}
};