Merge pull request #3171 from googlefonts/unicode_glyph_invert
[subset] use inverted set for all unicodes.
This commit is contained in:
commit
18b4aab652
|
@ -240,7 +240,12 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
|
||||||
{
|
{
|
||||||
OT::cmap::accelerator_t cmap;
|
OT::cmap::accelerator_t cmap;
|
||||||
cmap.init (plan->source);
|
cmap.init (plan->source);
|
||||||
|
constexpr static const int size_threshold = 4000;
|
||||||
|
|
||||||
|
if (unicodes->get_population () < size_threshold && glyphs->is_empty ())
|
||||||
|
{
|
||||||
|
// This is the fast path if it's anticipated that size of unicodes
|
||||||
|
// is much smaller than the number of codepoints in the font.
|
||||||
for (hb_codepoint_t cp : *unicodes)
|
for (hb_codepoint_t cp : *unicodes)
|
||||||
{
|
{
|
||||||
hb_codepoint_t gid;
|
hb_codepoint_t gid;
|
||||||
|
@ -249,33 +254,41 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
|
||||||
DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
|
DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
plan->unicodes->add (cp);
|
|
||||||
plan->codepoint_to_glyph->set (cp, gid);
|
plan->codepoint_to_glyph->set (cp, gid);
|
||||||
plan->_glyphset_gsub->add (gid);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (glyphs->is_empty ())
|
|
||||||
{
|
|
||||||
cmap.fini ();
|
cmap.fini ();
|
||||||
return;
|
} else {
|
||||||
}
|
|
||||||
|
|
||||||
hb_map_t unicode_glyphid_map;
|
hb_map_t unicode_glyphid_map;
|
||||||
cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map);
|
cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map);
|
||||||
cmap.fini ();
|
cmap.fini ();
|
||||||
|
|
||||||
for (hb_pair_t<hb_codepoint_t, hb_codepoint_t> cp_gid :
|
for (hb_pair_t<hb_codepoint_t, hb_codepoint_t> cp_gid :
|
||||||
+ unicode_glyphid_map.iter () | hb_filter (glyphs, hb_second))
|
+ unicode_glyphid_map.iter ())
|
||||||
{
|
{
|
||||||
plan->unicodes->add (cp_gid.first);
|
if (!unicodes->has (cp_gid.first) && !glyphs->has (cp_gid.second))
|
||||||
|
continue;
|
||||||
|
|
||||||
plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second);
|
plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add gids which were requested, but not mapped in cmap
|
||||||
|
// TODO(garretrieger): once https://github.com/harfbuzz/harfbuzz/issues/3169
|
||||||
|
// is implemented, this can be done with union and del_range
|
||||||
|
for (hb_codepoint_t gid : glyphs->iter ())
|
||||||
|
{
|
||||||
|
if (gid >= plan->source->get_num_glyphs ())
|
||||||
|
break;
|
||||||
|
plan->_glyphset_gsub->add (gid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ plan->codepoint_to_glyph->keys () | hb_sink (plan->unicodes);
|
||||||
|
+ plan->codepoint_to_glyph->values () | hb_sink (plan->_glyphset_gsub);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_populate_gids_to_retain (hb_subset_plan_t* plan,
|
_populate_gids_to_retain (hb_subset_plan_t* plan,
|
||||||
const hb_set_t *unicodes,
|
|
||||||
const hb_set_t *input_glyphs_to_retain,
|
|
||||||
bool close_over_gsub,
|
bool close_over_gsub,
|
||||||
bool close_over_gpos,
|
bool close_over_gpos,
|
||||||
bool close_over_gdef)
|
bool close_over_gdef)
|
||||||
|
@ -292,7 +305,6 @@ _populate_gids_to_retain (hb_subset_plan_t* plan,
|
||||||
colr.init (plan->source);
|
colr.init (plan->source);
|
||||||
|
|
||||||
plan->_glyphset_gsub->add (0); // Not-def
|
plan->_glyphset_gsub->add (0); // Not-def
|
||||||
hb_set_union (plan->_glyphset_gsub, input_glyphs_to_retain);
|
|
||||||
|
|
||||||
_cmap_closure (plan->source, plan->unicodes, plan->_glyphset_gsub);
|
_cmap_closure (plan->source, plan->unicodes, plan->_glyphset_gsub);
|
||||||
|
|
||||||
|
@ -477,8 +489,6 @@ hb_subset_plan_create (hb_face_t *face,
|
||||||
_populate_unicodes_to_retain (input->unicodes, input->glyphs, plan);
|
_populate_unicodes_to_retain (input->unicodes, input->glyphs, plan);
|
||||||
|
|
||||||
_populate_gids_to_retain (plan,
|
_populate_gids_to_retain (plan,
|
||||||
input->unicodes,
|
|
||||||
input->glyphs,
|
|
||||||
!input->drop_tables->has (HB_OT_TAG_GSUB),
|
!input->drop_tables->has (HB_OT_TAG_GSUB),
|
||||||
!input->drop_tables->has (HB_OT_TAG_GPOS),
|
!input->drop_tables->has (HB_OT_TAG_GPOS),
|
||||||
!input->drop_tables->has (HB_OT_TAG_GDEF));
|
!input->drop_tables->has (HB_OT_TAG_GDEF));
|
||||||
|
|
|
@ -131,13 +131,6 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
add_all_unicodes ()
|
|
||||||
{
|
|
||||||
hb_set_t *codepoints = hb_subset_input_unicode_set (input);
|
|
||||||
hb_face_collect_unicodes (face, codepoints);
|
|
||||||
}
|
|
||||||
|
|
||||||
void add_options ();
|
void add_options ();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -165,8 +158,17 @@ parse_gids (const char *name G_GNUC_UNUSED,
|
||||||
GError **error)
|
GError **error)
|
||||||
{
|
{
|
||||||
subset_main_t *subset_main = (subset_main_t *) data;
|
subset_main_t *subset_main = (subset_main_t *) data;
|
||||||
|
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
|
||||||
hb_set_t *gids = hb_subset_input_glyph_set (subset_main->input);
|
hb_set_t *gids = hb_subset_input_glyph_set (subset_main->input);
|
||||||
|
|
||||||
|
if (0 == strcmp (arg, "*"))
|
||||||
|
{
|
||||||
|
hb_set_clear (gids);
|
||||||
|
if (!is_remove)
|
||||||
|
hb_set_invert (gids);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
char *s = (char *) arg;
|
char *s = (char *) arg;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
|
@ -203,11 +205,17 @@ parse_gids (const char *name G_GNUC_UNUSED,
|
||||||
"Invalid glyph-index range %u-%u", start_code, end_code);
|
"Invalid glyph-index range %u-%u", start_code, end_code);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!is_remove)
|
||||||
hb_set_add_range (gids, start_code, end_code);
|
hb_set_add_range (gids, start_code, end_code);
|
||||||
|
else
|
||||||
|
hb_set_del_range (gids, start_code, end_code);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (!is_remove)
|
||||||
hb_set_add (gids, start_code);
|
hb_set_add (gids, start_code);
|
||||||
|
else
|
||||||
|
hb_set_del (gids, start_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
s = p;
|
s = p;
|
||||||
|
@ -223,8 +231,17 @@ parse_glyphs (const char *name G_GNUC_UNUSED,
|
||||||
GError **error G_GNUC_UNUSED)
|
GError **error G_GNUC_UNUSED)
|
||||||
{
|
{
|
||||||
subset_main_t *subset_main = (subset_main_t *) data;
|
subset_main_t *subset_main = (subset_main_t *) data;
|
||||||
|
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
|
||||||
hb_set_t *gids = hb_subset_input_glyph_set (subset_main->input);
|
hb_set_t *gids = hb_subset_input_glyph_set (subset_main->input);
|
||||||
|
|
||||||
|
if (0 == strcmp (arg, "*"))
|
||||||
|
{
|
||||||
|
hb_set_clear (gids);
|
||||||
|
if (!is_remove)
|
||||||
|
hb_set_invert (gids);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
const char *p = arg;
|
const char *p = arg;
|
||||||
const char *p_end = arg + strlen (arg);
|
const char *p_end = arg + strlen (arg);
|
||||||
|
|
||||||
|
@ -248,7 +265,10 @@ parse_glyphs (const char *name G_GNUC_UNUSED,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is_remove)
|
||||||
hb_set_add (gids, gid);
|
hb_set_add (gids, gid);
|
||||||
|
else
|
||||||
|
hb_set_del (gids, gid);
|
||||||
}
|
}
|
||||||
|
|
||||||
p = end + 1;
|
p = end + 1;
|
||||||
|
@ -265,20 +285,26 @@ parse_text (const char *name G_GNUC_UNUSED,
|
||||||
GError **error G_GNUC_UNUSED)
|
GError **error G_GNUC_UNUSED)
|
||||||
{
|
{
|
||||||
subset_main_t *subset_main = (subset_main_t *) data;
|
subset_main_t *subset_main = (subset_main_t *) data;
|
||||||
|
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
|
||||||
|
|
||||||
|
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
|
||||||
if (0 == strcmp (arg, "*"))
|
if (0 == strcmp (arg, "*"))
|
||||||
{
|
{
|
||||||
subset_main->add_all_unicodes ();
|
hb_set_clear (unicodes);
|
||||||
|
if (!is_remove)
|
||||||
|
hb_set_invert (unicodes);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
|
|
||||||
for (gchar *c = (gchar *) arg;
|
for (gchar *c = (gchar *) arg;
|
||||||
*c;
|
*c;
|
||||||
c = g_utf8_find_next_char(c, nullptr))
|
c = g_utf8_find_next_char(c, nullptr))
|
||||||
{
|
{
|
||||||
gunichar cp = g_utf8_get_char(c);
|
gunichar cp = g_utf8_get_char(c);
|
||||||
|
if (!is_remove)
|
||||||
hb_set_add (unicodes, cp);
|
hb_set_add (unicodes, cp);
|
||||||
|
else
|
||||||
|
hb_set_del (unicodes, cp);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -290,16 +316,18 @@ parse_unicodes (const char *name G_GNUC_UNUSED,
|
||||||
GError **error)
|
GError **error)
|
||||||
{
|
{
|
||||||
subset_main_t *subset_main = (subset_main_t *) data;
|
subset_main_t *subset_main = (subset_main_t *) data;
|
||||||
|
hb_bool_t is_remove = (name[strlen (name) - 1] == '-');
|
||||||
|
|
||||||
|
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
|
||||||
if (0 == strcmp (arg, "*"))
|
if (0 == strcmp (arg, "*"))
|
||||||
{
|
{
|
||||||
subset_main->add_all_unicodes ();
|
hb_set_clear (unicodes);
|
||||||
|
if (!is_remove)
|
||||||
|
hb_set_invert (unicodes);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// XXX TODO Ranges
|
// XXX TODO Ranges
|
||||||
hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input);
|
|
||||||
|
|
||||||
#define DELIMITERS "<+->{},;&#\\xXuUnNiI\n\t\v\f\r "
|
#define DELIMITERS "<+->{},;&#\\xXuUnNiI\n\t\v\f\r "
|
||||||
|
|
||||||
char *s = (char *) arg;
|
char *s = (char *) arg;
|
||||||
|
@ -338,11 +366,17 @@ parse_unicodes (const char *name G_GNUC_UNUSED,
|
||||||
"Invalid Unicode range %u-%u", start_code, end_code);
|
"Invalid Unicode range %u-%u", start_code, end_code);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!is_remove)
|
||||||
hb_set_add_range (unicodes, start_code, end_code);
|
hb_set_add_range (unicodes, start_code, end_code);
|
||||||
|
else
|
||||||
|
hb_set_del_range (unicodes, start_code, end_code);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (!is_remove)
|
||||||
hb_set_add (unicodes, start_code);
|
hb_set_add (unicodes, start_code);
|
||||||
|
else
|
||||||
|
hb_set_del (unicodes, start_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
s = p;
|
s = p;
|
||||||
|
@ -667,13 +701,39 @@ subset_main_t::add_options ()
|
||||||
|
|
||||||
GOptionEntry glyphset_entries[] =
|
GOptionEntry glyphset_entries[] =
|
||||||
{
|
{
|
||||||
{"gids", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids, "Specify glyph IDs or ranges to include in the subset", "list of glyph indices/ranges"},
|
{"gids", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids,
|
||||||
|
"Specify glyph IDs or ranges to include in the subset.\n"
|
||||||
|
" "
|
||||||
|
"Use --gids-=... to subtract codepoints from the current set.", "list of glyph indices/ranges or *"},
|
||||||
|
{"gids-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids, "Specify glyph IDs or ranges to remove from the subset", "list of glyph indices/ranges or *"},
|
||||||
|
{"gids+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids, "Specify glyph IDs or ranges to include in the subset", "list of glyph indices/ranges or *"},
|
||||||
{"gids-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_gids>, "Specify file to read glyph IDs or ranges from", "filename"},
|
{"gids-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_gids>, "Specify file to read glyph IDs or ranges from", "filename"},
|
||||||
{"glyphs", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to include in the subset", "list of glyph names"},
|
{"glyphs", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to include in the subset. Use --glyphs-=... to subtract glyphs from the current set.", "list of glyph names or *"},
|
||||||
|
{"glyphs+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to include in the subset", "list of glyph names"},
|
||||||
|
{"glyphs-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_glyphs, "Specify glyph names to remove from the subset", "list of glyph names"},
|
||||||
|
|
||||||
|
|
||||||
{"glyphs-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_glyphs>, "Specify file to read glyph names fromt", "filename"},
|
{"glyphs-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_glyphs>, "Specify file to read glyph names fromt", "filename"},
|
||||||
{"text", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to include in the subset", "string"},
|
|
||||||
|
{"text", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to include in the subset. Use --text-=... to subtract codepoints from the current set.", "string"},
|
||||||
|
{"text-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to remove from the subset", "string"},
|
||||||
|
{"text+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to include in the subset", "string"},
|
||||||
|
|
||||||
|
|
||||||
{"text-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_text, false>,"Specify file to read text from", "filename"},
|
{"text-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_text, false>,"Specify file to read text from", "filename"},
|
||||||
{"unicodes", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to include in the subset", "list of hex numbers/ranges"},
|
{"unicodes", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes,
|
||||||
|
"Specify Unicode codepoints or ranges to include in the subset. Use * to include all codepoints.\n"
|
||||||
|
" "
|
||||||
|
"--unicodes-=... can be used to subtract codepoints "
|
||||||
|
"from the current set.\n"
|
||||||
|
" "
|
||||||
|
"For example: --unicodes=* --unicodes-=41,42,43 would create a subset with all codepoints\n"
|
||||||
|
" "
|
||||||
|
"except for 41, 42, 43.",
|
||||||
|
"list of hex numbers/ranges or *"},
|
||||||
|
{"unicodes-", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to remove from the subset", "list of hex numbers/ranges or *"},
|
||||||
|
{"unicodes+", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to include in the subset", "list of hex numbers/ranges or *"},
|
||||||
|
|
||||||
{"unicodes-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_unicodes>,"Specify file to read Unicode codepoints or ranges from", "filename"},
|
{"unicodes-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for<parse_unicodes>,"Specify file to read Unicode codepoints or ranges from", "filename"},
|
||||||
{nullptr}
|
{nullptr}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue