[subset] Add support for "--gids" option

cmap subsetting now retains entries associated with any glyph IDs explicitly requested.
This commit is contained in:
Qunxin Liu 2020-04-22 15:58:41 -07:00 committed by Garret Rieger
parent 44d88cff95
commit b2a965df5e
53 changed files with 282 additions and 23 deletions

View File

@ -56,6 +56,18 @@ struct CmapSubtableFormat0
out->add (i);
}
/* Walks the 256 entries of glyphIdArray and, for each entry holding a
 * non-zero glyph id, adds the entry's index to `unicodes` and the
 * (index, glyph id) pair to `mapping`.  Entries holding zero are skipped. */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping /* OUT */) const
{
  for (unsigned cp = 0; cp < 256; cp++)
  {
    hb_codepoint_t gid = glyphIdArray[cp];
    if (!gid) continue;

    *unicodes << cp;
    *mapping << hb_pair_t<hb_codepoint_t, hb_codepoint_t> (cp, gid);
  }
}
bool sanitize (hb_sanitize_context_t *c) const
{
TRACE_SANITIZE (this);
@ -238,8 +250,7 @@ struct CmapSubtableFormat4
{ return _.first <= 0xFFFF; })
;
//comment off temporarily while we're working on supporting --gids option
//if (format4_iter.len () == 0) return;
if (format4_iter.len () == 0) return;
unsigned table_initpos = c->length ();
if (unlikely (!c->extend_min (*this))) return;
@ -377,6 +388,45 @@ struct CmapSubtableFormat4
}
}
/* Enumerates every segment (except a trailing 0xFFFF sentinel) and records
 * each code point that resolves to a non-zero glyph id: the code point goes
 * into `unicodes`, the (code point, glyph id) pair into `mapping`. */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping /* OUT */) const
{
  unsigned num_segments = this->segCount;
  if (num_segments && this->startCount[num_segments - 1] == 0xFFFFu)
    num_segments--; /* Skip sentinel segment. */

  for (unsigned seg = 0; seg < num_segments; seg++)
  {
    hb_codepoint_t first = this->startCount[seg];
    hb_codepoint_t last = this->endCount[seg];
    unsigned rangeOffset = this->idRangeOffset[seg];

    for (hb_codepoint_t cp = first; cp <= last; cp++)
    {
      hb_codepoint_t gid;
      if (rangeOffset == 0)
      {
	/* No range offset: the glyph id is the code point plus the
	 * segment's delta, truncated to 16 bits. */
	gid = (cp + this->idDelta[seg]) & 0xFFFFu;
      }
      else
      {
	/* Otherwise the glyph id is read from glyphIdArray.  Once the
	 * computed index is out of bounds the rest of the segment is
	 * abandoned. */
	unsigned index = rangeOffset / 2 + (cp - this->startCount[seg]) + seg - this->segCount;
	if (unlikely (index >= this->glyphIdArrayLength))
	  break;
	gid = this->glyphIdArray[index];
      }

      /* Glyph id zero is not recorded. */
      if (unlikely (!gid))
	continue;

      *unicodes << cp;
      *mapping << hb_pair_t<hb_codepoint_t, hb_codepoint_t> (cp, gid);
    }
  }
}
const HBUINT16 *endCount;
const HBUINT16 *startCount;
const HBUINT16 *idDelta;
@ -397,6 +447,13 @@ struct CmapSubtableFormat4
accel.collect_unicodes (out);
}
/* Builds a temporary accelerator over this subtable and forwards the
 * request to its collect_mapping(). */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping /* OUT */) const
{
  accelerator_t accelerator (this);
  accelerator.collect_mapping (unicodes, mapping);
}
bool sanitize (hb_sanitize_context_t *c) const
{
TRACE_SANITIZE (this);
@ -499,6 +556,21 @@ struct CmapSubtableTrimmed
out->add (start + i);
}
/* For each non-zero entry of glyphIdArray, records the code point
 * (startCharCode plus the entry's index) in `unicodes` and the
 * (code point, glyph id) pair in `mapping`. */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping /* OUT */) const
{
  hb_codepoint_t first_cp = startCharCode;
  unsigned num_entries = glyphIdArray.len;

  for (unsigned idx = 0; idx < num_entries; idx++)
  {
    hb_codepoint_t gid = glyphIdArray[idx];
    if (!gid) continue; /* Zero entries are not recorded. */

    hb_codepoint_t cp = first_cp + idx;
    *unicodes << cp;
    *mapping << hb_pair_t<hb_codepoint_t, hb_codepoint_t> (cp, gid);
  }
}
bool sanitize (hb_sanitize_context_t *c) const
{
TRACE_SANITIZE (this);
@ -557,6 +629,36 @@ struct CmapSubtableLongSegmented
}
}
/* Expands every group into individual (code point, glyph id) pairs.
 *
 * unicodes   - OUT: receives each code point that is emitted.
 * mapping    - OUT: receives each (code point, glyph id) pair.
 * num_glyphs - exclusive upper bound on glyph ids; no pair with a glyph id
 *              >= num_glyphs is emitted.
 *
 * The glyph id is advanced by one for each successive code point of a group.
 * NOTE(review): confirm that per-code-point increment is also what is wanted
 * for subtable types where a whole group shares a single glyph id. */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping, /* OUT */
		      unsigned num_glyphs) const
{
  for (unsigned i = 0; i < this->groups.len; i++)
  {
    hb_codepoint_t start = this->groups[i].startCharCode;
    hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode,
				 (hb_codepoint_t) HB_UNICODE_MAX);

    /* An inverted group describes no code points.  Skip it here so the
     * unsigned range arithmetic below cannot wrap around. */
    if (unlikely (start > end)) continue;

    hb_codepoint_t gid = this->groups[i].glyphID;
    if (!gid)
    {
      /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
      if (! T::group_get_glyph (this->groups[i], end)) continue;
      /* Dropping the leading glyph-0 entry of a one-code-point group
       * leaves nothing to emit. */
      if (unlikely (start == end)) continue;
      start++;
      gid++;
    }

    if (unlikely ((unsigned int) gid >= num_glyphs)) continue;

    /* Clamp the group so the last glyph id emitted is num_glyphs - 1.
     * gid < num_glyphs and start <= end hold here, so neither subtraction
     * can underflow. */
    if (unlikely (end - start >= (hb_codepoint_t) num_glyphs - gid))
      end = start + ((hb_codepoint_t) num_glyphs - gid) - 1;

    for (unsigned cp = start; cp <= end; cp++)
    {
      *unicodes << cp;
      *mapping << hb_pair_t<hb_codepoint_t, hb_codepoint_t> (cp, gid);
      gid++;
    }
  }
}
bool sanitize (hb_sanitize_context_t *c) const
{
TRACE_SANITIZE (this);
@ -783,6 +885,19 @@ struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
out->add (arrayZ[i].unicodeValue);
}
/* Records every entry of this array: its unicodeValue goes into `unicodes`
 * and the (unicodeValue, glyphID) pair into `mapping`.  No entry is
 * filtered out. */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping /* OUT */) const
{
  unsigned num_entries = len;
  for (unsigned idx = 0; idx < num_entries; idx++)
  {
    const UVSMapping &entry = arrayZ[idx];
    hb_codepoint_t cp = entry.unicodeValue;
    hb_codepoint_t gid = entry.glyphID;

    *unicodes << cp;
    *mapping << hb_pair_t<hb_codepoint_t, hb_codepoint_t> (cp, gid);
  }
}
void closure_glyphs (const hb_set_t *unicodes,
hb_set_t *glyphset) const
{
@ -795,7 +910,7 @@ struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
NonDefaultUVS* copy (hb_serialize_context_t *c,
const hb_set_t *unicodes,
const hb_set_t *glyphs,
const hb_set_t *glyphs_requested,
const hb_map_t *glyph_map) const
{
NonDefaultUVS *out = c->start_embed<NonDefaultUVS> ();
@ -805,7 +920,7 @@ struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
+ as_array ()
| hb_filter ([&] (const UVSMapping& _)
{
return unicodes->has (_.unicodeValue) || glyphs->has (_.glyphID);
return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID);
})
;
@ -867,6 +982,14 @@ struct VariationSelectorRecord
(base+nonDefaultUVS).collect_unicodes (out);
}
void collect_mapping (const void *base,
hb_set_t *unicodes, /* OUT */
hb_map_t *mapping /* OUT */) const
{
(base+defaultUVS).collect_unicodes (unicodes);
(base+nonDefaultUVS).collect_mapping (unicodes, mapping);
}
int cmp (const hb_codepoint_t &variation_selector) const
{ return varSelector.cmp (variation_selector); }
@ -881,7 +1004,7 @@ struct VariationSelectorRecord
hb_pair_t<unsigned, unsigned>
copy (hb_serialize_context_t *c,
const hb_set_t *unicodes,
const hb_set_t *glyphs,
const hb_set_t *glyphs_requested,
const hb_map_t *glyph_map,
const void *base) const
{
@ -896,7 +1019,7 @@ struct VariationSelectorRecord
if (nonDefaultUVS != 0)
{
c->push ();
if (c->copy (base+nonDefaultUVS, unicodes, glyphs, glyph_map))
if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map))
non_default_uvs_objidx = c->pop_pack ();
else c->pop_discard ();
}
@ -945,7 +1068,7 @@ struct CmapSubtableFormat14
void serialize (hb_serialize_context_t *c,
const hb_set_t *unicodes,
const hb_set_t *glyphs,
const hb_set_t *glyphs_requested,
const hb_map_t *glyph_map,
const void *base)
{
@ -974,7 +1097,7 @@ struct CmapSubtableFormat14
hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices;
for (int i = src_tbl->record.len - 1; i >= 0; i--)
{
hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs, glyph_map, base);
hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base);
if (result.first || result.second)
obj_indices.push (result);
}
@ -1037,6 +1160,13 @@ struct CmapSubtableFormat14
_.collect_unicodes (out, this);
}
/* Asks each variation selector record, in order, to add its code points
 * and mappings, passing this subtable as the base for the record's
 * offsets. */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping /* OUT */) const
{
  unsigned num_records = record.len;
  for (unsigned idx = 0; idx < num_records; idx++)
    record[idx].collect_mapping (this, unicodes, mapping);
}
bool sanitize (hb_sanitize_context_t *c) const
{
TRACE_SANITIZE (this);
@ -1086,6 +1216,22 @@ struct CmapSubtable
}
}
/* Dispatches on the subtable format to the matching collect_mapping()
 * implementation.  Only formats 12 and 13 receive `num_glyphs`.  Format 14
 * and any unlisted format leave both outputs untouched. */
void collect_mapping (hb_set_t *unicodes, /* OUT */
		      hb_map_t *mapping, /* OUT */
		      unsigned num_glyphs = UINT_MAX) const
{
  switch (u.format) {
  case  0:
    u.format0 .collect_mapping (unicodes, mapping);
    break;
  case  4:
    u.format4 .collect_mapping (unicodes, mapping);
    break;
  case  6:
    u.format6 .collect_mapping (unicodes, mapping);
    break;
  case 10:
    u.format10.collect_mapping (unicodes, mapping);
    break;
  case 12:
    u.format12.collect_mapping (unicodes, mapping, num_glyphs);
    break;
  case 13:
    u.format13.collect_mapping (unicodes, mapping, num_glyphs);
    break;
  case 14:
  default:
    break;
  }
}
template<typename Iterator,
hb_requires (hb_is_iterator (Iterator))>
void serialize (hb_serialize_context_t *c,
@ -1097,7 +1243,7 @@ struct CmapSubtable
switch (format) {
case 4: return u.format4.serialize (c, it);
case 12: return u.format12.serialize (c, it);
case 14: return u.format14.serialize (c, plan->unicodes, plan->_glyphset, plan->glyph_map, base);
case 14: return u.format14.serialize (c, plan->unicodes, plan->glyphs_requested, plan->glyph_map, base);
default: return;
}
}
@ -1200,7 +1346,7 @@ struct cmap
static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
template<typename Iterator, typename EncodingRecIter,
hb_requires (hb_is_iterator (Iterator))>
hb_requires (hb_is_iterator (EncodingRecIter))>
void serialize (hb_serialize_context_t *c,
Iterator it,
EncodingRecIter encodingrec_iter,
@ -1215,13 +1361,38 @@ struct cmap
for (const EncodingRecord& _ : encodingrec_iter)
{
hb_set_t unicodes_set;
(base+_.subtable).collect_unicodes (&unicodes_set);
hb_map_t cp_glyphid_map;
(base+_.subtable).collect_mapping (&unicodes_set, &cp_glyphid_map);
unsigned format = (base+_.subtable).u.format;
if (format == 4) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
else if (format == 12) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
if (!plan->glyphs_requested->is_empty ())
{
auto table_iter =
+ hb_zip (unicodes_set.iter(), unicodes_set.iter() | hb_map(cp_glyphid_map))
| hb_filter (plan->_glyphset, hb_second)
| hb_filter ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p)
{
return plan->unicodes->has (p.first) ||
plan->glyphs_requested->has (p.second);
})
| hb_map ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p_org)
{
return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (p_org.first, plan->glyph_map->get(p_org.second));
})
;
if (format == 4) c->copy (_, table_iter, 4u, base, plan, &format4objidx);
else if (format == 12) c->copy (_, table_iter, 12u, base, plan, &format12objidx);
else if (format == 14) c->copy (_, table_iter, 14u, base, plan, &format14objidx);
}
/* when --gids option is not used, we iterate input unicodes instead of
* all codepoints in each subtable, which is more efficient */
else
{
if (format == 4) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
else if (format == 12) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
}
}
c->check_assign(this->encodingRecord.len, (c->length () - cmap::min_size)/EncodingRecord::static_size);
@ -1291,7 +1462,6 @@ struct cmap
| hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
{ return (_.second != HB_MAP_VALUE_INVALID); })
;
cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan);
return_trace (true);
}
@ -1413,6 +1583,9 @@ struct cmap
void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
{ subtable->collect_unicodes (out, num_glyphs); }
/* Forwards to the collect_mapping() of the subtable this accelerator
 * points at, passing all three arguments through unchanged. */
void collect_mapping (hb_set_t *unicodes,
		      hb_map_t *mapping,
		      unsigned num_glyphs = UINT_MAX) const
{
  subtable->collect_mapping (unicodes, mapping, num_glyphs);
}
void collect_variation_selectors (hb_set_t *out) const
{ subtable_uvs->collect_variation_selectors (out); }
void collect_variation_unicodes (hb_codepoint_t variation_selector,

View File

@ -30,6 +30,7 @@
#include "hb-open-type.hh"
#include "hb-ot-os2-unicode-ranges.hh"
#include "hb-ot-cmap-table.hh"
#include "hb-set.hh"
@ -172,12 +173,28 @@ struct OS2
OS2 *os2_prime = c->serializer->embed (this);
if (unlikely (!os2_prime)) return_trace (false);
hb_set_t unicodes;
hb_map_t unicode_glyphid_map;
OT::cmap::accelerator_t cmap;
cmap.init (c->plan->source);
cmap.collect_mapping (&unicodes, &unicode_glyphid_map);
cmap.fini ();
if (c->plan->unicodes->is_empty ()) unicodes.clear ();
else hb_set_set (&unicodes, c->plan->unicodes);
+ unicode_glyphid_map.iter ()
| hb_filter (c->plan->glyphs_requested, hb_second)
| hb_map (hb_first)
| hb_sink (unicodes)
;
uint16_t min_cp, max_cp;
find_min_and_max_codepoint (c->plan->unicodes, &min_cp, &max_cp);
find_min_and_max_codepoint (&unicodes, &min_cp, &max_cp);
os2_prime->usFirstCharIndex = min_cp;
os2_prime->usLastCharIndex = max_cp;
_update_unicode_ranges (c->plan->unicodes, os2_prime->ulUnicodeRange);
_update_unicode_ranges (&unicodes, os2_prime->ulUnicodeRange);
return_trace (true);
}

View File

@ -294,6 +294,7 @@ hb_subset_plan_create (hb_face_t *face,
plan->name_ids = hb_set_reference (input->name_ids);
_nameid_closure (face, plan->name_ids);
plan->name_languages = hb_set_reference (input->name_languages);
plan->glyphs_requested = hb_set_reference (input->glyphs);
plan->drop_tables = hb_set_reference (input->drop_tables);
plan->source = hb_face_reference (face);
plan->dest = hb_face_builder_create ();
@ -337,6 +338,7 @@ hb_subset_plan_destroy (hb_subset_plan_t *plan)
hb_set_destroy (plan->unicodes);
hb_set_destroy (plan->name_ids);
hb_set_destroy (plan->name_languages);
hb_set_destroy (plan->glyphs_requested);
hb_set_destroy (plan->drop_tables);
hb_face_destroy (plan->source);
hb_face_destroy (plan->dest);

View File

@ -53,6 +53,9 @@ struct hb_subset_plan_t
// name_languages we would like to retain
hb_set_t *name_languages;
//glyph ids requested to retain
hb_set_t *glyphs_requested;
// Tables which should be dropped.
hb_set_t *drop_tables;

View File

@ -113,7 +113,6 @@ test_subset_cmap4_no_exceeding_maximum_codepoint (void)
hb_face_destroy (face_origin);
}
/* TEMPORARILY COMMENTED OFF
static void
test_subset_cmap_empty_tables (void)
{
@ -133,7 +132,6 @@ test_subset_cmap_empty_tables (void)
hb_face_destroy (face_abc);
hb_face_destroy (face_empty);
}
*/
static void
test_subset_cmap_noto_color_emoji_noop (void)
@ -188,7 +186,7 @@ main (int argc, char **argv)
hb_test_add (test_subset_cmap_noop);
hb_test_add (test_subset_cmap_non_consecutive_glyphs);
hb_test_add (test_subset_cmap4_no_exceeding_maximum_codepoint);
//hb_test_add (test_subset_cmap_empty_tables);
hb_test_add (test_subset_cmap_empty_tables);
hb_test_add (test_subset_cmap_noto_color_emoji_noop);
hb_test_add (test_subset_cmap_noto_color_emoji_non_consecutive_glyphs);

View File

@ -4,6 +4,7 @@ TESTS = \
tests/cff-full-font.tests \
tests/japanese.tests \
tests/cff-japanese.tests \
tests/cmap.tests \
tests/cmap14.tests \
tests/sbix.tests \
tests/colr.tests \
@ -20,7 +21,6 @@ DISABLED_TESTS = \
tests/layout.gsub3.tests \
tests/layout.gsub6.tests \
tests/layout.gdef.tests \
tests/cmap.tests \
$(NULL)
XFAIL_TESTS = \

View File

@ -0,0 +1 @@
--gids=1,2,3

View File

@ -11,6 +11,7 @@ retain-gids.txt
name-ids.txt
name-languages.txt
name-legacy.txt
gids.txt
SUBSETS:
abc

View File

@ -8,6 +8,7 @@ drop-hints.txt
drop-hints-retain-gids.txt
retain-gids.txt
name-ids.txt
gids.txt
SUBSETS:
一丂七

View File

@ -13,5 +13,6 @@ acek
あいうえおか
あいう珠度輸
𝜕𝟘AB
𥝱

View File

@ -13,6 +13,7 @@ tests = [
'layout.gsub3',
'layout.gsub6',
'layout.gdef',
'cmap',
'cmap14',
'sbix',
'colr',
@ -36,7 +37,7 @@ foreach t : tests
],
# as the tests are ran concurrently let's raise acceptable time here
# ideally better to break and let meson handles them in parallel
timeout: 120,
timeout: 140,
workdir: join_paths(meson.current_build_dir(), '..', '..'),
)
endforeach

View File

@ -28,6 +28,66 @@
#include "hb-subset-input.hh"
static gboolean
parse_gids (const char *name G_GNUC_UNUSED,
const char *arg,
gpointer data,
GError **error G_GNUC_UNUSED)
{
subset_options_t *subset_opts = (subset_options_t *) data;
hb_set_t *gids = subset_opts->input->glyphs;
char *s = (char *) arg;
char *p;
while (s && *s)
{
while (*s && strchr (", ", *s))
s++;
if (!*s)
break;
errno = 0;
hb_codepoint_t start_code = strtoul (s, &p, 10);
if (s[0] == '-' || errno || s == p)
{
hb_set_destroy (gids);
g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
"Failed parsing gids values at: '%s'", s);
return false;
}
if (p && p[0] == '-') //gid ranges
{
s = ++p;
hb_codepoint_t end_code = strtoul (s, &p, 10);
if (s[0] == '-' || errno || s == p)
{
hb_set_destroy (gids);
g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
"Failed parsing gids values at: '%s'", s);
return false;
}
if (end_code < start_code)
{
hb_set_destroy (gids);
g_set_error (error, G_OPTION_ERROR, G_OPTION_ERROR_BAD_VALUE,
"Invalid gids range value %u-%u", start_code, end_code);
return false;
}
hb_set_add_range (gids, start_code, end_code);
}
else
{
hb_set_add (gids, start_code);
}
s = p;
}
return true;
}
static gboolean
parse_nameids (const char *name,
const char *arg,
@ -184,6 +244,7 @@ subset_options_t::add_options (option_parser_t *parser)
{
{"no-hinting", 0, 0, G_OPTION_ARG_NONE, &this->input->drop_hints, "Whether to drop hints", nullptr},
{"retain-gids", 0, 0, G_OPTION_ARG_NONE, &this->input->retain_gids, "If set don't renumber glyph ids in the subset.", nullptr},
{"gids", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_gids, "Specify glyph IDs or ranges to include in the subset", "list of comma/whitespace-separated int numbers or ranges"},
{"desubroutinize", 0, 0, G_OPTION_ARG_NONE, &this->input->desubroutinize, "Remove CFF/CFF2 use of subroutines", nullptr},
{"name-IDs", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_nameids, "Subset specified nameids", "list of int numbers"},
{"name-legacy", 0, 0, G_OPTION_ARG_NONE, &this->input->name_legacy, "Keep legacy (non-Unicode) 'name' table entries", nullptr},