Merge pull request #3710 from googlefonts/24bit_repacking

[subset] Prepare the repacker for handling 24bit offsets in GSUB/GPOS.
This commit is contained in:
Behdad Esfahbod 2022-07-07 13:35:38 -06:00 committed by GitHub
commit a64fc71033
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 152 additions and 14 deletions

View File

@ -265,28 +265,64 @@ struct graph_t
} }
/* /*
* Assign unique space numbers to each connected subgraph of 32 bit offset(s). * Finds the set of nodes (placed into roots) that should be assigned unique spaces.
* More specifically this looks for the top most 24 bit or 32 bit links in the graph.
* Some special casing is done that is specific to the layout of GSUB/GPOS tables.
*/ */
bool assign_32bit_spaces () void find_space_roots (hb_set_t& visited, hb_set_t& roots)
{ {
unsigned root_index = root_idx (); int root_index = (int) root_idx ();
hb_set_t visited; for (int i = root_index; i >= 0; i--)
hb_set_t roots;
for (unsigned i = 0; i <= root_index; i++)
{ {
if (visited.has (i)) continue;
// Only real links can form 32 bit spaces // Only real links can form 32 bit spaces
for (auto& l : vertices_[i].obj.real_links) for (auto& l : vertices_[i].obj.real_links)
{ {
if (l.width == 4 && !l.is_signed) if (l.is_signed || l.width < 3)
continue;
if (i == root_index && l.width == 3)
// Ignore 24bit links from the root node, this skips past the single 24bit
// pointer to the lookup list.
continue;
if (l.width == 3)
{ {
// A 24bit offset forms a root, unless there are 32bit offsets somewhere
// in its subgraph, in which case those become the roots instead. This makes sure
// that extension subtables beneath a 24bit lookup become the spaces instead
// of the offset to the lookup.
hb_set_t sub_roots;
find_32bit_roots (l.objidx, sub_roots);
if (sub_roots) {
for (unsigned sub_root_idx : sub_roots) {
roots.add (sub_root_idx);
find_subgraph (sub_root_idx, visited);
}
continue;
}
}
roots.add (l.objidx); roots.add (l.objidx);
find_subgraph (l.objidx, visited); find_subgraph (l.objidx, visited);
} }
} }
} }
// Mark everything not in the subgraphs of 32 bit roots as visited. /*
// This prevents 32 bit subgraphs from being connected via nodes not in the 32 bit subgraphs. * Assign unique space numbers to each connected subgraph of 24 bit and/or 32 bit offset(s).
* Currently, this is implemented specifically tailored to the structure of a GPOS/GSUB
* (including with 24bit offsets) table.
*/
bool assign_spaces ()
{
hb_set_t visited;
hb_set_t roots;
find_space_roots (visited, roots);
// Mark everything not in the subgraphs of the roots as visited. This prevents
// subgraphs from being connected via nodes not in those subgraphs.
visited.invert (); visited.invert ();
if (!roots) return false; if (!roots) return false;
@ -422,6 +458,22 @@ struct graph_t
find_subgraph (link.objidx, subgraph); find_subgraph (link.objidx, subgraph);
} }
/*
* Finds the topmost children of 32bit offsets in the subgraph starting
* at node_idx. Found indices are placed into 'found'.
*/
void find_32bit_roots (unsigned node_idx, hb_set_t& found)
{
for (const auto& link : vertices_[node_idx].obj.all_links ())
{
if (!link.is_signed && link.width == 4) {
found.add (link.objidx);
continue;
}
find_32bit_roots (link.objidx, found);
}
}
/* /*
* duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign * duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign
* links. index_map is updated with mappings from old id to new id. If a duplication has already * links. index_map is updated with mappings from old id to new id. If a duplication has already
@ -622,7 +674,7 @@ struct graph_t
private: private:
/* /*
* Returns the numbers of incoming edges that are 32bits wide. * Returns the numbers of incoming edges that are 24 or 32 bits wide.
*/ */
unsigned wide_parents (unsigned node_idx, hb_set_t& parents) const unsigned wide_parents (unsigned node_idx, hb_set_t& parents) const
{ {
@ -636,7 +688,9 @@ struct graph_t
// Only real links can be wide // Only real links can be wide
for (const auto& l : vertices_[p].obj.real_links) for (const auto& l : vertices_[p].obj.real_links)
{ {
if (l.objidx == node_idx && l.width == 4 && !l.is_signed) if (l.objidx == node_idx
&& (l.width == 3 || l.width == 4)
&& !l.is_signed)
{ {
count++; count++;
parents.add (p); parents.add (p);

View File

@ -172,7 +172,7 @@ hb_resolve_overflows (const T& packed,
&& will_overflow) && will_overflow)
{ {
DEBUG_MSG (SUBSET_REPACK, nullptr, "Assigning spaces to 32 bit subgraphs."); DEBUG_MSG (SUBSET_REPACK, nullptr, "Assigning spaces to 32 bit subgraphs.");
if (sorted_graph.assign_32bit_spaces ()) if (sorted_graph.assign_spaces ())
sorted_graph.sort_shortest_distance (); sorted_graph.sort_shortest_distance ();
} }

View File

@ -57,6 +57,14 @@ static void add_offset (unsigned id,
c->add_link (*offset, id); c->add_link (*offset, id);
} }
static void add_24_offset (unsigned id,
hb_serialize_context_t* c)
{
OT::Offset24* offset = c->start_embed<OT::Offset24> ();
c->extend_min (offset);
c->add_link (*offset, id);
}
static void add_wide_offset (unsigned id, static void add_wide_offset (unsigned id,
hb_serialize_context_t* c) hb_serialize_context_t* c)
{ {
@ -812,6 +820,51 @@ populate_serializer_virtual_link (hb_serialize_context_t* c)
c->end_serialize(); c->end_serialize();
} }
/*
 * Serializes a test graph that mixes 16, 24 and 32 bit links:
 *
 *   a -24-> b
 *   b -24-> c, d, e
 *   c -16-> f      d -16-> g
 *   e -32-> h, i
 *   h -16-> j      i -16-> k
 *
 * c, d, h and i each carry 40000 bytes of payload; every other object
 * carries a single byte.
 */
static void
populate_serializer_with_24_and_32_bit_offsets (hb_serialize_context_t* c)
{
  std::string filler (60000, 'a');
  c->start_serialize<char> ();

  // Leaf objects, packed first.
  unsigned leaf_f = add_object ("f", 1, c);
  unsigned leaf_g = add_object ("g", 1, c);
  unsigned leaf_j = add_object ("j", 1, c);
  unsigned leaf_k = add_object ("k", 1, c);

  // Packs a 40000 byte object holding one 16 bit link to 'child'.
  auto pack_large_parent = [&] (unsigned child) -> unsigned
  {
    start_object (filler.c_str (), 40000, c);
    add_offset (child, c);
    return c->pop_pack (false);
  };

  unsigned node_c = pack_large_parent (leaf_f);
  unsigned node_d = pack_large_parent (leaf_g);
  unsigned node_h = pack_large_parent (leaf_j);
  unsigned node_i = pack_large_parent (leaf_k);

  // e reaches h and i through 32 bit links.
  start_object ("e", 1, c);
  add_wide_offset (node_h, c);
  add_wide_offset (node_i, c);
  unsigned node_e = c->pop_pack (false);

  // b reaches c, d and e through 24 bit links.
  start_object ("b", 1, c);
  add_24_offset (node_c, c);
  add_24_offset (node_d, c);
  add_24_offset (node_e, c);
  unsigned node_b = c->pop_pack (false);

  // a is packed last and holds a single 24 bit link to b.
  start_object ("a", 1, c);
  add_24_offset (node_b, c);
  c->pop_pack (false);

  c->end_serialize();
}
static void test_sort_shortest () static void test_sort_shortest ()
{ {
size_t buffer_size = 100; size_t buffer_size = 100;
@ -1129,6 +1182,36 @@ static void test_resolve_overflows_via_isolation_spaces ()
hb_blob_destroy (out); hb_blob_destroy (out);
} }
/*
 * Serializes the mixed 16/24/32 bit offset graph, checks that the serializer
 * reports an offset overflow, then checks that hb_resolve_overflows () returns
 * a blob whose length equals the sum of all object payloads and link fields.
 */
static void test_resolve_mixed_overflows_via_isolation_spaces ()
{
  size_t buffer_size = 200000;
  void* buffer = malloc (buffer_size);
  // Fail here rather than handing a null buffer to the serializer.
  assert (buffer);
  hb_serialize_context_t c (buffer, buffer_size);
  populate_serializer_with_24_and_32_bit_offsets (&c);
  // NOTE(review): 'graph' is not referenced again in this test; presumably it
  // is kept to exercise graph construction -- confirm before removing.
  graph_t graph (c.object_graph ());

  // The initial packing is expected to overflow.
  assert (c.offset_overflow ());
  hb_blob_t* out = hb_resolve_overflows (c.object_graph (), HB_TAG ('G', 'S', 'U', 'B'), 0);
  assert (out);
  hb_bytes_t result = out->as_bytes ();

  unsigned expected_length =
      // Objects: seven 1 byte payloads plus four 40000 byte payloads.
      7 +
      4 * 40000;

  expected_length +=
      // Links
      2 * 4 +  // 32
      4 * 3 +  // 24
      4 * 2; // 16

  assert (result.length == expected_length);

  free (buffer);
  hb_blob_destroy (out);
}
static void test_resolve_overflows_via_splitting_spaces () static void test_resolve_overflows_via_splitting_spaces ()
{ {
size_t buffer_size = 160000; size_t buffer_size = 160000;
@ -1270,6 +1353,7 @@ main (int argc, char **argv)
test_resolve_overflows_via_isolating_16bit_space_2 (); test_resolve_overflows_via_isolating_16bit_space_2 ();
test_resolve_overflows_via_splitting_spaces (); test_resolve_overflows_via_splitting_spaces ();
test_resolve_overflows_via_splitting_spaces_2 (); test_resolve_overflows_via_splitting_spaces_2 ();
test_resolve_mixed_overflows_via_isolation_spaces ();
test_duplicate_leaf (); test_duplicate_leaf ();
test_duplicate_interior (); test_duplicate_interior ();
test_virtual_link (); test_virtual_link ();