From 401066bf3d20bf8913d340811fd1c61ed65bb5f1 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Wed, 6 Jul 2022 18:44:40 +0000 Subject: [PATCH 1/3] [subset] Prepare the repacker for handling 24bit offsets in GSUB/GPOS. The boring expansion (https://github.com/be-fonts/boring-expansion-spec) plans to introduce 24bit offsets into GSUB/GPOS. This changes the repacker to treat 24 bit offsets similar to 32 bit offsets and assign the top level 24 bit offsets into spaces to improve packing. --- src/graph/graph.hh | 73 ++++++++++++++++++++++++++++++++++++---------- src/hb-repacker.hh | 2 +- 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/src/graph/graph.hh b/src/graph/graph.hh index 52ca9dd14..88a9e4e60 100644 --- a/src/graph/graph.hh +++ b/src/graph/graph.hh @@ -264,29 +264,58 @@ struct graph_t hb_swap (vertices_, sorted_graph); } - /* - * Assign unique space numbers to each connected subgraph of 32 bit offset(s). - */ - bool assign_32bit_spaces () + void find_space_roots (hb_set_t& visited, hb_set_t& roots) { - unsigned root_index = root_idx (); - hb_set_t visited; - hb_set_t roots; - for (unsigned i = 0; i <= root_index; i++) + int root_index = (int) root_idx (); + for (int i = root_index; i >= 0; i--) { + if (visited.has (i)) continue; + // Only real links can form 32 bit spaces for (auto& l : vertices_[i].obj.real_links) { - if (l.width == 4 && !l.is_signed) + if (l.is_signed || l.width < 3) + continue; + + if (i == root_index && l.width == 3) + // Ignore 24bit links from the root node, this skips past the single 24bit + // pointer to the lookup list. + continue; + + if (l.width == 3) { - roots.add (l.objidx); - find_subgraph (l.objidx, visited); + // A 24bit offset forms a root, unless there is 32bit offsets somewhere + // in it's subgraph, then those become the roots instead. + hb_set_t sub_roots; + find_32bit_roots (l.objidx, sub_roots); + if (sub_roots) { + for (unsigned sub_root_idx : sub_roots) { + roots.add (sub_root_idx); + find_subgraph (sub_root_idx, visited); + } + continue; + } } + + roots.add (l.objidx); + find_subgraph (l.objidx, visited); } } + } - // Mark everything not in the subgraphs of 32 bit roots as visited. - // This prevents 32 bit subgraphs from being connected via nodes not in the 32 bit subgraphs. + /* + * Assign unique space numbers to each connected subgraph of 24 bit and/or 32 bit offset(s). + * Currently, this is implemented specifically tailored to the structure of a GPOS/GSUB + * (including with 24bit offsets) table. + */ + bool assign_spaces () + { + hb_set_t visited; + hb_set_t roots; + find_space_roots (visited, roots); + + // Mark everything not in the subgraphs of the roots as visited. This prevents + // subgraphs from being connected via nodes not in those subgraphs. visited.invert (); if (!roots) return false; @@ -422,6 +451,18 @@ struct graph_t find_subgraph (link.objidx, subgraph); } + void find_32bit_roots (unsigned node_idx, hb_set_t& found) + { + for (const auto& link : vertices_[node_idx].obj.all_links ()) + { + if (!link.is_signed && link.width == 4) { + found.add (link.objidx); + continue; + } + find_32bit_roots (link.objidx, found); + } + } + /* * duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign * links. index_map is updated with mappings from old id to new id. If a duplication has already @@ -622,7 +663,7 @@ struct graph_t private: /* - * Returns the numbers of incoming edges that are 32bits wide. + * Returns the numbers of incoming edges that are 24 or 32 bits wide. */ unsigned wide_parents (unsigned node_idx, hb_set_t& parents) const { @@ -636,7 +677,9 @@ struct graph_t // Only real links can be wide for (const auto& l : vertices_[p].obj.real_links) { - if (l.objidx == node_idx && l.width == 4 && !l.is_signed) + if (l.objidx == node_idx + && (l.width == 3 || l.width == 4) + && !l.is_signed) { count++; parents.add (p); diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh index 683a441ec..57c063111 100644 --- a/src/hb-repacker.hh +++ b/src/hb-repacker.hh @@ -172,7 +172,7 @@ hb_resolve_overflows (const T& packed, && will_overflow) { DEBUG_MSG (SUBSET_REPACK, nullptr, "Assigning spaces to 32 bit subgraphs."); - if (sorted_graph.assign_32bit_spaces ()) + if (sorted_graph.assign_spaces ()) sorted_graph.sort_shortest_distance (); } From b4f561dbbf61c7df9b284d1f2d4989b4517fb908 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Wed, 6 Jul 2022 18:49:23 +0000 Subject: [PATCH 2/3] [subset] Add some comments to find_space_roots/find_32_bit_roots methods. --- src/graph/graph.hh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/graph/graph.hh b/src/graph/graph.hh index 88a9e4e60..49638f34a 100644 --- a/src/graph/graph.hh +++ b/src/graph/graph.hh @@ -264,6 +264,11 @@ struct graph_t hb_swap (vertices_, sorted_graph); } + /* + * Finds the set of nodes (placed into roots) that should be assigned unique spaces. + * More specifically this looks for the top most 24 bit or 32 bit links in the graph. + * Some special casing is done that is specific to the layout of GSUB/GPOS tables. + */ void find_space_roots (hb_set_t& visited, hb_set_t& roots) { int root_index = (int) root_idx (); @@ -285,7 +290,9 @@ struct graph_t if (l.width == 3) { // A 24bit offset forms a root, unless there is 32bit offsets somewhere - // in it's subgraph, then those become the roots instead. + // in it's subgraph, then those become the roots instead. This is to make sure + // that extension subtables beneath a 24bit lookup become the spaces instead + // of the offset to the lookup. hb_set_t sub_roots; find_32bit_roots (l.objidx, sub_roots); if (sub_roots) { @@ -451,6 +458,10 @@ struct graph_t find_subgraph (link.objidx, subgraph); } + /* + * Finds the topmost children of 32bit offsets in the subgraph starting + * at node_idx. Found indices are placed into 'found'. + */ void find_32bit_roots (unsigned node_idx, hb_set_t& found) { for (const auto& link : vertices_[node_idx].obj.all_links ()) From 6fad6b4113750d3aabea633685bc272f98a2ef83 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Wed, 6 Jul 2022 19:18:27 +0000 Subject: [PATCH 3/3] [repacker] add tests for special casing of 24bit offsets. --- src/test-repacker.cc | 84 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/test-repacker.cc b/src/test-repacker.cc index 69863b562..4bc731d8e 100644 --- a/src/test-repacker.cc +++ b/src/test-repacker.cc @@ -57,6 +57,14 @@ static void add_offset (unsigned id, c->add_link (*offset, id); } +static void add_24_offset (unsigned id, + hb_serialize_context_t* c) +{ + OT::Offset24* offset = c->start_embed (); + c->extend_min (offset); + c->add_link (*offset, id); +} + static void add_wide_offset (unsigned id, hb_serialize_context_t* c) { @@ -812,6 +820,51 @@ populate_serializer_virtual_link (hb_serialize_context_t* c) c->end_serialize(); } +static void +populate_serializer_with_24_and_32_bit_offsets (hb_serialize_context_t* c) +{ + std::string large_string(60000, 'a'); + c->start_serialize (); + + unsigned obj_f = add_object ("f", 1, c); + unsigned obj_g = add_object ("g", 1, c); + unsigned obj_j = add_object ("j", 1, c); + unsigned obj_k = add_object ("k", 1, c); + + start_object (large_string.c_str (), 40000, c); + add_offset (obj_f, c); + unsigned obj_c = c->pop_pack (false); + + start_object (large_string.c_str (), 40000, c); + add_offset (obj_g, c); + unsigned obj_d = c->pop_pack (false); + + start_object (large_string.c_str (), 40000, c); + add_offset (obj_j, c); + unsigned obj_h = c->pop_pack (false); + + start_object (large_string.c_str (), 40000, c); + add_offset (obj_k, c); + unsigned obj_i = c->pop_pack (false); + + start_object ("e", 1, c); + add_wide_offset (obj_h, c); + add_wide_offset (obj_i, c); + unsigned obj_e = c->pop_pack (false); + + start_object ("b", 1, c); + add_24_offset (obj_c, c); + add_24_offset (obj_d, c); + add_24_offset (obj_e, c); + unsigned obj_b = c->pop_pack (false); + + start_object ("a", 1, c); + add_24_offset (obj_b, c); + c->pop_pack (false); + + c->end_serialize(); +} + static void test_sort_shortest () { size_t buffer_size = 100; @@ -1129,6 +1182,36 @@ static void test_resolve_overflows_via_isolation_spaces () hb_blob_destroy (out); } +static void test_resolve_mixed_overflows_via_isolation_spaces () +{ + size_t buffer_size = 200000; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_with_24_and_32_bit_offsets (&c); + graph_t graph (c.object_graph ()); + + assert (c.offset_overflow ()); + hb_blob_t* out = hb_resolve_overflows (c.object_graph (), HB_TAG ('G', 'S', 'U', 'B'), 0); + assert (out); + hb_bytes_t result = out->as_bytes (); + + unsigned expected_length = + // Objects + 7 + + 4 * 40000; + + expected_length += + // Links + 2 * 4 + // 32 + 4 * 3 + // 24 + 4 * 2; // 16 + + assert (result.length == expected_length); + + free (buffer); + hb_blob_destroy (out); +} + static void test_resolve_overflows_via_splitting_spaces () { size_t buffer_size = 160000; @@ -1270,6 +1353,7 @@ main (int argc, char **argv) test_resolve_overflows_via_isolating_16bit_space_2 (); test_resolve_overflows_via_splitting_spaces (); test_resolve_overflows_via_splitting_spaces_2 (); + test_resolve_mixed_overflows_via_isolation_spaces (); test_duplicate_leaf (); test_duplicate_interior (); test_virtual_link ();