From aaa7873d425a6267b1df16f5a1f3750578b438f0 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Mon, 2 Nov 2020 16:16:27 -0800 Subject: [PATCH] [subset] add topological sort by closest distance via Dijkstra's algorithm. --- src/hb-repacker.hh | 125 +++++++++++++++++++++++++++++++++++++++++++ src/test-repacker.cc | 37 ++++++++++++- 2 files changed, 160 insertions(+), 2 deletions(-) diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh index e103a8ef4..90d73b30c 100644 --- a/src/hb-repacker.hh +++ b/src/hb-repacker.hh @@ -150,6 +150,62 @@ struct graph_t sorted_graph.fini_deep (); } + /* + * Generates a new topological sorting of graph ordered by the shortest + * distance to each node. + */ + void sort_shortest_distance () + { + if (objects_.length <= 1) { + // Graph of 1 or less doesn't need sorting. + return; + } + + hb_hashmap_t distance_to; + compute_distances (&distance_to); + + hb_set_t queue; + hb_vector_t sorted_graph; + hb_map_t id_map; + hb_map_t edge_count; + incoming_edge_count (&edge_count); + + // Object graphs are in reverse order, the first object is at the end + // of the vector. Since the graph is topologically sorted it's safe to + // assume the first object has no incoming edges. + queue.add (objects_.length - 1); + int new_id = objects_.length - 1; + + while (queue.get_population ()) + { + unsigned next_id = closest_object (queue, distance_to); + queue.del (next_id); + + hb_serialize_context_t::object_t& next = objects_[next_id]; + sorted_graph.push (next); + id_map.set (next_id, new_id--); + + for (const auto& link : next.links) { + edge_count.set (link.objidx, edge_count.get (link.objidx) - 1); + if (!edge_count.get (link.objidx)) + queue.add (link.objidx); + } + } + + if (new_id != -1) + { + // Graph is not fully connected, there are unsorted objects. + // TODO(garretrieger): handle this. 
+ assert (false); + } + + remap_obj_indices (id_map, &sorted_graph); + + sorted_graph.as_array ().reverse (); + objects_ = sorted_graph; + sorted_graph.fini_deep (); + } + /* * Will any offsets overflow on graph when it's serialized? */ @@ -185,6 +241,74 @@ struct graph_t private: + unsigned closest_object (const hb_set_t& queue, + const hb_hashmap_t& distance_to) + { + int64_t closest_distance = hb_int_max (int64_t); + unsigned closest_index = -1; + for (unsigned i : queue) + { + if (distance_to.get (i) < closest_distance) + { + closest_distance = distance_to.get (i); + closest_index = i; + } + } + assert (closest_index != (unsigned) -1); + return closest_index; + } + + /* + * Finds the distance to each object in the graph + * from the initial node. + */ + void compute_distances (hb_hashmap_t* distance_to) + { + // Uses Dijkstra's algorithm to find all of the shortest distances. + // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm + distance_to->clear (); + hb_set_t unvisited; + unvisited.add_range (0, objects_.length - 1); + + unsigned current_idx = objects_.length - 1; + distance_to->set (current_idx, 0); + + while (unvisited.get_population ()) + { + const auto& current = objects_[current_idx]; + int64_t current_distance = (*distance_to)[current_idx]; /* 64-bit: a wide link alone adds 1 << 32 */ + + for (const auto& link : current.links) + { + if (!unvisited.has (link.objidx)) continue; + + const auto& child = objects_[link.objidx]; + int64_t child_weight = child.tail - child.head + + (!link.is_wide ? (1 << 16) : ((int64_t) 1 << 32)); + int64_t child_distance = current_distance + child_weight; + + if (child_distance < distance_to->get (link.objidx)) + distance_to->set (link.objidx, child_distance); + } + + unvisited.del (current_idx); + + // TODO(garretrieger): change this to use a priority queue. 
+ int64_t smallest_distance = hb_int_max(int64_t); + for (hb_codepoint_t idx : unvisited) + { + if (distance_to->get (idx) < smallest_distance) + { + smallest_distance = distance_to->get (idx); + current_idx = idx; + } + } + + // TODO(garretrieger): this will trigger if graph is disconnected. Handle this. + assert (!unvisited.get_population () || smallest_distance != hb_int_max (int64_t)); + } + } + int64_t compute_offset ( unsigned parent_idx, const hb_serialize_context_t::object_t::link_t& link, @@ -318,6 +442,7 @@ hb_resolve_overflows (const hb_vector_t& pac graph_t sorted_graph (packed); sorted_graph.sort_kahn (); if (sorted_graph.will_overflow ()) { + sorted_graph.sort_shortest_distance (); // TODO(garretrieger): try additional offset resolution strategies // - Dijkstra sort of weighted graph. // - Promotion to extension lookups. diff --git a/src/test-repacker.cc b/src/test-repacker.cc index 280db6bdc..46554e1cd 100644 --- a/src/test-repacker.cc +++ b/src/test-repacker.cc @@ -116,7 +116,7 @@ populate_serializer_complex_2 (hb_serialize_context_t* c) { c->start_serialize (); - unsigned obj_5 = add_object ("mno", 3, c); + unsigned obj_5 = add_object ("mn", 3, c); unsigned obj_4 = add_object ("jkl", 3, c); @@ -184,7 +184,7 @@ static void test_sort_kahn_2 () assert(graph.objects_[3].links.length == 1); assert(graph.objects_[3].links[0].objidx == 1); - assert(strncmp (graph.objects_[2].head, "mno", 3) == 0); + assert(strncmp (graph.objects_[2].head, "mn", 2) == 0); assert(graph.objects_[2].links.length == 0); assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0); @@ -195,6 +195,38 @@ static void test_sort_kahn_2 () assert(graph.objects_[0].links.length == 0); } +static void test_sort_shortest () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_2 (&c); + + graph_t graph (c.object_graph ()); + graph.sort_shortest_distance (); + + + assert(strncmp 
(graph.objects_[4].head, "abc", 3) == 0); + assert(graph.objects_[4].links.length == 3); + assert(graph.objects_[4].links[0].objidx == 2); + assert(graph.objects_[4].links[1].objidx == 0); + assert(graph.objects_[4].links[2].objidx == 3); + + assert(strncmp (graph.objects_[3].head, "mn", 2) == 0); + assert(graph.objects_[3].links.length == 0); + + assert(strncmp (graph.objects_[2].head, "def", 3) == 0); + assert(graph.objects_[2].links.length == 1); + assert(graph.objects_[2].links[0].objidx == 1); + + assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0); + assert(graph.objects_[1].links.length == 1); + assert(graph.objects_[1].links[0].objidx == 0); + + assert(strncmp (graph.objects_[0].head, "jkl", 3) == 0); + assert(graph.objects_[0].links.length == 0); +} + static void test_serialize () { @@ -245,6 +277,7 @@ main (int argc, char **argv) test_serialize (); test_sort_kahn_1 (); test_sort_kahn_2 (); + test_sort_shortest (); test_will_overflow_1 (); test_will_overflow_2 (); }