[subset] add topological sort by closest distance via Dijkstra's algorithm.

This commit is contained in:
Garret Rieger 2020-11-02 16:16:27 -08:00
parent 8ebe5d734f
commit aaa7873d42
2 changed files with 160 additions and 2 deletions

View File

@ -150,6 +150,62 @@ struct graph_t
sorted_graph.fini_deep ();
}
/*
* Generates a new topological sorting of graph ordered by the shortest
* distance to each node.
*/
void sort_shortest_distance ()
{
if (objects_.length <= 1) {
// Graph of 1 or less doesn't need sorting.
return;
}
hb_hashmap_t<unsigned, int64_t, -1, hb_int_max(int64_t)> distance_to;
compute_distances (&distance_to);
hb_set_t queue;
hb_vector_t<hb_serialize_context_t::object_t> sorted_graph;
hb_map_t id_map;
hb_map_t edge_count;
incoming_edge_count (&edge_count);
// Object graphs are in reverse order, the first object is at the end
// of the vector. Since the graph is topologically sorted it's safe to
// assume the first object has no incoming edges.
queue.add (objects_.length - 1);
int new_id = objects_.length - 1;
while (queue.get_population ())
{
unsigned next_id = closest_object (queue, distance_to);
queue.del (next_id);
hb_serialize_context_t::object_t& next = objects_[next_id];
sorted_graph.push (next);
id_map.set (next_id, new_id--);
for (const auto& link : next.links) {
edge_count.set (link.objidx, edge_count.get (link.objidx) - 1);
if (!edge_count.get (link.objidx))
queue.add (link.objidx);
}
}
if (new_id != -1)
{
// Graph is not fully connected, there are unsorted objects.
// TODO(garretrieger): handle this.
assert (false);
}
remap_obj_indices (id_map, &sorted_graph);
sorted_graph.as_array ().reverse ();
objects_ = sorted_graph;
sorted_graph.fini_deep ();
}
/*
* Will any offsets overflow on graph when it's serialized?
*/
@ -185,6 +241,74 @@ struct graph_t
private:
/*
 * Returns the member of queue with the smallest recorded distance.
 *
 * Only a strictly smaller distance replaces the current best, so on a tie
 * the candidate encountered first while iterating queue is kept. Asserts
 * that some member has a distance below hb_int_max (int64_t).
 */
unsigned closest_object (const hb_set_t& queue,
                         const hb_hashmap_t<unsigned, int64_t, -1, hb_int_max(int64_t)>& distance_to)
{
  unsigned best_index = (unsigned) -1;
  int64_t best_distance = hb_int_max (int64_t);

  for (unsigned candidate : queue)
  {
    const int64_t candidate_distance = distance_to.get (candidate);
    if (candidate_distance >= best_distance)
      continue;

    best_distance = candidate_distance;
    best_index = candidate;
  }

  assert (best_index != (unsigned) -1);
  return best_index;
}
/*
* Finds the distance to each object in the graph
* from the initial node.
*/
/*
 * Fills distance_to with the shortest distance from the root object (the
 * last entry of objects_) to every object reachable from it.
 *
 * The cost of following a link is the size of the child object
 * (tail - head) plus 2^16 for a narrow link or 2^32 for a wide link.
 * Objects that are never reached get no entry in distance_to.
 */
void compute_distances (hb_hashmap_t<unsigned, int64_t, -1, hb_int_max(int64_t)>* distance_to)
{
  // Uses Dijkstra's algorithm to find all of the shortest distances.
  // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
  distance_to->clear ();

  // An empty graph has no root; bail out before `length - 1` wraps around.
  if (!objects_.length) return;

  hb_set_t unvisited;
  unvisited.add_range (0, objects_.length - 1);

  unsigned current_idx = objects_.length - 1;
  distance_to->set (current_idx, 0);

  while (unvisited.get_population ())
  {
    const auto& current = objects_[current_idx];
    // Distances must stay 64 bit: a single wide link already contributes
    // 2^32, which does not fit in an int.
    int64_t current_distance = (*distance_to)[current_idx];

    for (const auto& link : current.links)
    {
      if (!unvisited.has (link.objidx)) continue;

      const auto& child = objects_[link.objidx];
      int64_t child_weight = child.tail - child.head +
                             (!link.is_wide ? (1 << 16) : ((int64_t) 1 << 32));
      int64_t child_distance = current_distance + child_weight;

      // Relax the edge: keep only the shortest known distance to the child.
      if (child_distance < distance_to->get (link.objidx))
        distance_to->set (link.objidx, child_distance);
    }

    unvisited.del (current_idx);

    // Pick the closest unvisited object as the next one to expand.
    // TODO(garretrieger): change this to use a priority queue.
    int64_t smallest_distance = hb_int_max(int64_t);
    for (hb_codepoint_t idx : unvisited)
    {
      if (distance_to->get (idx) < smallest_distance)
      {
        smallest_distance = distance_to->get (idx);
        current_idx = idx;
      }
    }

    // TODO(garretrieger): this will trigger if graph is disconnected. Handle this.
    assert (!unvisited.get_population () || smallest_distance != hb_int_max (int64_t));

    // No reachable unvisited object is left. Without this, a build with
    // asserts disabled would keep re-processing the same current_idx forever
    // on a disconnected graph, since unvisited would never shrink.
    if (smallest_distance == hb_int_max (int64_t)) break;
  }
}
int64_t compute_offset (
unsigned parent_idx,
const hb_serialize_context_t::object_t::link_t& link,
@ -318,6 +442,7 @@ hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& pac
graph_t sorted_graph (packed);
sorted_graph.sort_kahn ();
if (sorted_graph.will_overflow ()) {
sorted_graph.sort_shortest_distance ();
// TODO(garretrieger): try additional offset resolution strategies
// - Dijkstra sort of weighted graph.
// - Promotion to extension lookups.

View File

@ -116,7 +116,7 @@ populate_serializer_complex_2 (hb_serialize_context_t* c)
{
c->start_serialize<char> ();
unsigned obj_5 = add_object ("mno", 3, c);
unsigned obj_5 = add_object ("mn", 3, c);
unsigned obj_4 = add_object ("jkl", 3, c);
@ -184,7 +184,7 @@ static void test_sort_kahn_2 ()
assert(graph.objects_[3].links.length == 1);
assert(graph.objects_[3].links[0].objidx == 1);
assert(strncmp (graph.objects_[2].head, "mno", 3) == 0);
assert(strncmp (graph.objects_[2].head, "mn", 2) == 0);
assert(graph.objects_[2].links.length == 0);
assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0);
@ -195,6 +195,38 @@ static void test_sort_kahn_2 ()
assert(graph.objects_[0].links.length == 0);
}
/*
 * Checks that sort_shortest_distance () reorders the graph built by
 * populate_serializer_complex_2 () into the expected object order and that
 * every link index is remapped to match the new positions.
 */
static void test_sort_shortest ()
{
  size_t buffer_size = 100;
  void* buffer = malloc (buffer_size);
  assert (buffer);

  // Inner scope: the serializer and the graph reference memory inside
  // buffer, so they are destroyed before the buffer is released below.
  {
    hb_serialize_context_t c (buffer, buffer_size);
    populate_serializer_complex_2 (&c);

    graph_t graph (c.object_graph ());
    graph.sort_shortest_distance ();

    // Objects are stored in reverse, so the root ("abc") is the last entry.
    assert(strncmp (graph.objects_[4].head, "abc", 3) == 0);
    assert(graph.objects_[4].links.length == 3);
    assert(graph.objects_[4].links[0].objidx == 2);
    assert(graph.objects_[4].links[1].objidx == 0);
    assert(graph.objects_[4].links[2].objidx == 3);

    assert(strncmp (graph.objects_[3].head, "mn", 2) == 0);
    assert(graph.objects_[3].links.length == 0);

    assert(strncmp (graph.objects_[2].head, "def", 3) == 0);
    assert(graph.objects_[2].links.length == 1);
    assert(graph.objects_[2].links[0].objidx == 1);

    assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0);
    assert(graph.objects_[1].links.length == 1);
    assert(graph.objects_[1].links[0].objidx == 0);

    assert(strncmp (graph.objects_[0].head, "jkl", 3) == 0);
    assert(graph.objects_[0].links.length == 0);
  }

  // The original never released the buffer obtained from malloc ().
  free (buffer);
}
static void
test_serialize ()
{
@ -245,6 +277,7 @@ main (int argc, char **argv)
test_serialize ();
test_sort_kahn_1 ();
test_sort_kahn_2 ();
test_sort_shortest ();
test_will_overflow_1 ();
test_will_overflow_2 ();
}