[subset] add topological sort by closest distance via Dijkstra's algorithm.
This commit is contained in:
parent
8ebe5d734f
commit
aaa7873d42
|
@ -150,6 +150,62 @@ struct graph_t
|
|||
sorted_graph.fini_deep ();
|
||||
}
|
||||
|
||||
/*
|
||||
* Generates a new topological sorting of graph ordered by the shortest
|
||||
* distance to each node.
|
||||
*/
|
||||
void sort_shortest_distance ()
|
||||
{
|
||||
if (objects_.length <= 1) {
|
||||
// Graph of 1 or less doesn't need sorting.
|
||||
return;
|
||||
}
|
||||
|
||||
hb_hashmap_t<unsigned, int64_t, -1, hb_int_max(int64_t)> distance_to;
|
||||
compute_distances (&distance_to);
|
||||
|
||||
hb_set_t queue;
|
||||
hb_vector_t<hb_serialize_context_t::object_t> sorted_graph;
|
||||
hb_map_t id_map;
|
||||
hb_map_t edge_count;
|
||||
incoming_edge_count (&edge_count);
|
||||
|
||||
// Object graphs are in reverse order, the first object is at the end
|
||||
// of the vector. Since the graph is topologically sorted it's safe to
|
||||
// assume the first object has no incoming edges.
|
||||
queue.add (objects_.length - 1);
|
||||
int new_id = objects_.length - 1;
|
||||
|
||||
while (queue.get_population ())
|
||||
{
|
||||
unsigned next_id = closest_object (queue, distance_to);
|
||||
queue.del (next_id);
|
||||
|
||||
hb_serialize_context_t::object_t& next = objects_[next_id];
|
||||
sorted_graph.push (next);
|
||||
id_map.set (next_id, new_id--);
|
||||
|
||||
for (const auto& link : next.links) {
|
||||
edge_count.set (link.objidx, edge_count.get (link.objidx) - 1);
|
||||
if (!edge_count.get (link.objidx))
|
||||
queue.add (link.objidx);
|
||||
}
|
||||
}
|
||||
|
||||
if (new_id != -1)
|
||||
{
|
||||
// Graph is not fully connected, there are unsorted objects.
|
||||
// TODO(garretrieger): handle this.
|
||||
assert (false);
|
||||
}
|
||||
|
||||
remap_obj_indices (id_map, &sorted_graph);
|
||||
|
||||
sorted_graph.as_array ().reverse ();
|
||||
objects_ = sorted_graph;
|
||||
sorted_graph.fini_deep ();
|
||||
}
|
||||
|
||||
/*
|
||||
* Will any offsets overflow on graph when it's serialized?
|
||||
*/
|
||||
|
@ -185,6 +241,74 @@ struct graph_t
|
|||
|
||||
private:
|
||||
|
||||
unsigned closest_object (const hb_set_t& queue,
|
||||
const hb_hashmap_t<unsigned, int64_t, -1, hb_int_max(int64_t)>& distance_to)
|
||||
{
|
||||
int64_t closest_distance = hb_int_max (int64_t);
|
||||
unsigned closest_index = -1;
|
||||
for (unsigned i : queue)
|
||||
{
|
||||
if (distance_to.get (i) < closest_distance)
|
||||
{
|
||||
closest_distance = distance_to.get (i);
|
||||
closest_index = i;
|
||||
}
|
||||
}
|
||||
assert (closest_index != (unsigned) -1);
|
||||
return closest_index;
|
||||
}
|
||||
|
||||
/*
|
||||
* Finds the distance too each object in the graph
|
||||
* from the initial node.
|
||||
*/
|
||||
void compute_distances (hb_hashmap_t<unsigned, int64_t, -1, hb_int_max(int64_t)>* distance_to)
|
||||
{
|
||||
// Uses Dijkstra's algorithm to find all of the shortest distances.
|
||||
// https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
|
||||
distance_to->clear ();
|
||||
hb_set_t unvisited;
|
||||
unvisited.add_range (0, objects_.length - 1);
|
||||
|
||||
unsigned current_idx = objects_.length - 1;
|
||||
distance_to->set (current_idx, 0);
|
||||
|
||||
while (unvisited.get_population ())
|
||||
{
|
||||
const auto& current = objects_[current_idx];
|
||||
int current_distance = (*distance_to)[current_idx];
|
||||
|
||||
for (const auto& link : current.links)
|
||||
{
|
||||
if (!unvisited.has (link.objidx)) continue;
|
||||
|
||||
const auto& child = objects_[link.objidx];
|
||||
int64_t child_weight = child.tail - child.head +
|
||||
(!link.is_wide ? (1 << 16) : ((int64_t) 1 << 32));
|
||||
int64_t child_distance = current_distance + child_weight;
|
||||
|
||||
if (child_distance < distance_to->get (link.objidx))
|
||||
distance_to->set (link.objidx, child_distance);
|
||||
}
|
||||
|
||||
unvisited.del (current_idx);
|
||||
|
||||
// TODO(garretrieger): change this to use a priority queue.
|
||||
int64_t smallest_distance = hb_int_max(int64_t);
|
||||
for (hb_codepoint_t idx : unvisited)
|
||||
{
|
||||
if (distance_to->get (idx) < smallest_distance)
|
||||
{
|
||||
smallest_distance = distance_to->get (idx);
|
||||
current_idx = idx;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(garretrieger): this will trigger if graph is disconnected. Handle this.
|
||||
assert (!unvisited.get_population () || smallest_distance != hb_int_max (int64_t));
|
||||
}
|
||||
}
|
||||
|
||||
int64_t compute_offset (
|
||||
unsigned parent_idx,
|
||||
const hb_serialize_context_t::object_t::link_t& link,
|
||||
|
@ -318,6 +442,7 @@ hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& pac
|
|||
graph_t sorted_graph (packed);
|
||||
sorted_graph.sort_kahn ();
|
||||
if (sorted_graph.will_overflow ()) {
|
||||
sorted_graph.sort_shortest_distance ();
|
||||
// TODO(garretrieger): try additional offset resolution strategies
|
||||
// - Dijkstra sort of weighted graph.
|
||||
// - Promotion to extension lookups.
|
||||
|
|
|
@ -116,7 +116,7 @@ populate_serializer_complex_2 (hb_serialize_context_t* c)
|
|||
{
|
||||
c->start_serialize<char> ();
|
||||
|
||||
unsigned obj_5 = add_object ("mno", 3, c);
|
||||
unsigned obj_5 = add_object ("mn", 3, c);
|
||||
|
||||
unsigned obj_4 = add_object ("jkl", 3, c);
|
||||
|
||||
|
@ -184,7 +184,7 @@ static void test_sort_kahn_2 ()
|
|||
assert(graph.objects_[3].links.length == 1);
|
||||
assert(graph.objects_[3].links[0].objidx == 1);
|
||||
|
||||
assert(strncmp (graph.objects_[2].head, "mno", 3) == 0);
|
||||
assert(strncmp (graph.objects_[2].head, "mn", 2) == 0);
|
||||
assert(graph.objects_[2].links.length == 0);
|
||||
|
||||
assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0);
|
||||
|
@ -195,6 +195,38 @@ static void test_sort_kahn_2 ()
|
|||
assert(graph.objects_[0].links.length == 0);
|
||||
}
|
||||
|
||||
static void test_sort_shortest ()
|
||||
{
|
||||
size_t buffer_size = 100;
|
||||
void* buffer = malloc (buffer_size);
|
||||
hb_serialize_context_t c (buffer, buffer_size);
|
||||
populate_serializer_complex_2 (&c);
|
||||
|
||||
graph_t graph (c.object_graph ());
|
||||
graph.sort_shortest_distance ();
|
||||
|
||||
|
||||
assert(strncmp (graph.objects_[4].head, "abc", 3) == 0);
|
||||
assert(graph.objects_[4].links.length == 3);
|
||||
assert(graph.objects_[4].links[0].objidx == 2);
|
||||
assert(graph.objects_[4].links[1].objidx == 0);
|
||||
assert(graph.objects_[4].links[2].objidx == 3);
|
||||
|
||||
assert(strncmp (graph.objects_[3].head, "mn", 2) == 0);
|
||||
assert(graph.objects_[3].links.length == 0);
|
||||
|
||||
assert(strncmp (graph.objects_[2].head, "def", 3) == 0);
|
||||
assert(graph.objects_[2].links.length == 1);
|
||||
assert(graph.objects_[2].links[0].objidx == 1);
|
||||
|
||||
assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0);
|
||||
assert(graph.objects_[1].links.length == 1);
|
||||
assert(graph.objects_[1].links[0].objidx == 0);
|
||||
|
||||
assert(strncmp (graph.objects_[0].head, "jkl", 3) == 0);
|
||||
assert(graph.objects_[0].links.length == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
test_serialize ()
|
||||
{
|
||||
|
@ -245,6 +277,7 @@ main (int argc, char **argv)
|
|||
test_serialize ();
|
||||
test_sort_kahn_1 ();
|
||||
test_sort_kahn_2 ();
|
||||
test_sort_shortest ();
|
||||
test_will_overflow_1 ();
|
||||
test_will_overflow_2 ();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue