[subset] Implement Kahn's algo for topological sorting instead of BFS.

This commit is contained in:
Garret Rieger 2020-10-30 10:29:51 -07:00
parent 00f393dc3f
commit f4c78cc7dd
2 changed files with 124 additions and 33 deletions

View File

@ -35,10 +35,13 @@
struct graph_t struct graph_t
{ {
// TODO(garretrieger): add an error tracking system similar to the one used by
// hb_serialize_context_t.
/* /*
* A topological sorting of an object graph. Ordered * A topological sorting of an object graph. Ordered
* in reverse serialization order (first object in the * in reverse serialization order (first object in the
* serialization is at the end of the graph). This matches * serialization is at the end of the list). This matches
* the 'packed' object stack used internally in the * the 'packed' object stack used internally in the
* serializer * serializer
*/ */
@ -92,14 +95,11 @@ struct graph_t
} }
/* /*
* Generates a new topological sorting of graph using BFS. * Generates a new topological sorting of graph using Kahn's
* algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Algorithms
*/ */
void sort_bfs () void sort_kahn ()
{ {
// BFS doesn't always produce a topological sort so this is just
// for testing re-ordering capabilities for now.
// Will need to use a more advanced topological sorting algorithm
if (objects_.length <= 1) { if (objects_.length <= 1) {
// Graph of 1 or less doesn't need sorting. // Graph of 1 or less doesn't need sorting.
return; return;
@ -108,25 +108,28 @@ struct graph_t
hb_vector_t<unsigned> queue; hb_vector_t<unsigned> queue;
hb_vector_t<hb_serialize_context_t::object_t> sorted_graph; hb_vector_t<hb_serialize_context_t::object_t> sorted_graph;
hb_map_t id_map; hb_map_t id_map;
hb_map_t edge_count;
incoming_edge_count (&edge_count);
// Object graphs are in reverse order, the first object is at the end // Object graphs are in reverse order, the first object is at the end
// of the vector. // of the vector. Since the graph is topologically sorted it's safe to
// assume the first object has no incoming edges.
queue.push (objects_.length - 1); queue.push (objects_.length - 1);
int new_id = objects_.length - 1; int new_id = objects_.length - 1;
hb_set_t visited;
while (queue.length) while (queue.length)
{ {
unsigned next_id = queue[0]; unsigned next_id = queue[0];
queue.remove(0); queue.remove(0);
visited.add(next_id);
hb_serialize_context_t::object_t& next = objects_[next_id]; hb_serialize_context_t::object_t& next = objects_[next_id];
sorted_graph.push (next); sorted_graph.push (next);
id_map.set (next_id, new_id--); id_map.set (next_id, new_id--);
for (const auto& link : next.links) { for (const auto& link : next.links) {
if (!visited.has (link.objidx)) // TODO(garretrieger): sort children from smallest to largest
edge_count.set (link.objidx, edge_count.get (link.objidx) - 1);
if (!edge_count.get (link.objidx))
queue.push (link.objidx); queue.push (link.objidx);
} }
} }
@ -138,19 +141,7 @@ struct graph_t
assert (false); assert (false);
} }
// Apply objidx remapping. remap_obj_indices (id_map, &sorted_graph);
// TODO(garretrieger): extract this to a helper.
for (unsigned i = 0; i < sorted_graph.length; i++)
{
for (unsigned j = 0; j < sorted_graph[i].links.length; j++)
{
auto& link = sorted_graph[i].links[j];
if (!id_map.has (link.objidx))
// TODO(garretrieger): handle this.
assert (false);
link.objidx = id_map.get (link.objidx);
}
}
sorted_graph.as_array ().reverse (); sorted_graph.as_array ().reverse ();
objects_ = sorted_graph; objects_ = sorted_graph;
@ -163,13 +154,53 @@ struct graph_t
bool will_overflow() bool will_overflow()
{ {
// TODO(garretrieger): implement me. // TODO(garretrieger): implement me.
// Check for offsets that exceed their width or are negative if // - Check for offsets that exceed their width or;
// using a non-signed link. // - are negative if using a non-signed link.
return false; return false;
} }
private: private:
/*
 * Rewrites the objidx of every link, in every object of sorted_graph,
 * to the value id_map associates with the link's current objidx.
 *
 * A link whose objidx has no entry in id_map trips an assert.
 */
void remap_obj_indices (const hb_map_t& id_map,
                        hb_vector_t<hb_serialize_context_t::object_t>* sorted_graph)
{
  for (unsigned obj_pos = 0; obj_pos < sorted_graph->length; obj_pos++)
  {
    auto& obj_links = (*sorted_graph)[obj_pos].links;
    for (unsigned link_pos = 0; link_pos < obj_links.length; link_pos++)
    {
      auto& current = obj_links[link_pos];
      // TODO(garretrieger): handle this.
      assert (id_map.has (current.objidx));
      current.objidx = id_map.get (current.objidx);
    }
  }
}
/*
 * Populates out with a mapping from object index to the number of
 * links in objects_ that point at that object. Every index in
 * [0, objects_.length) ends up with an entry; objects that nothing
 * links to are recorded with a count of 0.
 */
void incoming_edge_count (hb_map_t* out)
{
  for (unsigned obj_pos = 0; obj_pos < objects_.length; obj_pos++)
  {
    // Give the object itself an entry even if no link targets it.
    if (!out->has (obj_pos))
      out->set (obj_pos, 0);

    for (const auto& edge : objects_[obj_pos].links)
    {
      const unsigned target = edge.objidx;
      // Targets not seen yet start from zero before being counted.
      const unsigned so_far = out->has (target) ? out->get (target) : 0;
      out->set (target, so_far + 1);
    }
  }
}
template <typename O> void template <typename O> void
serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link, serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link,
char* head, char* head,
@ -220,9 +251,10 @@ inline void
hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& packed, hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& packed,
hb_serialize_context_t* c) { hb_serialize_context_t* c) {
graph_t sorted_graph (packed); graph_t sorted_graph (packed);
sorted_graph.sort_bfs (); sorted_graph.sort_kahn ();
if (sorted_graph.will_overflow ()) { if (sorted_graph.will_overflow ()) {
// TODO(garretrieger): additional offset resolution strategies // TODO(garretrieger): try additional offset resolution strategies
// - Dijkstra sort of weighted graph.
// - Promotion to extension lookups. // - Promotion to extension lookups.
// - Table duplication. // - Table duplication.
// - Table splitting. // - Table splitting.

View File

@ -71,7 +71,7 @@ populate_serializer_simple (hb_serialize_context_t* c)
} }
static void static void
populate_serializer_complex (hb_serialize_context_t* c) populate_serializer_complex_1 (hb_serialize_context_t* c)
{ {
c->start_serialize<char> (); c->start_serialize<char> ();
@ -90,15 +90,41 @@ populate_serializer_complex (hb_serialize_context_t* c)
c->end_serialize(); c->end_serialize();
} }
static void test_sort_bfs () static void
populate_serializer_complex_2 (hb_serialize_context_t* c)
{
c->start_serialize<char> ();
unsigned obj_5 = add_object ("mno", 3, c);
unsigned obj_4 = add_object ("jkl", 3, c);
start_object ("ghi", 3, c);
add_offset (obj_4, c);
unsigned obj_3 = c->pop_pack (false);
start_object ("def", 3, c);
add_offset (obj_3, c);
unsigned obj_2 = c->pop_pack (false);
start_object ("abc", 3, c);
add_offset (obj_2, c);
add_offset (obj_4, c);
add_offset (obj_5, c);
c->pop_pack ();
c->end_serialize();
}
static void test_sort_kahn_1 ()
{ {
size_t buffer_size = 100; size_t buffer_size = 100;
void* buffer = malloc (buffer_size); void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size); hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_complex (&c); populate_serializer_complex_1 (&c);
graph_t graph (c.object_graph ()); graph_t graph (c.object_graph ());
graph.sort_bfs (); graph.sort_kahn ();
assert(strncmp (graph.objects_[3].head, "abc", 3) == 0); assert(strncmp (graph.objects_[3].head, "abc", 3) == 0);
assert(graph.objects_[3].links.length == 2); assert(graph.objects_[3].links.length == 2);
@ -116,6 +142,38 @@ static void test_sort_bfs ()
assert(graph.objects_[0].links.length == 0); assert(graph.objects_[0].links.length == 0);
} }
/*
 * Checks sort_kahn () on the graph built by populate_serializer_complex_2:
 *
 *   abc -> def, jkl, mno
 *   def -> ghi
 *   ghi -> jkl
 *
 * objects_ is kept in reverse serialization order, so the root ("abc")
 * is expected at the highest index. "jkl" has two incoming edges (from
 * "abc" and from "ghi"), so it must land after "ghi", i.e. at index 0.
 */
static void test_sort_kahn_2 ()
{
  size_t buffer_size = 100;
  void* buffer = malloc (buffer_size);
  hb_serialize_context_t c (buffer, buffer_size);
  populate_serializer_complex_2 (&c);

  graph_t graph (c.object_graph ());
  graph.sort_kahn ();

  // Root object: keeps its three links, remapped to the new indices.
  assert(strncmp (graph.objects_[4].head, "abc", 3) == 0);
  assert(graph.objects_[4].links.length == 3);
  assert(graph.objects_[4].links[0].objidx == 3);
  assert(graph.objects_[4].links[1].objidx == 0);
  assert(graph.objects_[4].links[2].objidx == 2);

  assert(strncmp (graph.objects_[3].head, "def", 3) == 0);
  assert(graph.objects_[3].links.length == 1);
  assert(graph.objects_[3].links[0].objidx == 1);

  assert(strncmp (graph.objects_[2].head, "mno", 3) == 0);
  assert(graph.objects_[2].links.length == 0);

  assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0);
  assert(graph.objects_[1].links.length == 1);
  assert(graph.objects_[1].links[0].objidx == 0);

  // Shared leaf: only emitted once all of its parents have been placed.
  assert(strncmp (graph.objects_[0].head, "jkl", 3) == 0);
  assert(graph.objects_[0].links.length == 0);

  // Release the serializer's backing buffer; nothing reads it past this point.
  free (buffer);
}
static void static void
test_serialize () test_serialize ()
{ {
@ -142,5 +200,6 @@ int
main (int argc, char **argv) main (int argc, char **argv)
{ {
test_serialize (); test_serialize ();
test_sort_bfs (); test_sort_kahn_1 ();
test_sort_kahn_2 ();
} }