[subset] Add table duplication overflow resolution.

This commit is contained in:
Garret Rieger 2020-11-05 16:39:23 -08:00
parent 8286bd8094
commit 75414e82b5
3 changed files with 347 additions and 34 deletions

View File

@ -438,6 +438,10 @@ struct hb_no_trace_t {
#define TRACE_SUBSET(this) hb_no_trace_t<bool> trace
#endif
#ifndef HB_DEBUG_SUBSET_REPACK
#define HB_DEBUG_SUBSET_REPACK (HB_DEBUG+0)
#endif
#ifndef HB_DEBUG_DISPATCH
#define HB_DEBUG_DISPATCH ( \
HB_DEBUG_APPLY + \

View File

@ -39,6 +39,36 @@ struct graph_t
// TODO(garretrieger): add an error tracking system similar to what serialize_context_t
// does.
struct overflow_record_t
{
unsigned parent;
const hb_serialize_context_t::object_t::link_t* link;
};
struct clone_buffer_t
{
clone_buffer_t () : head (nullptr), tail (nullptr) {}
void copy (const hb_serialize_context_t::object_t& object)
{
fini ();
unsigned size = object.tail - object.head;
head = (char*) malloc (size);
memcpy (head, object.head, size);
tail = head + size;
}
char* head;
char* tail;
void fini ()
{
if (!head) return;
free (head);
head = nullptr;
}
};
/*
* A topological sorting of an object graph. Ordered
* in reverse serialization order (first object in the
@ -72,6 +102,7 @@ struct graph_t
~graph_t ()
{
objects_.fini_deep ();
clone_buffers_.fini_deep ();
}
/*
@ -213,11 +244,45 @@ struct graph_t
sorted_graph.fini_deep ();
}
/*
* Creates a copy of child and re-assigns the link from
* parent to the clone. The copy is a shallow copy, objects
* linked from child are not duplicated.
*/
void duplicate (unsigned parent_idx, unsigned child_idx)
{
const auto& child = objects_[child_idx];
clone_buffer_t* buffer = clone_buffers_.push ();
buffer->copy (child);
auto* clone = objects_.push ();
clone->head = buffer->head;
clone->tail = buffer->tail;
for (const auto& l : child.links)
clone->links.push (l);
auto& parent = objects_[parent_idx];
unsigned clone_idx = objects_.length - 2;
for (unsigned i = 0; i < parent.links.length; i++)
{
auto& l = parent.links[i];
if (l.objidx == child_idx) l.objidx = clone_idx;
}
// The last object is the root of the graph, so swap back the root to the end.
// The root's obj idx does change, however since it's root nothing else refers to it.
// all other obj idx's will be unaffected.
hb_serialize_context_t::object_t root = objects_[objects_.length - 2];
objects_[objects_.length - 2] = *clone;
objects_[objects_.length - 1] = root;
}
/*
* Will any offsets overflow on graph when it's serialized?
*/
bool will_overflow ()
bool will_overflow (hb_vector_t<overflow_record_t>* overflows)
{
if (overflows) overflows->resize (0);
hb_vector_t<unsigned> start_positions;
start_positions.resize (objects_.length);
hb_vector_t<unsigned> end_positions;
@ -232,7 +297,7 @@ struct graph_t
}
for (unsigned parent_idx = 0; parent_idx < objects_.length; parent_idx++)
for (int parent_idx = objects_.length - 1; parent_idx >= 0; parent_idx--)
{
for (const auto& link : objects_[parent_idx].links)
{
@ -241,11 +306,52 @@ struct graph_t
start_positions,
end_positions);
if (!is_valid_offset (offset, link)) return true;
if (is_valid_offset (offset, link))
continue;
if (!overflows) return true;
overflow_record_t r;
r.parent = parent_idx;
r.link = &link;
overflows->push (r);
}
}
return false;
if (!overflows) return false;
return overflows->length;
}
/*
* Creates a map from objid to # of incoming edges.
*/
void incoming_edge_count (hb_vector_t<unsigned>* out) const
{
out->resize (0);
out->resize (objects_.length);
for (const auto& o : objects_)
{
for (const auto& l : o.links)
{
(*out)[l.objidx] += 1;
}
}
}
void print_overflows (const hb_vector_t<overflow_record_t>& overflows) const
{
if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
hb_vector_t<unsigned> edge_count;
incoming_edge_count (&edge_count);
for (const auto& o : overflows)
{
DEBUG_MSG (SUBSET_REPACK, nullptr, "overflow from %d => %d (%d incoming , %d outgoing)",
o.parent,
o.link->objidx,
edge_count[o.link->objidx],
objects_[o.link->objidx].links.length);
}
}
private:
@ -371,22 +477,6 @@ struct graph_t
}
}
/*
* Creates a map from objid to # of incoming edges.
*/
void incoming_edge_count (hb_vector_t<unsigned>* out)
{
out->resize (0);
out->resize (objects_.length);
for (const auto& o : objects_)
{
for (const auto& l : o.links)
{
(*out)[l.objidx] += 1;
}
}
}
template <typename O> void
serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link,
char* head,
@ -426,6 +516,7 @@ struct graph_t
public:
hb_vector_t<hb_serialize_context_t::object_t> objects_;
hb_vector_t<clone_buffer_t> clone_buffers_;
};
@ -438,13 +529,49 @@ hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& pac
hb_serialize_context_t* c) {
graph_t sorted_graph (packed);
sorted_graph.sort_kahn ();
if (sorted_graph.will_overflow ()) {
sorted_graph.sort_shortest_distance ();
// TODO(garretrieger): try additional offset resolution strategies
// - Dijkstra sort of weighted graph.
// - Promotion to extension lookups.
// - Table duplication.
// - Table splitting.
if (!sorted_graph.will_overflow (nullptr)) return;
sorted_graph.sort_shortest_distance ();
unsigned round = 0;
hb_vector_t<graph_t::overflow_record_t> overflows;
// TODO(garretrieger): select a good limit for max rounds.
while (sorted_graph.will_overflow (&overflows) && round++ < 10) {
DEBUG_MSG (SUBSET_REPACK, nullptr, "Over flow resolution round %d", round);
sorted_graph.print_overflows (overflows);
// TODO(garretrieger): cache ege count in the graph object . Will need to be invalidated
// by graph modifications.
hb_vector_t<unsigned> edge_count;
sorted_graph.incoming_edge_count (&edge_count);
// Try resolving the furthest overflow first.
bool resolution_attempted = false;
for (int i = overflows.length - 1; i >= 0; i--)
{
const graph_t::overflow_record_t& r = overflows[i];
if (edge_count[r.link->objidx] > 1)
{
DEBUG_MSG (SUBSET_REPACK, nullptr, "Duplicating %d => %d",
r.parent, r.link->objidx);
// The child object is shared, we may be able to eliminate the overflow
// by duplicating it.
sorted_graph.duplicate (r.parent, r.link->objidx);
sorted_graph.sort_shortest_distance ();
resolution_attempted = true;
break;
}
// TODO(garretrieger): add additional offset resolution strategies
// - Promotion to extension lookups.
// - Table splitting.
}
if (!resolution_attempted)
{
DEBUG_MSG (SUBSET_REPACK, nullptr, "No resolution available :(");
break;
}
}
sorted_graph.serialize (c);

View File

@ -75,12 +75,12 @@ populate_serializer_simple (hb_serialize_context_t* c)
static void
populate_serializer_with_overflow (hb_serialize_context_t* c)
{
std::string large_string(40000, 'a');
std::string large_string(50000, 'a');
c->start_serialize<char> ();
unsigned obj_1 = add_object (large_string.c_str(), 40000, c);
unsigned obj_2 = add_object (large_string.c_str(), 40000, c);
unsigned obj_3 = add_object (large_string.c_str(), 40000, c);
unsigned obj_1 = add_object (large_string.c_str(), 10000, c);
unsigned obj_2 = add_object (large_string.c_str(), 20000, c);
unsigned obj_3 = add_object (large_string.c_str(), 50000, c);
start_object ("abc", 3, c);
add_offset (obj_3, c);
@ -91,6 +91,26 @@ populate_serializer_with_overflow (hb_serialize_context_t* c)
c->end_serialize();
}
static void
populate_serializer_with_dedup_overflow (hb_serialize_context_t* c)
{
std::string large_string(70000, 'a');
c->start_serialize<char> ();
unsigned obj_1 = add_object ("def", 3, c);
start_object (large_string.c_str(), 60000, c);
add_offset (obj_1, c);
unsigned obj_2 = c->pop_pack (false);
start_object (large_string.c_str(), 10000, c);
add_offset (obj_2, c);
add_offset (obj_1, c);
c->pop_pack (false);
c->end_serialize();
}
static void
populate_serializer_complex_1 (hb_serialize_context_t* c)
{
@ -116,7 +136,7 @@ populate_serializer_complex_2 (hb_serialize_context_t* c)
{
c->start_serialize<char> ();
unsigned obj_5 = add_object ("mn", 3, c);
unsigned obj_5 = add_object ("mn", 2, c);
unsigned obj_4 = add_object ("jkl", 3, c);
@ -137,6 +157,36 @@ populate_serializer_complex_2 (hb_serialize_context_t* c)
c->end_serialize();
}
static void
populate_serializer_complex_3 (hb_serialize_context_t* c)
{
c->start_serialize<char> ();
unsigned obj_6 = add_object ("opqrst", 6, c);
unsigned obj_5 = add_object ("mn", 2, c);
start_object ("jkl", 3, c);
add_offset (obj_6, c);
unsigned obj_4 = c->pop_pack (false);
start_object ("ghi", 3, c);
add_offset (obj_4, c);
unsigned obj_3 = c->pop_pack (false);
start_object ("def", 3, c);
add_offset (obj_3, c);
unsigned obj_2 = c->pop_pack (false);
start_object ("abc", 3, c);
add_offset (obj_2, c);
add_offset (obj_4, c);
add_offset (obj_5, c);
c->pop_pack ();
c->end_serialize();
}
static void test_sort_kahn_1 ()
{
size_t buffer_size = 100;
@ -227,6 +277,79 @@ static void test_sort_shortest ()
assert(graph.objects_[0].links.length == 0);
}
static void test_duplicate_leaf ()
{
size_t buffer_size = 100;
void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_complex_2 (&c);
graph_t graph (c.object_graph ());
graph.duplicate (4, 1);
assert(strncmp (graph.objects_[5].head, "abc", 3) == 0);
assert(graph.objects_[5].links.length == 3);
assert(graph.objects_[5].links[0].objidx == 3);
assert(graph.objects_[5].links[1].objidx == 4);
assert(graph.objects_[5].links[2].objidx == 0);
assert(strncmp (graph.objects_[4].head, "jkl", 3) == 0);
assert(graph.objects_[4].links.length == 0);
assert(strncmp (graph.objects_[3].head, "def", 3) == 0);
assert(graph.objects_[3].links.length == 1);
assert(graph.objects_[3].links[0].objidx == 2);
assert(strncmp (graph.objects_[2].head, "ghi", 3) == 0);
assert(graph.objects_[2].links.length == 1);
assert(graph.objects_[2].links[0].objidx == 1);
assert(strncmp (graph.objects_[1].head, "jkl", 3) == 0);
assert(graph.objects_[1].links.length == 0);
assert(strncmp (graph.objects_[0].head, "mn", 2) == 0);
assert(graph.objects_[0].links.length == 0);
}
static void test_duplicate_interior ()
{
size_t buffer_size = 100;
void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_complex_3 (&c);
graph_t graph (c.object_graph ());
graph.duplicate (3, 2);
assert(strncmp (graph.objects_[6].head, "abc", 3) == 0);
assert(graph.objects_[6].links.length == 3);
assert(graph.objects_[6].links[0].objidx == 4);
assert(graph.objects_[6].links[1].objidx == 2);
assert(graph.objects_[6].links[2].objidx == 1);
assert(strncmp (graph.objects_[5].head, "jkl", 3) == 0);
assert(graph.objects_[5].links.length == 1);
assert(graph.objects_[5].links[0].objidx == 0);
assert(strncmp (graph.objects_[4].head, "def", 3) == 0);
assert(graph.objects_[4].links.length == 1);
assert(graph.objects_[4].links[0].objidx == 3);
assert(strncmp (graph.objects_[3].head, "ghi", 3) == 0);
assert(graph.objects_[3].links.length == 1);
assert(graph.objects_[3].links[0].objidx == 5);
assert(strncmp (graph.objects_[2].head, "jkl", 3) == 0);
assert(graph.objects_[2].links.length == 1);
assert(graph.objects_[2].links[0].objidx == 0);
assert(strncmp (graph.objects_[1].head, "mn", 2) == 0);
assert(graph.objects_[1].links.length == 0);
assert(strncmp (graph.objects_[0].head, "opqrst", 6) == 0);
assert(graph.objects_[0].links.length == 0);
}
static void
test_serialize ()
{
@ -257,7 +380,7 @@ static void test_will_overflow_1 ()
populate_serializer_complex_2 (&c);
graph_t graph (c.object_graph ());
assert (!graph.will_overflow ());
assert (!graph.will_overflow (nullptr));
}
static void test_will_overflow_2 ()
@ -268,9 +391,63 @@ static void test_will_overflow_2 ()
populate_serializer_with_overflow (&c);
graph_t graph (c.object_graph ());
assert (graph.will_overflow ());
assert (graph.will_overflow (nullptr));
}
static void test_will_overflow_3 ()
{
size_t buffer_size = 160000;
void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_with_dedup_overflow (&c);
graph_t graph (c.object_graph ());
assert (graph.will_overflow (nullptr));
}
static void test_resolve_overflows_via_sort ()
{
size_t buffer_size = 160000;
void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_with_overflow (&c);
graph_t graph (c.object_graph ());
void* out_buffer = malloc (buffer_size);
hb_serialize_context_t out (out_buffer, buffer_size);
hb_resolve_overflows (c.object_graph (), &out);
assert (!out.offset_overflow);
hb_bytes_t result = out.copy_bytes ();
assert (result.length == (80000 + 3 + 3 * 2));
result.free ();
free (buffer);
free (out_buffer);
}
static void test_resolve_overflows_via_duplication ()
{
size_t buffer_size = 160000;
void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_with_dedup_overflow (&c);
graph_t graph (c.object_graph ());
void* out_buffer = malloc (buffer_size);
hb_serialize_context_t out (out_buffer, buffer_size);
hb_resolve_overflows (c.object_graph (), &out);
assert (!out.offset_overflow);
hb_bytes_t result = out.copy_bytes ();
assert (result.length == (10000 + 2 * 2 + 60000 + 2 + 3 * 2));
result.free ();
free (buffer);
free (out_buffer);
}
// TODO(garretrieger): update will_overflow tests to check the overflows array.
// TODO(garretrieger): add a test(s) using a real font.
int
@ -282,4 +459,9 @@ main (int argc, char **argv)
test_sort_shortest ();
test_will_overflow_1 ();
test_will_overflow_2 ();
test_will_overflow_3 ();
test_resolve_overflows_via_sort ();
test_resolve_overflows_via_duplication ();
test_duplicate_leaf ();
test_duplicate_interior ();
}