Merge pull request #2857 from googlefonts/repacker

[subset] add a GSUB/GPOS table repacker to resolve offset overflows.
2021-03-18 14:38:55 -07:00 · 2021-03-18 14:38:55 -07:00 · c5d6bdb4bf
parent a7d120aeab 46bf03d691
commit c5d6bdb4bf
31 changed files with 2201 additions and 95 deletions
--- a/configure.ac
+++ b/configure.ac
@ -432,6 +432,7 @@ test/shaping/data/in-house/Makefile
 test/shaping/data/text-rendering-tests/Makefile
 test/subset/Makefile
 test/subset/data/Makefile
+test/subset/data/repack_tests/Makefile
 docs/Makefile
 docs/version.xml
 ])
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -342,7 +342,7 @@ test_gsub_would_substitute_SOURCES = test-gsub-would-substitute.cc
 test_gsub_would_substitute_CPPFLAGS = $(HBCFLAGS) $(FREETYPE_CFLAGS)
 test_gsub_would_substitute_LDADD = libharfbuzz.la $(HBLIBS) $(FREETYPE_LIBS)

-COMPILED_TESTS = test-algs test-array test-iter test-meta test-number test-ot-tag test-unicode-ranges test-bimap
+COMPILED_TESTS = test-algs test-array test-iter test-meta test-number test-ot-tag test-priority-queue test-unicode-ranges test-bimap test-repacker
 COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG
 COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS)
 check_PROGRAMS += $(COMPILED_TESTS)
@ -356,6 +356,14 @@ test_array_SOURCES = test-array.cc
 test_array_CPPFLAGS = $(HBCFLAGS)
 test_array_LDADD = libharfbuzz.la $(HBLIBS)

+test_priority_queue_SOURCES = test-priority-queue.cc hb-static.cc
+test_priority_queue_CPPFLAGS = $(HBCFLAGS)
+test_priority_queue_LDADD = libharfbuzz.la $(HBLIBS)
+
+test_repacker_SOURCES = test-repacker.cc hb-static.cc
+test_repacker_CPPFLAGS = $(HBCFLAGS)
+test_repacker_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS)
+
 test_iter_SOURCES = test-iter.cc hb-static.cc
 test_iter_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
 test_iter_LDADD = $(COMPILED_TESTS_LDADD)
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@ -167,6 +167,7 @@ HB_BASE_sources = \
 	hb-unicode.hh \
 	hb-utf.hh \
 	hb-vector.hh \
+	hb-priority-queue.hh \
 	hb.hh \
 	$(NULL)

@ -268,6 +269,7 @@ HB_SUBSET_sources = \
 	hb-subset-plan.hh \
 	hb-subset.cc \
 	hb-subset.hh \
+	hb-repacker.hh \
 	$(NULL)

 HB_SUBSET_headers = \
--- a/src/hb-cff-interp-common.hh
+++ b/src/hb-cff-interp-common.hh
@ -263,7 +263,7 @@ struct UnsizedByteStr : UnsizedArrayOf <HBUINT8>

    T *ip = c->allocate_size<T> (T::static_size);
    if (unlikely (!ip)) return_trace (false);
-    return_trace (c->check_assign (*ip, value));
+    return_trace (c->check_assign (*ip, value, HB_SERIALIZE_ERROR_INT_OVERFLOW));
  }

  template <typename V>
--- a/src/hb-debug.hh
+++ b/src/hb-debug.hh
@ -438,6 +438,10 @@ struct hb_no_trace_t {
 #define TRACE_SUBSET(this) hb_no_trace_t<bool> trace
 #endif

+#ifndef HB_DEBUG_SUBSET_REPACK
+#define HB_DEBUG_SUBSET_REPACK (HB_DEBUG+0)
+#endif
+
 #ifndef HB_DEBUG_DISPATCH
 #define HB_DEBUG_DISPATCH ( \
 	HB_DEBUG_APPLY + \
--- a/src/hb-open-type.hh
+++ b/src/hb-open-type.hh
@ -209,7 +209,9 @@ struct Offset : Type
  void *serialize (hb_serialize_context_t *c, const void *base)
  {
    void *t = c->start_embed<void> ();
-    c->check_assign (*this, (unsigned) ((char *) t - (char *) base));
+    c->check_assign (*this,
+                     (unsigned) ((char *) t - (char *) base),
+                     HB_SERIALIZE_ERROR_OFFSET_OVERFLOW);
    return t;
  }

@ -621,7 +623,7 @@ struct ArrayOf
  {
    TRACE_SERIALIZE (this);
    if (unlikely (!c->extend_min (*this))) return_trace (false);
-    c->check_assign (len, items_len);
+    c->check_assign (len, items_len, HB_SERIALIZE_ERROR_ARRAY_OVERFLOW);
    if (unlikely (!c->extend (*this))) return_trace (false);
    return_trace (true);
  }
@ -656,7 +658,7 @@ struct ArrayOf
    TRACE_SERIALIZE (this);
    auto *out = c->start_embed (this);
    if (unlikely (!c->extend_min (out))) return_trace (nullptr);
-    c->check_assign (out->len, len);
+    c->check_assign (out->len, len, HB_SERIALIZE_ERROR_ARRAY_OVERFLOW);
    if (unlikely (!as_array ().copy (c))) return_trace (nullptr);
    return_trace (out);
  }
@ -787,7 +789,7 @@ struct HeadlessArrayOf
  {
    TRACE_SERIALIZE (this);
    if (unlikely (!c->extend_min (*this))) return_trace (false);
-    c->check_assign (lenP1, items_len + 1);
+    c->check_assign (lenP1, items_len + 1, HB_SERIALIZE_ERROR_ARRAY_OVERFLOW);
    if (unlikely (!c->extend (*this))) return_trace (false);
    return_trace (true);
  }
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@ -276,7 +276,9 @@ struct CmapSubtableFormat4
    HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
    if (unlikely (!c->check_success (idRangeOffset))) return;

-    if (unlikely (!c->check_assign(this->length, c->length () - table_initpos))) return;
+    if (unlikely (!c->check_assign(this->length,
+                                   c->length () - table_initpos,
+                                   HB_SERIALIZE_ERROR_INT_OVERFLOW))) return;
    this->segCountX2 = segcount * 2;
    this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1;
    this->searchRange = 2 * (1u << this->entrySelector);
@ -850,7 +852,9 @@ struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
    }
    else
    {
-      if (unlikely (!c->check_assign (out->len, (c->length () - init_len) / UnicodeValueRange::static_size))) return nullptr;
+      if (unlikely (!c->check_assign (out->len,
+                                      (c->length () - init_len) / UnicodeValueRange::static_size,
+                                      HB_SERIALIZE_ERROR_INT_OVERFLOW))) return nullptr;
      return out;
    }
  }
@ -1112,10 +1116,12 @@ struct CmapSubtableFormat14
      return;

    int tail_len = init_tail - c->tail;
-    c->check_assign (this->length, c->length () - table_initpos + tail_len);
+    c->check_assign (this->length, c->length () - table_initpos + tail_len,
+                     HB_SERIALIZE_ERROR_INT_OVERFLOW);
    c->check_assign (this->record.len,
 		     (c->length () - table_initpos - CmapSubtableFormat14::min_size) /
-		     VariationSelectorRecord::static_size);
+		     VariationSelectorRecord::static_size,
+                     HB_SERIALIZE_ERROR_INT_OVERFLOW);

    /* Correct the incorrect write order by reversing the order of the variation
       records array. */
@ -1401,7 +1407,9 @@ struct cmap
      }
    }

-    c->check_assign(this->encodingRecord.len, (c->length () - cmap::min_size)/EncodingRecord::static_size);
+    c->check_assign(this->encodingRecord.len,
+                    (c->length () - cmap::min_size)/EncodingRecord::static_size,
+                    HB_SERIALIZE_ERROR_INT_OVERFLOW);
  }

  void closure_glyphs (const hb_set_t      *unicodes,
--- a/src/hb-ot-hdmx-table.hh
+++ b/src/hb-ot-hdmx-table.hh
@ -110,7 +110,7 @@ struct hdmx
    for (const hb_item_type<Iterator>& _ : +it)
      c->start_embed<DeviceRecord> ()->serialize (c, _.first, _.second);

-    return_trace (c->successful);
+    return_trace (c->successful ());
  }


--- a/src/hb-ot-hmtx-table.hh
+++ b/src/hb-ot-hmtx-table.hh
@ -146,7 +146,7 @@ struct hmtxvmtx

    _mtx.fini ();

-    if (unlikely (c->serializer->ran_out_of_room || c->serializer->in_error ()))
+    if (unlikely (c->serializer->in_error ()))
      return_trace (false);

    // Amend header num hmetrics
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@ -694,7 +694,7 @@ struct MarkArray : ArrayOf<MarkRecord>	/* Array of MarkRecords--in Coverage orde
  {
    TRACE_SERIALIZE (this);
    if (unlikely (!c->extend_min (*this))) return_trace (false);
-    if (unlikely (!c->check_assign (len, it.len ()))) return_trace (false);
+    if (unlikely (!c->check_assign (len, it.len (), HB_SERIALIZE_ERROR_ARRAY_OVERFLOW))) return_trace (false);
    c->copy_all (it, base, c->to_bias (this), klass_mapping, layout_variation_idx_map);
    return_trace (true);
  }
@ -756,7 +756,7 @@ struct SinglePosFormat1
  {
    auto out = c->extend_min (*this);
    if (unlikely (!out)) return;
-    if (unlikely (!c->check_assign (valueFormat, valFormat))) return;
+    if (unlikely (!c->check_assign (valueFormat, valFormat, HB_SERIALIZE_ERROR_INT_OVERFLOW))) return;

    + it
    | hb_map (hb_second)
@ -870,8 +870,8 @@ struct SinglePosFormat2
  {
    auto out = c->extend_min (*this);
    if (unlikely (!out)) return;
-    if (unlikely (!c->check_assign (valueFormat, valFormat))) return;
-    if (unlikely (!c->check_assign (valueCount, it.len ()))) return;
+    if (unlikely (!c->check_assign (valueFormat, valFormat, HB_SERIALIZE_ERROR_INT_OVERFLOW))) return;
+    if (unlikely (!c->check_assign (valueCount, it.len (), HB_SERIALIZE_ERROR_ARRAY_OVERFLOW))) return;

    + it
    | hb_map (hb_second)
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@ -102,7 +102,7 @@ struct SingleSubstFormat1
    TRACE_SERIALIZE (this);
    if (unlikely (!c->extend_min (*this))) return_trace (false);
    if (unlikely (!coverage.serialize (c, this).serialize (c, glyphs))) return_trace (false);
-    c->check_assign (deltaGlyphID, delta);
+    c->check_assign (deltaGlyphID, delta, HB_SERIALIZE_ERROR_INT_OVERFLOW);
    return_trace (true);
  }

@ -1551,7 +1551,7 @@ struct SubstLookup : Lookup

  template <typename context_t>
  static inline typename context_t::return_t dispatch_recurse_func (context_t *c, unsigned int lookup_index);
-  
+
  static inline typename hb_closure_context_t::return_t closure_glyphs_recurse_func (hb_closure_context_t *c, unsigned lookup_index, hb_set_t *covered_seq_indices, unsigned seq_index, unsigned end_index);

  static inline hb_closure_context_t::return_t dispatch_closure_recurse_func (hb_closure_context_t *c, unsigned lookup_index, hb_set_t *covered_seq_indices, unsigned seq_index, unsigned end_index)
--- a/src/hb-ot-name-table.hh
+++ b/src/hb-ot-name-table.hh
@ -230,7 +230,8 @@ struct name
    c->copy_all (records, src_string_pool);
    free (records.arrayZ);

-    if (unlikely (c->ran_out_of_room)) return_trace (false);
+
+    if (unlikely (c->ran_out_of_room ())) return_trace (false);

    this->stringOffset = c->length ();

--- a/src/hb-priority-queue.hh
+++ b/src/hb-priority-queue.hh
@ -0,0 +1,151 @@
+/*
+ * Copyright © 2020  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef HB_PRIORITY_QUEUE_HH
+#define HB_PRIORITY_QUEUE_HH
+
+#include "hb.hh"
+#include "hb-vector.hh"
+
+/*
+ * hb_priority_queue_t
+ *
+ * Priority queue implemented as a binary heap. Supports extract minimum
+ * and insert operations.
+ */
+struct hb_priority_queue_t
+{
+  HB_DELETE_COPY_ASSIGN (hb_priority_queue_t);
+  hb_priority_queue_t ()  { init (); }
+  ~hb_priority_queue_t () { fini (); }
+
+ private:
+  /* A queued entry: (priority, value). Ordering looks at the priority only. */
+  typedef hb_pair_t<int64_t, unsigned> item_t;
+  /* Binary min-heap stored as an array; children of i live at 2i+1 and 2i+2. */
+  hb_vector_t<item_t> heap;
+
+ public:
+  void init () { heap.init (); }
+
+  void fini () { heap.fini (); }
+
+  /* Removes every queued item. */
+  void reset () { heap.resize (0); }
+
+  /* True once the backing vector reports an error. */
+  bool in_error () const { return heap.in_error (); }
+
+  /* Adds value with the given priority (lower priority pops first). */
+  void insert (int64_t priority, unsigned value)
+  {
+    heap.push (item_t (priority, value));
+    // If the push failed there is no new element to sift; with an empty
+    // heap 'length - 1' would also wrap around.
+    if (heap.in_error ()) return;
+    bubble_up (heap.length - 1);
+  }
+
+  /*
+   * Removes and returns the item with the smallest priority.
+   * On an empty queue returns a zeroed item instead of indexing
+   * out of range; callers should check is_empty () first.
+   */
+  item_t pop_minimum ()
+  {
+    if (is_empty ()) return item_t (0, 0);
+
+    item_t result = heap[0];
+
+    // Move the last leaf to the root, drop the vacated slot, then restore
+    // the heap property from the top.
+    heap[0] = heap[heap.length - 1];
+    heap.shrink (heap.length - 1);
+    bubble_down (0);
+
+    return result;
+  }
+
+  /* Peeks at the item with the smallest priority without removing it. */
+  const item_t& minimum ()
+  {
+    return heap[0];
+  }
+
+  bool is_empty () const { return heap.length == 0; }
+  explicit operator bool () const { return !is_empty (); }
+  unsigned int get_population () const { return heap.length; }
+
+  /* Sink interface. */
+  hb_priority_queue_t& operator << (item_t item)
+  { insert (item.first, item.second); return *this; }
+
+ private:
+
+  static constexpr unsigned parent (unsigned index)
+  {
+    return (index - 1) / 2;
+  }
+
+  static constexpr unsigned left_child (unsigned index)
+  {
+    return 2 * index + 1;
+  }
+
+  static constexpr unsigned right_child (unsigned index)
+  {
+    return 2 * index + 2;
+  }
+
+  /* Sifts the item at index down until neither child has a smaller priority. */
+  void bubble_down (unsigned index)
+  {
+    for (;;)
+    {
+      unsigned left = left_child (index);
+      unsigned right = right_child (index);
+
+      // If there's no left, then there's also no right.
+      if (left >= heap.length) return;
+
+      bool has_right = right < heap.length;
+      if (heap[index].first <= heap[left].first
+          && (!has_right || heap[index].first <= heap[right].first))
+        return;
+
+      // Swap with the smaller child; the right child wins ties.
+      unsigned smaller =
+          (!has_right || heap[left].first < heap[right].first) ? left : right;
+      swap (index, smaller);
+      index = smaller;
+    }
+  }
+
+  /* Sifts the item at index up while its parent has a larger priority. */
+  void bubble_up (unsigned index)
+  {
+    while (index != 0)
+    {
+      unsigned parent_index = parent (index);
+      if (heap[parent_index].first <= heap[index].first)
+        return;
+
+      swap (index, parent_index);
+      index = parent_index;
+    }
+  }
+
+  void swap (unsigned a, unsigned b)
+  {
+    item_t temp = heap[a];
+    heap[a] = heap[b];
+    heap[b] = temp;
+  }
+};
+
+#endif /* HB_PRIORITY_QUEUE_HH */
--- a/src/hb-repacker.hh
+++ b/src/hb-repacker.hh
@ -0,0 +1,754 @@
+/*
+ * Copyright © 2020  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef HB_REPACKER_HH
+#define HB_REPACKER_HH
+
+#include "hb-open-type.hh"
+#include "hb-map.hh"
+#include "hb-priority-queue.hh"
+#include "hb-serialize.hh"
+#include "hb-vector.hh"
+
+
+struct graph_t
+{
+  /*
+   * One node of the object graph: the serialized object plus the
+   * bookkeeping used while sorting and checking offsets.
+   */
+  struct vertex_t
+  {
+    vertex_t () :
+        distance (0),
+        incoming_edges (0),
+        start (0),
+        end (0),
+        priority(0) {}
+
+    void fini () { obj.fini (); }
+
+    hb_serialize_context_t::object_t obj;
+    // Distance from the root as computed by graph_t::update_distances ().
+    int64_t distance;
+    // Number of links pointing at this vertex.
+    unsigned incoming_edges;
+    // Position range assigned by graph_t::update_positions ().
+    unsigned start;
+    unsigned end;
+    // Number of times raise_priority () was called on this vertex.
+    unsigned priority;
+
+    bool is_shared () const
+    {
+      return incoming_edges > 1;
+    }
+
+    bool is_leaf () const
+    {
+      return !obj.links.length;
+    }
+
+    void raise_priority ()
+    {
+      priority++;
+    }
+
+    /*
+     * Sort key used by the shortest distance ordering: the priority
+     * adjusted distance in the high bits and the low 24 bits of order
+     * as a tie breaker.
+     */
+    int64_t modified_distance (unsigned order) const
+    {
+      // TODO(garretrieger): once priority is high enough, should try
+      // setting distance = 0 which will force to sort immediately after
+      // its parent where possible.
+
+      int64_t modified_distance = distance + distance_modifier ();
+
+      // Keep the scaled value (plus the 24 order bits) inside int64_t.
+      const int64_t max_distance = ((int64_t) 1 << 39) - 1;
+      if (modified_distance > max_distance)
+        modified_distance = max_distance;
+
+      // distance_modifier () can make the value negative and left shifting a
+      // negative signed integer is undefined, so scale by multiplication.
+      // The low 24 bits of the product are zero, which makes '+' equivalent
+      // to the bitwise or of the order bits.
+      return modified_distance * ((int64_t) 1 << 24) + (0x00FFFFFF & order);
+    }
+
+    /*
+     * Non-positive adjustment that moves a prioritized vertex earlier
+     * in the ordering. Zero when priority is zero; otherwise grows
+     * towards -table_size as priority rises (capped at priority 16).
+     */
+    int64_t distance_modifier () const
+    {
+      if (!priority) return 0;
+      int64_t table_size = obj.tail - obj.head;
+      return -(table_size - table_size / (1 << hb_min(priority, 16u)));
+    }
+  };
+
+  /*
+   * Describes a single overflowing offset found by will_overflow ():
+   * the index of the vertex holding the link and the link itself.
+   */
+  struct overflow_record_t
+  {
+    // Index into vertices_ of the object that contains the link.
+    unsigned parent;
+    // Points at the link stored inside that parent's object.
+    const hb_serialize_context_t::object_t::link_t* link;
+  };
+
+  /*
+   * Owns a malloc'ed copy of an object's bytes. Used by duplicate ()
+   * to back the cloned vertex.
+   */
+  struct clone_buffer_t
+  {
+    clone_buffer_t () : head (nullptr), tail (nullptr) {}
+
+    /*
+     * Replaces the current contents with a copy of object's bytes.
+     * Returns false if the allocation fails.
+     */
+    bool copy (const hb_serialize_context_t::object_t& object)
+    {
+      fini ();
+      unsigned size = object.tail - object.head;
+      // malloc (0) is allowed to return nullptr, which would be mistaken
+      // for an allocation failure; always request at least one byte.
+      head = (char*) malloc (size ? size : 1);
+      if (!head) return false;
+
+      if (size) memcpy (head, object.head, size);
+      tail = head + size;
+      return true;
+    }
+
+    char* head;
+    char* tail;
+
+    /* Releases the buffer, if any. */
+    void fini ()
+    {
+      if (!head) return;
+      free (head);
+      head = nullptr;
+      // Don't leave tail pointing into freed memory.
+      tail = nullptr;
+    }
+  };
+
+  /*
+   * A topological sorting of an object graph. Ordered
+   * in reverse serialization order (first object in the
+   * serialization is at the end of the list). This matches
+   * the 'packed' object stack used internally in the
+   * serializer.
+   *
+   * Each object is copied into a vertex. If copying fails the
+   * graph is marked as failed; check in_error () afterwards.
+   */
+  graph_t (const hb_vector_t<hb_serialize_context_t::object_t *>& objects)
+      : edge_count_invalid (true),
+        distance_invalid (true),
+        positions_invalid (true),
+        successful (true)
+  {
+    bool removed_nil = false;
+    for (unsigned i = 0; i < objects.length; i++)
+    {
+      // TODO(grieger): check all links point to valid objects.
+
+      // If this graph came from a serialization buffer object 0 is the
+      // nil object. We don't need it for our purposes here so drop it.
+      if (i == 0 && !objects[i])
+      {
+        removed_nil = true;
+        continue;
+      }
+
+      vertex_t* v = vertices_.push ();
+      // Don't copy into the result of a failed push; record the failure
+      // and stop building.
+      if (!check_success (!vertices_.in_error ())) return;
+
+      v->obj = *objects[i];
+      if (!removed_nil) continue;
+      for (unsigned j = 0; j < v->obj.links.length; j++)
+        // Fix indices to account for removed nil object.
+        v->obj.links[j].objidx--;
+    }
+  }
+
+  /* Releases the vertices and any buffers allocated by duplicate (). */
+  ~graph_t ()
+  {
+    vertices_.fini_deep ();
+    clone_buffers_.fini_deep ();
+  }
+
+  /*
+   * True if any operation on the graph has failed, or if either of
+   * the backing vectors is in an error state.
+   */
+  bool in_error () const
+  {
+    if (!successful) return true;
+    return vertices_.in_error () || clone_buffers_.in_error ();
+  }
+
+  /* Returns the root vertex, i.e. the one at root_idx (). */
+  const vertex_t& root () const
+  {
+    return vertices_[root_idx ()];
+  }
+
+  /* Index of the root vertex: always the last entry of vertices_. */
+  unsigned root_idx () const
+  {
+    // Object graphs are in reverse order, the first object is at the end
+    // of the vector. Since the graph is topologically sorted it's safe to
+    // assume the first object has no incoming edges.
+    return vertices_.length - 1;
+  }
+
+  /* Returns the serialized object stored in vertex i. */
+  const hb_serialize_context_t::object_t& object(unsigned i) const
+  {
+    return vertices_[i].obj;
+  }
+
+  /*
+   * Writes every vertex of the graph, in vertex order, into the
+   * provided serialization buffer and re-registers its links.
+   * Stops early if the serializer cannot allocate room for an object.
+   */
+  void serialize (hb_serialize_context_t* c) const
+  {
+    c->start_serialize<void> ();
+    for (const vertex_t& v : vertices_)
+    {
+      const hb_serialize_context_t::object_t& obj = v.obj;
+
+      c->push ();
+
+      size_t size = obj.tail - obj.head;
+      char* start = c->allocate_size <char> (size);
+      if (!start) return;
+
+      memcpy (start, obj.head, size);
+
+      for (const auto& link : obj.links)
+        serialize_link (link, start, c);
+
+      // The graph already contains every duplication it needs, so object
+      // sharing is turned off while packing.
+      c->pop_pack (false);
+    }
+    c->end_serialize ();
+  }
+
+  /*
+   * Generates a new topological sorting of graph using Kahn's
+   * algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Algorithms
+   *
+   * Vertices are visited in FIFO order starting from the root. On
+   * completion vertices_ is replaced by the newly ordered list and all
+   * link indices are remapped. Failures are recorded via check_success ().
+   */
+  void sort_kahn ()
+  {
+    positions_invalid = true;
+
+    if (vertices_.length <= 1) {
+      // Graph of 1 or less doesn't need sorting.
+      return;
+    }
+
+    // Indices of vertices whose incoming edges have all been processed.
+    hb_vector_t<unsigned> queue;
+    // Vertices in visiting order (root first); reversed before being stored.
+    hb_vector_t<vertex_t> sorted_graph;
+    // Maps old vertex index => new vertex index.
+    hb_vector_t<unsigned> id_map;
+    check_success (id_map.resize (vertices_.length));
+
+    // Per vertex count of incoming edges whose source was already visited.
+    hb_vector_t<unsigned> removed_edges;
+    check_success (removed_edges.resize (vertices_.length));
+    update_incoming_edge_count ();
+
+    queue.push (root_idx ());
+    // New ids are handed out from the end so that the root stays last.
+    int new_id = vertices_.length - 1;
+
+    while (!queue.in_error () && queue.length)
+    {
+      unsigned next_id = queue[0];
+      queue.remove (0);
+
+      vertex_t& next = vertices_[next_id];
+      sorted_graph.push (next);
+      id_map[next_id] = new_id--;
+
+      for (const auto& link : next.obj.links) {
+        removed_edges[link.objidx]++;
+        // A child becomes ready once every edge pointing at it was removed.
+        if (!(vertices_[link.objidx].incoming_edges - removed_edges[link.objidx]))
+          queue.push (link.objidx);
+      }
+    }
+
+    check_success (!queue.in_error ());
+    check_success (!sorted_graph.in_error ());
+    // Every vertex consumes one id, so anything other than -1 means some
+    // vertex was never visited.
+    if (!check_success (new_id == -1))
+      DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected.");
+
+    remap_obj_indices (id_map, &sorted_graph);
+
+    // Visiting order is root first, the stored order is root last.
+    sorted_graph.as_array ().reverse ();
+
+    vertices_.fini_deep ();
+    vertices_ = sorted_graph;
+    sorted_graph.fini_deep ();
+  }
+
+  /*
+   * Generates a new topological sorting of graph ordered by the shortest
+   * distance to each node.
+   *
+   * Same traversal as sort_kahn (), except that ready vertices are taken
+   * from a priority queue keyed by vertex_t::modified_distance () rather
+   * than in FIFO order. Failures are recorded via check_success ().
+   */
+  void sort_shortest_distance ()
+  {
+    positions_invalid = true;
+
+    if (vertices_.length <= 1) {
+      // Graph of 1 or less doesn't need sorting.
+      return;
+    }
+
+    update_distances ();
+
+    // Ready vertices, smallest modified distance first.
+    hb_priority_queue_t queue;
+    // Vertices in visiting order (root first); reversed before being stored.
+    hb_vector_t<vertex_t> sorted_graph;
+    // Maps old vertex index => new vertex index.
+    hb_vector_t<unsigned> id_map;
+    check_success (id_map.resize (vertices_.length));
+
+    // Per vertex count of incoming edges whose source was already visited.
+    hb_vector_t<unsigned> removed_edges;
+    check_success (removed_edges.resize (vertices_.length));
+    update_incoming_edge_count ();
+
+    queue.insert (root ().modified_distance (0), root_idx ());
+    // New ids are handed out from the end so that the root stays last.
+    int new_id = root_idx ();
+    unsigned order = 1;
+    while (!queue.in_error () && !queue.is_empty ())
+    {
+      unsigned next_id = queue.pop_minimum().second;
+
+      vertex_t& next = vertices_[next_id];
+      sorted_graph.push (next);
+      id_map[next_id] = new_id--;
+
+      for (const auto& link : next.obj.links) {
+        removed_edges[link.objidx]++;
+        if (!(vertices_[link.objidx].incoming_edges - removed_edges[link.objidx]))
+          // Add the order that the links were encountered to the priority.
+          // This ensures that ties between priorities objects are broken in a consistent
+          // way. More specifically this is set up so that if a set of objects have the same
+          // distance they'll be added to the topological order in the order that they are
+          // referenced from the parent object.
+          queue.insert (vertices_[link.objidx].modified_distance (order++),
+                        link.objidx);
+      }
+    }
+
+    check_success (!queue.in_error ());
+    check_success (!sorted_graph.in_error ());
+    // Every vertex consumes one id, so anything other than -1 means some
+    // vertex was never visited.
+    if (!check_success (new_id == -1))
+      DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected.");
+
+    remap_obj_indices (id_map, &sorted_graph);
+
+    // Visiting order is root first, the stored order is root last.
+    sorted_graph.as_array ().reverse ();
+
+    vertices_.fini_deep ();
+    vertices_ = sorted_graph;
+    sorted_graph.fini_deep ();
+  }
+
+  /*
+   * Creates a copy of child and re-assigns the link from
+   * parent to the clone. The copy is a shallow copy, objects
+   * linked from child are not duplicated.
+   *
+   * If the clone cannot be allocated the graph is marked as failed
+   * (see in_error ()) and the vertex list is left with the root last.
+   */
+  void duplicate (unsigned parent_idx, unsigned child_idx)
+  {
+    DEBUG_MSG (SUBSET_REPACK, nullptr, "  Duplicating %d => %d",
+               parent_idx, child_idx);
+
+    positions_invalid = true;
+
+    auto* clone = vertices_.push ();
+    if (vertices_.in_error ())
+    {
+      // No vertex was added, so there is nothing to fill in or undo.
+      check_success (false);
+      return;
+    }
+
+    clone_buffer_t* buffer = clone_buffers_.push ();
+    bool copied = !clone_buffers_.in_error ()
+                  && buffer->copy (vertices_[child_idx].obj);
+    if (!check_success (copied)) {
+      // Drop the half-made clone again; otherwise it would sit in the last
+      // slot, which is reserved for the root.
+      vertices_.shrink (vertices_.length - 1);
+      return;
+    }
+
+    auto& child = vertices_[child_idx];
+    clone->obj.head = buffer->head;
+    clone->obj.tail = buffer->tail;
+    clone->distance = child.distance;
+
+    for (const auto& l : child.obj.links)
+      clone->obj.links.push (l);
+
+    check_success (!clone->obj.links.in_error ());
+
+    auto& parent = vertices_[parent_idx];
+    // The clone currently sits in the last slot but is moved to the second
+    // to last slot by the swap below, so that is the index links must use.
+    unsigned clone_idx = vertices_.length - 2;
+    for (unsigned i = 0; i < parent.obj.links.length; i++)
+    {
+      auto& l = parent.obj.links[i];
+      if (l.objidx == child_idx)
+      {
+        l.objidx = clone_idx;
+        clone->incoming_edges++;
+        child.incoming_edges--;
+      }
+    }
+
+    // The last object is the root of the graph, so swap back the root to the end.
+    // The root's obj idx does change, however since it's root nothing else refers to it.
+    // all other obj idx's will be unaffected.
+    vertex_t root = vertices_[vertices_.length - 2];
+    vertices_[vertices_.length - 2] = *clone;
+    vertices_[vertices_.length - 1] = root;
+  }
+
+  /*
+   * Bumps the sorting priority of every vertex that parent_idx
+   * links to (once per link).
+   */
+  void raise_childrens_priority (unsigned parent_idx)
+  {
+    DEBUG_MSG (SUBSET_REPACK, nullptr, "  Raising priority of all children of %d",
+               parent_idx);
+    // Priorities only take effect on the next sort, so positions stay valid.
+    // The graph structure is untouched, so distances and edge counts stay
+    // valid as well.
+    auto& parent = vertices_[parent_idx].obj;
+    for (const auto& link : parent.links)
+      vertices_[link.objidx].raise_priority ();
+  }
+
+  /*
+   * Reports whether any offset would overflow if the graph were
+   * serialized in its current order.
+   *
+   * When overflows is provided it is cleared and then filled with one
+   * record per overflowing link; otherwise the scan stops at the first
+   * overflow found.
+   */
+  bool will_overflow (hb_vector_t<overflow_record_t>* overflows = nullptr)
+  {
+    if (overflows) overflows->resize (0);
+    update_positions ();
+
+    // Walk from the root (last vertex) down to index 0.
+    for (int idx = vertices_.length - 1; idx >= 0; idx--)
+    {
+      for (const auto& link : vertices_[idx].obj.links)
+      {
+        if (is_valid_offset (compute_offset (idx, link), link))
+          continue;
+
+        // Caller only wants a yes/no answer.
+        if (!overflows) return true;
+
+        overflow_record_t record;
+        record.parent = idx;
+        record.link = &link;
+        overflows->push (record);
+      }
+    }
+
+    return overflows && overflows->length;
+  }
+
+  /*
+   * Emits one debug message per overflow record: the parent and child
+   * indices plus the child's incoming and outgoing edge counts.
+   * Does nothing unless SUBSET_REPACK debugging is enabled.
+   */
+  void print_overflows (const hb_vector_t<overflow_record_t>& overflows)
+  {
+    if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
+
+    // incoming_edges is read below, make sure it has been computed.
+    update_incoming_edge_count ();
+    for (const auto& o : overflows)
+    {
+      const auto& child = vertices_[o.link->objidx];
+      DEBUG_MSG (SUBSET_REPACK, nullptr, "  overflow from %d => %d (%d incoming , %d outgoing)",
+                 o.parent,
+                 o.link->objidx,
+                 child.incoming_edges,
+                 child.obj.links.length);
+    }
+  }
+
+  /* Marks the graph as failed; in_error () returns true from now on. */
+  void err_other_error () { this->successful = false; }
+
+ private:
+
+  /*
+   * Records a failure when success is false. Returns true only if
+   * success is true and the graph had not already failed.
+   */
+  bool check_success (bool success)
+  { return this->successful && (success || (err_other_error (), false)); }
+
+  /*
+   * Recomputes incoming_edges for every vertex by counting the links
+   * that point at it. No-op if the counts are already valid.
+   */
+  void update_incoming_edge_count ()
+  {
+    if (!edge_count_invalid) return;
+
+    const unsigned count = vertices_.length;
+
+    // Start from a clean slate.
+    for (unsigned i = 0; i < count; i++)
+      vertices_[i].incoming_edges = 0;
+
+    // Each link contributes one incoming edge to its target.
+    for (unsigned i = 0; i < count; i++)
+      for (const auto& link : vertices_[i].obj.links)
+        vertices_[link.objidx].incoming_edges++;
+
+    edge_count_invalid = false;
+  }
+
+  /*
+   * Assigns each vertex its start and end position, laying the objects
+   * out back to back beginning with the root. No-op if the positions
+   * are already valid.
+   */
+  void update_positions ()
+  {
+    if (positions_invalid)
+    {
+      unsigned offset = 0;
+      // The root is the last vertex and is placed first.
+      for (int i = root_idx (); i >= 0; i--)
+      {
+        vertex_t& vertex = vertices_[i];
+        unsigned size = vertex.obj.tail - vertex.obj.head;
+        vertex.start = offset;
+        offset += size;
+        vertex.end = offset;
+      }
+
+      positions_invalid = false;
+    }
+  }
+
+  /*
+   * Finds the distance to each object in the graph
+   * from the initial node.
+   *
+   * The cost of following a link is the size of the child object plus a
+   * constant that depends on the width of the link. No-op if the
+   * distances are already valid.
+   */
+  void update_distances ()
+  {
+    if (!distance_invalid) return;
+
+    // Uses Dijkstra's algorithm to find all of the shortest distances.
+    // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
+    //
+    // Implementation Note:
+    // Since our priority queue doesn't support fast priority decreases
+    // we instead just add new entries into the queue when a priority changes.
+    // Redundant ones are filtered out later on by the visited set.
+    // According to https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf
+    // for practical performance this is faster than using a more advanced queue
+    // (such as a Fibonacci queue) with a fast decrease priority.
+
+    // The root (last vertex) starts at distance 0, everything else at "infinity".
+    for (unsigned i = 0; i < vertices_.length; i++)
+    {
+      if (i == vertices_.length - 1)
+        vertices_[i].distance = 0;
+      else
+        vertices_[i].distance = hb_int_max (int64_t);
+    }
+
+    hb_priority_queue_t queue;
+    queue.insert (0, vertices_.length - 1);
+
+    // Vertices whose outgoing links have already been processed.
+    hb_set_t visited;
+
+    while (!queue.in_error () && !queue.is_empty ())
+    {
+      unsigned next_idx = queue.pop_minimum ().second;
+      // Stale queue entry for a vertex that was already processed.
+      if (visited.has (next_idx)) continue;
+      const auto& next = vertices_[next_idx];
+      int64_t next_distance = vertices_[next_idx].distance;
+      visited.add (next_idx);
+
+      for (const auto& link : next.obj.links)
+      {
+        if (visited.has (link.objidx)) continue;
+
+        const auto& child = vertices_[link.objidx].obj;
+        // Weight = child size + 2^16 for a narrow link or 2^32 for a wide one.
+        int64_t child_weight = child.tail - child.head +
+                               (!link.is_wide ? (1 << 16) : ((int64_t) 1 << 32));
+        int64_t child_distance = next_distance + child_weight;
+
+        if (child_distance < vertices_[link.objidx].distance)
+        {
+          vertices_[link.objidx].distance = child_distance;
+          queue.insert (child_distance, link.objidx);
+        }
+      }
+    }
+
+    check_success (!queue.in_error ());
+    // The loop above only stops early when the queue is in error, so a
+    // non-empty queue here means not every entry was processed.
+    if (!check_success (queue.is_empty ()))
+    {
+      DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected.");
+      return;
+    }
+
+    distance_invalid = false;
+  }
+
+  /*
+   * Computes the value that would be stored for link when the graph is
+   * laid out per the current vertex positions: the child's start
+   * relative to the parent's start, the parent's end, or the start of
+   * the layout (depending on link.whence), minus link.bias.
+   *
+   * Positions must be up to date (see update_positions ()).
+   */
+  int64_t compute_offset (
+      unsigned parent_idx,
+      const hb_serialize_context_t::object_t::link_t& link) const
+  {
+    const auto& parent = vertices_[parent_idx];
+    const auto& child = vertices_[link.objidx];
+    int64_t offset = 0;
+    // NOTE(review): start/end are unsigned, so these subtractions happen in
+    // unsigned arithmetic before widening; a child placed before its parent
+    // would yield a large positive value rather than a negative one.
+    // Presumably callers always keep the graph topologically sorted - confirm.
+    switch ((hb_serialize_context_t::whence_t) link.whence) {
+      case hb_serialize_context_t::whence_t::Head:
+        offset = child.start - parent.start; break;
+      case hb_serialize_context_t::whence_t::Tail:
+        offset = child.start - parent.end; break;
+      case hb_serialize_context_t::whence_t::Absolute:
+        offset = child.start; break;
+    }
+
+    assert (offset >= link.bias);
+    offset -= link.bias;
+    return offset;
+  }
+
+  /*
+   * Checks whether offset fits in the field described by link:
+   * 16 or 32 bits wide (link.is_wide), signed or unsigned (link.is_signed).
+   */
+  bool is_valid_offset (int64_t offset,
+                        const hb_serialize_context_t::object_t::link_t& link) const
+  {
+    // Width of the offset field in bits.
+    const unsigned bits = link.is_wide ? 32 : 16;
+
+    // Valid values lie in the half open range [lower, upper).
+    int64_t lower, upper;
+    if (link.is_signed)
+    {
+      lower = -((int64_t) 1 << (bits - 1));
+      upper = (int64_t) 1 << (bits - 1);
+    }
+    else
+    {
+      lower = 0;
+      upper = (int64_t) 1 << bits;
+    }
+
+    return lower <= offset && offset < upper;
+  }
+
+  /*
+   * Rewrites the objidx of every link in sorted_graph, replacing each
+   * old index with id_map[old index].
+   */
+  void remap_obj_indices (const hb_vector_t<unsigned>& id_map,
+                          hb_vector_t<vertex_t>* sorted_graph) const
+  {
+    hb_vector_t<vertex_t>& graph = *sorted_graph;
+    for (unsigned v = 0; v < graph.length; v++)
+    {
+      auto& links = graph[v].obj.links;
+      for (unsigned l = 0; l < links.length; l++)
+        links[l].objidx = id_map[links[l].objidx];
+    }
+  }
+
+  /*
+   * Zeroes the offset field of type O located at head + link.position
+   * and registers it with the serializer as a link to the target object.
+   */
+  template <typename O> void
+  serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link,
+                          char* head,
+                          hb_serialize_context_t* c) const
+  {
+    OT::Offset<O>* offset = reinterpret_cast<OT::Offset<O>*> (head + link.position);
+    *offset = 0;
+    c->add_link (*offset,
+                 // serializer has an extra nil object at the start of the
+                 // object array. So all id's are +1 of what our id's are.
+                 link.objidx + 1,
+                 (hb_serialize_context_t::whence_t) link.whence,
+                 link.bias);
+  }
+
+  /*
+   * Dispatches to serialize_link_of_type<> with the integer type matching the
+   * link's width (is_wide: 32 bit, otherwise 16 bit) and signedness.
+   */
+  void serialize_link (const hb_serialize_context_t::object_t::link_t& link,
+                 char* head,
+                 hb_serialize_context_t* c) const
+  {
+    if (!link.is_wide)
+    {
+      if (link.is_signed)
+        serialize_link_of_type<OT::HBINT16> (link, head, c);
+      else
+        serialize_link_of_type<OT::HBUINT16> (link, head, c);
+      return;
+    }
+
+    if (link.is_signed)
+      serialize_link_of_type<OT::HBINT32> (link, head, c);
+    else
+      serialize_link_of_type<OT::HBUINT32> (link, head, c);
+  }
+
+ public:
+  // TODO(garretrieger): make private, will need to move most of offset overflow code into graph.
+  hb_vector_t<vertex_t> vertices_;
+ private:
+  hb_vector_t<clone_buffer_t> clone_buffers_;
+  bool edge_count_invalid;
+  bool distance_invalid;
+  bool positions_invalid;
+  bool successful;
+};
+
+
+/*
+ * Attempts to modify the topological sorting of the provided object graph to
+ * eliminate offset overflows in the links between objects of the graph. If a
+ * non-overflowing ordering is found the updated graph is serialized into the
+ * provided serialization context.
+ *
+ * If necessary the structure of the graph may be modified in ways that do not
+ * affect the functionality of the graph. For example shared objects may be
+ * duplicated.
+ *
+ * Errors are reported through `c`:
+ *  - HB_SERIALIZE_ERROR_OFFSET_OVERFLOW when overflows remain and none of the
+ *    available strategies can be applied to them.
+ *  - HB_SERIALIZE_ERROR_OTHER when the graph itself ended up in an error state.
+ */
+inline void
+hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& packed,
+                      hb_serialize_context_t* c) {
+  // Kahn sort is ~twice as fast as shortest distance sort and works for many fonts
+  // so try it first to save time.
+  graph_t sorted_graph (packed);
+  sorted_graph.sort_kahn ();
+  if (!sorted_graph.will_overflow ())
+  {
+    sorted_graph.serialize (c);
+    return;
+  }
+
+  sorted_graph.sort_shortest_distance ();
+
+  unsigned round = 0;
+  hb_vector_t<graph_t::overflow_record_t> overflows;
+  // TODO(garretrieger): select a good limit for max rounds.
+  while (!sorted_graph.in_error ()
+         && sorted_graph.will_overflow (&overflows)
+         && round++ < 10) {
+    // `round` is unsigned, so it is printed with %u.
+    DEBUG_MSG (SUBSET_REPACK, nullptr, "=== Over flow resolution round %u ===", round);
+    sorted_graph.print_overflows (overflows);
+
+    bool resolution_attempted = false;
+    // Parents whose children already had their priority raised this round;
+    // each parent is bumped at most once per round.
+    hb_set_t priority_bumped_parents;
+    // Try resolving the furthest overflows first.
+    for (int i = overflows.length - 1; i >= 0; i--)
+    {
+      const graph_t::overflow_record_t& r = overflows[i];
+      const auto& child = sorted_graph.vertices_[r.link->objidx];
+      if (child.is_shared ())
+      {
+        // The child object is shared, we may be able to eliminate the overflow
+        // by duplicating it.
+        sorted_graph.duplicate (r.parent, r.link->objidx);
+        resolution_attempted = true;
+
+        // Stop processing overflows for this round so that object order can be
+        // updated to account for the newly added object.
+        break;
+      }
+
+      if (child.is_leaf () && !priority_bumped_parents.has (r.parent))
+      {
+        // This object is too far from its parent, attempt to move it closer.
+        //
+        // TODO(garretrieger): initially limiting this to leaves since they can be
+        //                     moved closer with fewer consequences. However, this
+        //                     can likely be used for non-leaves as well.
+        // TODO(garretrieger): add a maximum priority, don't try to raise past this.
+        // TODO(garretrieger): also try lowering priority of the parent. Make it
+        //                     get placed further up in the ordering, closer to its children.
+        //                     This is probably preferable if the total size of the parent object
+        //                     is less than the total size of the children (and the parent can be
+        //                     moved), since in that case moving the parent will cause a smaller
+        //                     increase in the length of other offsets.
+        sorted_graph.raise_childrens_priority (r.parent);
+        priority_bumped_parents.add (r.parent);
+        resolution_attempted = true;
+        continue;
+      }
+
+      // TODO(garretrieger): add additional offset resolution strategies
+      // - Promotion to extension lookups.
+      // - Table splitting.
+    }
+
+    if (resolution_attempted)
+    {
+      // Re-sort so the next will_overflow () check sees the updated graph.
+      sorted_graph.sort_shortest_distance ();
+      continue;
+    }
+
+    DEBUG_MSG (SUBSET_REPACK, nullptr, "No resolution available :(");
+    c->err (HB_SERIALIZE_ERROR_OFFSET_OVERFLOW);
+    return;
+  }
+
+  if (sorted_graph.in_error ())
+  {
+    c->err (HB_SERIALIZE_ERROR_OTHER);
+    return;
+  }
+  // Reached both when no overflow remains and when the round limit was hit;
+  // in the latter case the graph is serialized as is.
+  sorted_graph.serialize (c);
+}
+
+
+#endif /* HB_REPACKER_HH */
--- a/src/hb-serialize.hh
+++ b/src/hb-serialize.hh
@ -41,6 +41,16 @@
 * Serialize
 */

+/*
+ * Error flags recorded by hb_serialize_context_t. Every value other than NONE
+ * is a distinct bit, so several errors can be held in one `errors` field.
+ */
+enum hb_serialize_error_t {
+  HB_SERIALIZE_ERROR_NONE =            0x00000000u, /* No error recorded. */
+  HB_SERIALIZE_ERROR_OTHER =           0x00000001u, /* Generic failure; the default for check_success (). */
+  HB_SERIALIZE_ERROR_OFFSET_OVERFLOW = 0x00000002u, /* A resolved offset did not fit its field. */
+  HB_SERIALIZE_ERROR_OUT_OF_ROOM =     0x00000004u, /* The buffer was too small for an allocation. */
+  HB_SERIALIZE_ERROR_INT_OVERFLOW =    0x00000008u, /* NOTE(review): presumably an integer that did not fit - confirm at call sites. */
+  HB_SERIALIZE_ERROR_ARRAY_OVERFLOW =  0x00000010u  /* NOTE(review): presumably an array length that did not fit - confirm at call sites. */
+};
+/* NOTE(review): presumably supplies the bitwise operators used to combine the flags - confirm. */
+HB_MARK_AS_FLAG_T (hb_serialize_error_t);
+
 struct hb_serialize_context_t
 {
  typedef unsigned objidx_t;
@ -51,6 +61,8 @@ struct hb_serialize_context_t
     Absolute	/* Absolute: from the start of the serialize buffer. */
   };

+
+
  struct object_t
  {
    void fini () { links.fini (); }
@ -117,30 +129,54 @@ struct hb_serialize_context_t
    object_pool.fini ();
  }

-  bool in_error () const { return !this->successful; }
+  /* True once any error flag has been recorded in `errors`. */
+  bool in_error () const { return bool (errors); }
+
+  /* True while no error flag has been recorded; the negation of in_error (). */
+  bool successful () const { return !bool (errors); }
+
+  /* Queries for individual flags. only_offset_overflow () is true only when
+   * HB_SERIALIZE_ERROR_OFFSET_OVERFLOW is the sole flag set. */
+  HB_NODISCARD bool ran_out_of_room () const { return errors & HB_SERIALIZE_ERROR_OUT_OF_ROOM; }
+  HB_NODISCARD bool offset_overflow () const { return errors & HB_SERIALIZE_ERROR_OFFSET_OVERFLOW; }
+  HB_NODISCARD bool only_offset_overflow () const { return errors == HB_SERIALIZE_ERROR_OFFSET_OVERFLOW; }
+
+  /*
+   * Points the serializer at a new buffer of `size` bytes beginning at start_
+   * and resets its state so serialization can start over in that buffer.
+   */
+  void reset (void *start_, unsigned int size)
+  {
+    start = (char*) start_;
+    end = start + size;
+    /* reset () reads start/end, so they must be updated first. */
+    reset ();
+    current = nullptr;
+  }

  void reset ()
  {
-    this->successful = true;
-    this->ran_out_of_room = false;
+    this->errors = HB_SERIALIZE_ERROR_NONE;
    this->head = this->start;
    this->tail = this->end;
    this->debug_depth = 0;

    fini ();
    this->packed.push (nullptr);
+    this->packed_map.init ();
  }

-  bool check_success (bool success)
-  { return this->successful && (success || (err_other_error (), false)); }
+  /*
+   * Returns false right away when the context already holds an error.
+   * Otherwise returns `success`; when `success` is false, err_type is recorded
+   * through err () and the value returned by err () becomes the result.
+   */
+  bool check_success (bool success,
+                      hb_serialize_error_t err_type = HB_SERIALIZE_ERROR_OTHER)
+  {
+    if (!successful ()) return false;
+    if (success) return true;
+    return err (err_type);
+  }

  template <typename T1, typename T2>
-  bool check_equal (T1 &&v1, T2 &&v2)
-  { return check_success ((long long) v1 == (long long) v2); }
+  bool check_equal (T1 &&v1, T2 &&v2, hb_serialize_error_t err_type)
+  {
+    if ((long long) v1 != (long long) v2)
+    {
+      return err (err_type);
+    }
+    return true;
+  }

  template <typename T1, typename T2>
-  bool check_assign (T1 &v1, T2 &&v2)
-  { return check_equal (v1 = v2, v2); }
+  bool check_assign (T1 &v1, T2 &&v2, hb_serialize_error_t err_type)
+  { return check_equal (v1 = v2, v2, err_type); }

  template <typename T> bool propagate_error (T &&obj)
  { return check_success (!hb_deref (obj).in_error ()); }
@ -167,12 +203,18 @@ struct hb_serialize_context_t
 		     "end [%p..%p] serialized %u bytes; %s",
 		     this->start, this->end,
 		     (unsigned) (this->head - this->start),
-		     this->successful ? "successful" : "UNSUCCESSFUL");
+		     successful () ? "successful" : "UNSUCCESSFUL");

    propagate_error (packed, packed_map);

    if (unlikely (!current)) return;
-    if (unlikely (in_error())) return;
+    if (unlikely (in_error()))
+    {
+      // Offset overflows that occur before link resolution cannot be handled
+      // by repacking, so set a more general error.
+      if (offset_overflow ()) err (HB_SERIALIZE_ERROR_OTHER);
+      return;
+    }

    assert (!current->next);

@ -351,7 +393,7 @@ struct hb_serialize_context_t
      for (const object_t::link_t &link : parent->links)
      {
 	const object_t* child = packed[link.objidx];
-	if (unlikely (!child)) { err_other_error(); return; }
+	if (unlikely (!child)) { err (HB_SERIALIZE_ERROR_OTHER); return; }
 	unsigned offset = 0;
 	switch ((whence_t) link.whence) {
 	case Head:     offset = child->head - parent->head; break;
@ -398,19 +440,19 @@ struct hb_serialize_context_t
  Type *start_embed (const Type &obj) const
  { return start_embed (hb_addressof (obj)); }

-  /* Following two functions exist to allow setting breakpoint on. */
-  void err_ran_out_of_room () { this->ran_out_of_room = true; }
-  void err_other_error () { this->successful = false; }
+  /*
+   * Records err_type in `errors` and returns whether the context is still
+   * error free afterwards, i.e. false whenever any error flag is set. The
+   * false result lets callers write `return err (...)` or
+   * `success || err (...)` to report the failure.
+   */
+  bool err (hb_serialize_error_t err_type)
+  {
+    return !bool ((errors = (errors | err_type)));
+  }

  template <typename Type>
  Type *allocate_size (unsigned int size)
  {
-    if (unlikely (!this->successful)) return nullptr;
+    if (unlikely (in_error ())) return nullptr;

    if (this->tail - this->head < ptrdiff_t (size))
    {
-      err_ran_out_of_room ();
-      this->successful = false;
+      err (HB_SERIALIZE_ERROR_OUT_OF_ROOM);
      return nullptr;
    }
    memset (this->head, 0, size);
@ -497,7 +539,7 @@ struct hb_serialize_context_t
  /* Output routines. */
  hb_bytes_t copy_bytes () const
  {
-    assert (this->successful);
+    assert (successful ());
    /* Copy both items from head side and tail side... */
    unsigned int len = (this->head - this->start)
 		     + (this->end  - this->tail);
@ -520,20 +562,22 @@ struct hb_serialize_context_t
 			   (char *) b.arrayZ, free);
  }

+  /* Read-only access to the list of packed objects. reset () pushes a nullptr
+   * placeholder as the first entry. */
+  const hb_vector_t<object_t *>& object_graph() const
+  { return packed; }
+
  private:
  template <typename T>
  void assign_offset (const object_t* parent, const object_t::link_t &link, unsigned offset)
  {
    auto &off = * ((BEInt<T> *) (parent->head + link.position));
    assert (0 == off);
-    check_assign (off, offset);
+    check_assign (off, offset, HB_SERIALIZE_ERROR_OFFSET_OVERFLOW);
  }

  public: /* TODO Make private. */
  char *start, *head, *tail, *end;
  unsigned int debug_depth;
-  bool successful;
-  bool ran_out_of_room;
+  hb_serialize_error_t errors;

  private:

@ -550,5 +594,4 @@ struct hb_serialize_context_t
  hb_hashmap_t<const object_t *, objidx_t, nullptr, 0> packed_map;
 };

-
 #endif /* HB_SERIALIZE_HH */
--- a/src/hb-subset.cc
+++ b/src/hb-subset.cc
@ -50,6 +50,7 @@
 #include "hb-ot-layout-gpos-table.hh"
 #include "hb-ot-var-gvar-table.hh"
 #include "hb-ot-var-hvar-table.hh"
+#include "hb-repacker.hh"


 static unsigned
@ -64,69 +65,129 @@ _plan_estimate_subset_table_size (hb_subset_plan_t *plan, unsigned table_len)
  return 512 + (unsigned) (table_len * sqrt ((double) dst_glyphs / src_glyphs));
 }

+/*
+ * Produces the final blob for a serialized table. Tables other than GSUB/GPOS,
+ * and tables without an offset overflow, are copied out as is. Otherwise the
+ * object graph is repacked into a fresh buffer of the same size; nullptr is
+ * returned if that buffer cannot be allocated or repacking ends in error.
+ */
+static hb_blob_t*
+_repack (hb_tag_t tag, const hb_serialize_context_t& c)
+{
+  const bool is_layout_table = (tag == HB_OT_TAG_GPOS || tag == HB_OT_TAG_GSUB);
+  if (!is_layout_table || !c.offset_overflow ())
+    return c.copy_blob ();
+
+  int buf_size = c.end - c.start;
+  hb_vector_t<char> buf;
+  if (unlikely (!buf.alloc (buf_size)))
+    return nullptr;
+
+  hb_serialize_context_t repacked ((void *) buf, buf_size);
+  hb_resolve_overflows (c.object_graph (), &repacked);
+
+  // TODO(garretrieger): refactor so we can share the resize/retry logic with the subset
+  //                     portion.
+  return unlikely (repacked.in_error ()) ? nullptr : repacked.copy_blob ();
+}
+
+/*
+ * Runs table->subset () into c->serializer, growing `buf` and starting over
+ * each time the serializer runs out of room. Returns the value of the last
+ * subset () call. If growing the buffer fails the serializer is left in its
+ * out-of-room error state and end_serialize () is not called.
+ */
+template<typename TableType>
+static
+bool
+_try_subset (const TableType *table,
+             hb_vector_t<char>* buf,
+             unsigned buf_size,
+             hb_subset_context_t* c /* OUT */)
+{
+  for (;;)
+  {
+    c->serializer->start_serialize<TableType> ();
+
+    bool needed = table->subset (c);
+    if (!c->serializer->ran_out_of_room ())
+    {
+      c->serializer->end_serialize ();
+      return needed;
+    }
+
+    // Grow by roughly 50% and retry from scratch.
+    buf_size += (buf_size >> 1) + 32;
+    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.",
+               HB_UNTAG (c->table_tag), buf_size);
+
+    if (unlikely (!buf->alloc (buf_size)))
+    {
+      DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to reallocate %u bytes.",
+                 HB_UNTAG (c->table_tag), buf_size);
+      return needed;
+    }
+
+    c->serializer->reset (buf->arrayZ, buf_size);
+  }
+}
+
 template<typename TableType>
 static bool
 _subset (hb_subset_plan_t *plan)
 {
-  bool result = false;
  hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table<TableType> (plan->source);
  const TableType *table = source_blob->as<TableType> ();

  hb_tag_t tag = TableType::tableTag;
-  if (source_blob->data)
+  if (!source_blob->data)
  {
-    hb_vector_t<char> buf;
-    /* TODO Not all tables are glyph-related.  'name' table size for example should not be
-     * affected by number of glyphs.  Accommodate that. */
-    unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length);
-    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size);
-    if (unlikely (!buf.alloc (buf_size)))
-    {
-      DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to allocate %u bytes.", HB_UNTAG (tag), buf_size);
-      hb_blob_destroy (source_blob);
-      return false;
-    }
-  retry:
-    hb_serialize_context_t serializer ((void *) buf, buf_size);
-    serializer.start_serialize<TableType> ();
-    hb_subset_context_t c (source_blob, plan, &serializer, tag);
-    bool needed = table->subset (&c);
-    if (serializer.ran_out_of_room)
-    {
-      buf_size += (buf_size >> 1) + 32;
-      DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", HB_UNTAG (tag), buf_size);
-      if (unlikely (!buf.alloc (buf_size)))
-      {
-	DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to reallocate %u bytes.", HB_UNTAG (tag), buf_size);
-	hb_blob_destroy (source_blob);
-	return false;
-      }
-      goto retry;
-    }
-    serializer.end_serialize ();
-
-    result = !serializer.in_error ();
-
-    if (result)
-    {
-      if (needed)
-      {
-	hb_blob_t *dest_blob = serializer.copy_blob ();
-	DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c final subset table size: %u bytes.", HB_UNTAG (tag), dest_blob->length);
-	result = c.plan->add_table (tag, dest_blob);
-	hb_blob_destroy (dest_blob);
-      }
-      else
-      {
-	DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset table subsetted to empty.", HB_UNTAG (tag));
-      }
-    }
+    DEBUG_MSG (SUBSET, nullptr,
+               "OT::%c%c%c%c::subset sanitize failed on source table.", HB_UNTAG (tag));
+    hb_blob_destroy (source_blob);
+    return false;
  }
-  else
-    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset sanitize failed on source table.", HB_UNTAG (tag));

+  hb_vector_t<char> buf;
+  /* TODO Not all tables are glyph-related.  'name' table size for example should not be
+   * affected by number of glyphs.  Accommodate that. */
+  unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length);
+  DEBUG_MSG (SUBSET, nullptr,
+             "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size);
+  if (unlikely (!buf.alloc (buf_size)))
+  {
+    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to allocate %u bytes.", HB_UNTAG (tag), buf_size);
+    hb_blob_destroy (source_blob);
+    return false;
+  }
+
+  bool needed = false;
+  hb_serialize_context_t serializer (buf.arrayZ, buf_size);
+  {
+    hb_subset_context_t c (source_blob, plan, &serializer, tag);
+    needed = _try_subset (table, &buf, buf_size, &c);
+  }
  hb_blob_destroy (source_blob);
-  DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset %s", HB_UNTAG (tag), result ? "success" : "FAILED!");
+
+  if (serializer.in_error () && !serializer.only_offset_overflow ())
+  {
+    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset FAILED!", HB_UNTAG (tag));
+    return false;
+  }
+
+  if (!needed)
+  {
+    DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset table subsetted to empty.", HB_UNTAG (tag));
+    return true;
+  }
+
+  bool result = false;
+  hb_blob_t *dest_blob = _repack (tag, serializer);
+  if (dest_blob)
+  {
+    DEBUG_MSG (SUBSET, nullptr,
+               "OT::%c%c%c%c final subset table size: %u bytes.",
+               HB_UNTAG (tag), dest_blob->length);
+    result = plan->add_table (tag, dest_blob);
+    hb_blob_destroy (dest_blob);
+  }
+
+  DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset %s",
+             HB_UNTAG (tag), result ? "success" : "FAILED!");
  return result;
 }

--- a/src/meson.build
+++ b/src/meson.build
@ -477,6 +477,8 @@ if get_option('tests').enabled()
  compiled_tests = {
    'test-algs': ['test-algs.cc', 'hb-static.cc'],
    'test-array': ['test-array.cc'],
+    'test-repacker': ['test-repacker.cc', 'hb-static.cc'],
+    'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'],
    'test-iter': ['test-iter.cc', 'hb-static.cc'],
    'test-meta': ['test-meta.cc', 'hb-static.cc'],
    'test-number': ['test-number.cc', 'hb-number.cc'],
--- a/src/test-priority-queue.cc
+++ b/src/test-priority-queue.cc
@ -0,0 +1,89 @@
+/*
+ * Copyright © 2020  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include "hb.hh"
+#include "hb-priority-queue.hh"
+
+/* Checks that minimum () always reports the smallest priority inserted so far. */
+static void
+test_insert ()
+{
+  hb_priority_queue_t pq;
+  assert (pq.is_empty ());
+
+  // The first element becomes the minimum.
+  pq.insert (10, 0);
+  assert (!pq.is_empty ());
+  assert (pq.minimum () == hb_pair (10, 0));
+
+  // A larger priority leaves the minimum untouched.
+  pq.insert (20, 1);
+  assert (pq.minimum () == hb_pair (10, 0));
+
+  // A smaller priority replaces it.
+  pq.insert (5, 2);
+  assert (pq.minimum () == hb_pair (5, 2));
+
+  pq.insert (15, 3);
+  assert (pq.minimum () == hb_pair (5, 2));
+
+  pq.insert (1, 4);
+  assert (pq.minimum () == hb_pair (1, 4));
+}
+
+/* Inserts eight (10 * i, i) entries out of order and checks that they are
+ * popped in ascending priority order, leaving the queue empty. */
+static void
+test_extract ()
+{
+  static const int insertion_order[] = {0, 6, 3, 4, 2, 5, 7, 1};
+
+  hb_priority_queue_t pq;
+  for (int id : insertion_order)
+    pq.insert (id * 10, id);
+
+  for (int expected = 0; expected < 8; expected++)
+  {
+    assert (!pq.is_empty ());
+    assert (pq.minimum () == hb_pair (expected * 10, expected));
+    assert (pq.pop_minimum () == hb_pair (expected * 10, expected));
+  }
+
+  assert (pq.is_empty ());
+}
+
+/* Popping from an empty queue is expected to yield the pair (0, 0). */
+static void
+test_extract_empty ()
+{
+  hb_priority_queue_t empty_queue;
+  assert (empty_queue.pop_minimum () == hb_pair (0, 0));
+}
+
+/* Runs every priority queue test; a failing assert aborts the program. */
+int
+main (int argc, char **argv)
+{
+  test_insert ();
+  test_extract ();
+  test_extract_empty ();
+  return 0;
+}
--- a/src/test-repacker.cc
+++ b/src/test-repacker.cc
@ -0,0 +1,485 @@
+/*
+ * Copyright © 2020  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include <string>
+
+#include "hb-repacker.hh"
+#include "hb-open-type.hh"
+
+/* Pushes a new object on the serializer, allocates len bytes for it and fills
+ * them from tag with strncpy. The object is left open so that the caller can
+ * append offsets before packing it. */
+static void start_object(const char* tag,
+                         unsigned len,
+                         hb_serialize_context_t* c)
+{
+  c->push ();
+  char* payload = c->allocate_size<char> (len);
+  strncpy (payload, tag, len);
+}
+
+
+/* Creates an object without links via start_object () and packs it with
+ * pop_pack (false). Returns the id reported by pop_pack. */
+static unsigned add_object(const char* tag,
+                           unsigned len,
+                           hb_serialize_context_t* c)
+{
+  start_object (tag, len, c);
+  const unsigned packed_id = c->pop_pack (false);
+  return packed_id;
+}
+
+
+/* Appends an Offset16 field to the object currently being built and links it
+ * to the object with the given id. */
+static void add_offset (unsigned id,
+                        hb_serialize_context_t* c)
+{
+  auto* field = c->start_embed<OT::Offset16> ();
+  c->extend_min (field);
+  c->add_link (*field, id);
+}
+
+/* Builds a three object graph: "abc" links to "def" and to "ghi" (in that
+ * order); both children are leaves. */
+static void
+populate_serializer_simple (hb_serialize_context_t* c)
+{
+  c->start_serialize<char> ();
+
+  unsigned ghi = add_object ("ghi", 3, c);
+  unsigned def = add_object ("def", 3, c);
+
+  start_object ("abc", 3, c);
+  add_offset (def, c);
+  add_offset (ghi, c);
+  c->pop_pack ();
+
+  c->end_serialize();
+}
+
+/* Builds a root "abc" with 16 bit links to three leaves of 50000, 20000 and
+ * 10000 bytes (link order), created in the order 10000, 20000, 50000. */
+static void
+populate_serializer_with_overflow (hb_serialize_context_t* c)
+{
+  std::string filler(50000, 'a');
+  const char* bytes = filler.c_str();
+  c->start_serialize<char> ();
+
+  unsigned small = add_object (bytes, 10000, c);
+  unsigned medium = add_object (bytes, 20000, c);
+  unsigned large = add_object (bytes, 50000, c);
+
+  start_object ("abc", 3, c);
+  add_offset (large, c);
+  add_offset (medium, c);
+  add_offset (small, c);
+  c->pop_pack ();
+
+  c->end_serialize();
+}
+
+/* Builds a graph in which the leaf "def" is shared: a 10000 byte root links
+ * to a 60000 byte object and to "def", and the 60000 byte object links to
+ * "def" as well. */
+static void
+populate_serializer_with_dedup_overflow (hb_serialize_context_t* c)
+{
+  std::string filler(70000, 'a');
+  const char* bytes = filler.c_str();
+  c->start_serialize<char> ();
+
+  unsigned shared_leaf = add_object ("def", 3, c);
+
+  start_object (bytes, 60000, c);
+  add_offset (shared_leaf, c);
+  unsigned big_child = c->pop_pack (false);
+
+  start_object (bytes, 10000, c);
+  add_offset (big_child, c);
+  add_offset (shared_leaf, c);
+  c->pop_pack (false);
+
+  c->end_serialize();
+}
+
+/* Builds: "abc" -> { "def", "jkl" }, "def" -> { "ghi" }. */
+static void
+populate_serializer_complex_1 (hb_serialize_context_t* c)
+{
+  c->start_serialize<char> ();
+
+  unsigned jkl = add_object ("jkl", 3, c);
+  unsigned ghi = add_object ("ghi", 3, c);
+
+  start_object ("def", 3, c);
+  add_offset (ghi, c);
+  unsigned def = c->pop_pack (false);
+
+  start_object ("abc", 3, c);
+  add_offset (def, c);
+  add_offset (jkl, c);
+  c->pop_pack ();
+
+  c->end_serialize();
+}
+
+/* Builds: "abc" -> { "def", "jkl", "mn" }, "def" -> { "ghi" },
+ * "ghi" -> { "jkl" }. "jkl" therefore has two parents. */
+static void
+populate_serializer_complex_2 (hb_serialize_context_t* c)
+{
+  c->start_serialize<char> ();
+
+  unsigned mn = add_object ("mn", 2, c);
+
+  unsigned jkl = add_object ("jkl", 3, c);
+
+  start_object ("ghi", 3, c);
+  add_offset (jkl, c);
+  unsigned ghi = c->pop_pack (false);
+
+  start_object ("def", 3, c);
+  add_offset (ghi, c);
+  unsigned def = c->pop_pack (false);
+
+  start_object ("abc", 3, c);
+  add_offset (def, c);
+  add_offset (jkl, c);
+  add_offset (mn, c);
+  c->pop_pack ();
+
+  c->end_serialize();
+}
+
+/* Builds: "abc" -> { "def", "jkl", "mn" }, "def" -> { "ghi" },
+ * "ghi" -> { "jkl" }, "jkl" -> { "opqrst" }. "jkl" has two parents and is
+ * not a leaf. */
+static void
+populate_serializer_complex_3 (hb_serialize_context_t* c)
+{
+  c->start_serialize<char> ();
+
+  unsigned opqrst = add_object ("opqrst", 6, c);
+
+  unsigned mn = add_object ("mn", 2, c);
+
+  start_object ("jkl", 3, c);
+  add_offset (opqrst, c);
+  unsigned jkl = c->pop_pack (false);
+
+  start_object ("ghi", 3, c);
+  add_offset (jkl, c);
+  unsigned ghi = c->pop_pack (false);
+
+  start_object ("def", 3, c);
+  add_offset (ghi, c);
+  unsigned def = c->pop_pack (false);
+
+  start_object ("abc", 3, c);
+  add_offset (def, c);
+  add_offset (jkl, c);
+  add_offset (mn, c);
+  c->pop_pack ();
+
+  c->end_serialize();
+}
+
+/* Kahn-sorts the complex_1 graph and checks the resulting index of every
+ * object together with its remapped links. */
+static void test_sort_kahn_1 ()
+{
+  const size_t size = 100;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_complex_1 (&serializer);
+
+  graph_t sorted (serializer.object_graph ());
+  sorted.sort_kahn ();
+
+  const auto& abc = sorted.object (3);
+  assert (strncmp (abc.head, "abc", 3) == 0);
+  assert (abc.links.length == 2);
+  assert (abc.links[0].objidx == 2);
+  assert (abc.links[1].objidx == 1);
+
+  const auto& def = sorted.object (2);
+  assert (strncmp (def.head, "def", 3) == 0);
+  assert (def.links.length == 1);
+  assert (def.links[0].objidx == 0);
+
+  const auto& jkl = sorted.object (1);
+  assert (strncmp (jkl.head, "jkl", 3) == 0);
+  assert (jkl.links.length == 0);
+
+  const auto& ghi = sorted.object (0);
+  assert (strncmp (ghi.head, "ghi", 3) == 0);
+  assert (ghi.links.length == 0);
+
+  free (storage);
+}
+
+/* Kahn-sorts the complex_2 graph (which contains a shared leaf) and checks
+ * the resulting index of every object together with its remapped links. */
+static void test_sort_kahn_2 ()
+{
+  const size_t size = 100;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_complex_2 (&serializer);
+
+  graph_t sorted (serializer.object_graph ());
+  sorted.sort_kahn ();
+
+  const auto& abc = sorted.object (4);
+  assert (strncmp (abc.head, "abc", 3) == 0);
+  assert (abc.links.length == 3);
+  assert (abc.links[0].objidx == 3);
+  assert (abc.links[1].objidx == 0);
+  assert (abc.links[2].objidx == 2);
+
+  const auto& def = sorted.object (3);
+  assert (strncmp (def.head, "def", 3) == 0);
+  assert (def.links.length == 1);
+  assert (def.links[0].objidx == 1);
+
+  const auto& mn = sorted.object (2);
+  assert (strncmp (mn.head, "mn", 2) == 0);
+  assert (mn.links.length == 0);
+
+  const auto& ghi = sorted.object (1);
+  assert (strncmp (ghi.head, "ghi", 3) == 0);
+  assert (ghi.links.length == 1);
+  assert (ghi.links[0].objidx == 0);
+
+  const auto& jkl = sorted.object (0);
+  assert (strncmp (jkl.head, "jkl", 3) == 0);
+  assert (jkl.links.length == 0);
+
+  free (storage);
+}
+
+/* Sorts the complex_2 graph by shortest distance and checks the resulting
+ * index of every object together with its remapped links. */
+static void test_sort_shortest ()
+{
+  const size_t size = 100;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_complex_2 (&serializer);
+
+  graph_t sorted (serializer.object_graph ());
+  sorted.sort_shortest_distance ();
+
+  const auto& abc = sorted.object (4);
+  assert (strncmp (abc.head, "abc", 3) == 0);
+  assert (abc.links.length == 3);
+  assert (abc.links[0].objidx == 2);
+  assert (abc.links[1].objidx == 0);
+  assert (abc.links[2].objidx == 3);
+
+  const auto& mn = sorted.object (3);
+  assert (strncmp (mn.head, "mn", 2) == 0);
+  assert (mn.links.length == 0);
+
+  const auto& def = sorted.object (2);
+  assert (strncmp (def.head, "def", 3) == 0);
+  assert (def.links.length == 1);
+  assert (def.links[0].objidx == 1);
+
+  const auto& ghi = sorted.object (1);
+  assert (strncmp (ghi.head, "ghi", 3) == 0);
+  assert (ghi.links.length == 1);
+  assert (ghi.links[0].objidx == 0);
+
+  const auto& jkl = sorted.object (0);
+  assert (strncmp (jkl.head, "jkl", 3) == 0);
+  assert (jkl.links.length == 0);
+
+  free (storage);
+}
+
+/* Calls duplicate (4, 1) on the complex_2 graph, i.e. duplicates the shared
+ * leaf "jkl" for parent "abc", and checks the resulting six object graph. */
+static void test_duplicate_leaf ()
+{
+  const size_t size = 100;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_complex_2 (&serializer);
+
+  graph_t g (serializer.object_graph ());
+  g.duplicate (4, 1);
+
+  const auto& abc = g.object (5);
+  assert (strncmp (abc.head, "abc", 3) == 0);
+  assert (abc.links.length == 3);
+  assert (abc.links[0].objidx == 3);
+  assert (abc.links[1].objidx == 4);
+  assert (abc.links[2].objidx == 0);
+
+  const auto& jkl_at_4 = g.object (4);
+  assert (strncmp (jkl_at_4.head, "jkl", 3) == 0);
+  assert (jkl_at_4.links.length == 0);
+
+  const auto& def = g.object (3);
+  assert (strncmp (def.head, "def", 3) == 0);
+  assert (def.links.length == 1);
+  assert (def.links[0].objidx == 2);
+
+  const auto& ghi = g.object (2);
+  assert (strncmp (ghi.head, "ghi", 3) == 0);
+  assert (ghi.links.length == 1);
+  assert (ghi.links[0].objidx == 1);
+
+  const auto& jkl_at_1 = g.object (1);
+  assert (strncmp (jkl_at_1.head, "jkl", 3) == 0);
+  assert (jkl_at_1.links.length == 0);
+
+  const auto& mn = g.object (0);
+  assert (strncmp (mn.head, "mn", 2) == 0);
+  assert (mn.links.length == 0);
+
+  free (storage);
+}
+
+/* Calls duplicate (3, 2) on the complex_3 graph, i.e. duplicates the shared
+ * non-leaf "jkl" for parent "ghi", and checks the resulting seven object
+ * graph. Both "jkl" objects keep pointing at the single "opqrst". */
+static void test_duplicate_interior ()
+{
+  const size_t size = 100;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_complex_3 (&serializer);
+
+  graph_t g (serializer.object_graph ());
+  g.duplicate (3, 2);
+
+  const auto& abc = g.object (6);
+  assert (strncmp (abc.head, "abc", 3) == 0);
+  assert (abc.links.length == 3);
+  assert (abc.links[0].objidx == 4);
+  assert (abc.links[1].objidx == 2);
+  assert (abc.links[2].objidx == 1);
+
+  const auto& jkl_at_5 = g.object (5);
+  assert (strncmp (jkl_at_5.head, "jkl", 3) == 0);
+  assert (jkl_at_5.links.length == 1);
+  assert (jkl_at_5.links[0].objidx == 0);
+
+  const auto& def = g.object (4);
+  assert (strncmp (def.head, "def", 3) == 0);
+  assert (def.links.length == 1);
+  assert (def.links[0].objidx == 3);
+
+  const auto& ghi = g.object (3);
+  assert (strncmp (ghi.head, "ghi", 3) == 0);
+  assert (ghi.links.length == 1);
+  assert (ghi.links[0].objidx == 5);
+
+  const auto& jkl_at_2 = g.object (2);
+  assert (strncmp (jkl_at_2.head, "jkl", 3) == 0);
+  assert (jkl_at_2.links.length == 1);
+  assert (jkl_at_2.links[0].objidx == 0);
+
+  const auto& mn = g.object (1);
+  assert (strncmp (mn.head, "mn", 2) == 0);
+  assert (mn.links.length == 0);
+
+  const auto& opqrst = g.object (0);
+  assert (strncmp (opqrst.head, "opqrst", 6) == 0);
+  assert (opqrst.links.length == 0);
+
+  free (storage);
+}
+
+/* Round trip: serializing a graph built from the simple serializer into a
+ * second serializer must produce exactly the same bytes. */
+static void
+test_serialize ()
+{
+  const size_t size = 100;
+
+  void* source_storage = malloc (size);
+  hb_serialize_context_t source (source_storage, size);
+  populate_serializer_simple (&source);
+  hb_bytes_t expected = source.copy_bytes ();
+
+  void* target_storage = malloc (size);
+  hb_serialize_context_t target (target_storage, size);
+
+  graph_t g (source.object_graph ());
+  g.serialize (&target);
+  hb_bytes_t actual = target.copy_bytes ();
+
+  assert (actual == expected);
+
+  actual.free ();
+  expected.free ();
+  free (source_storage);
+  free (target_storage);
+}
+
+/* The small complex_2 graph must not be reported as overflowing. */
+static void test_will_overflow_1 ()
+{
+  const size_t size = 100;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_complex_2 (&serializer);
+
+  graph_t g (serializer.object_graph ());
+  assert (!g.will_overflow (nullptr));
+
+  free (storage);
+}
+
+/* The graph with three large leaves must be reported as overflowing. */
+static void test_will_overflow_2 ()
+{
+  const size_t size = 160000;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_with_overflow (&serializer);
+
+  graph_t g (serializer.object_graph ());
+  assert (g.will_overflow (nullptr));
+
+  free (storage);
+}
+
+/* The graph with a shared leaf behind a 60000 byte object must be reported
+ * as overflowing. */
+static void test_will_overflow_3 ()
+{
+  const size_t size = 160000;
+  void* storage = malloc (size);
+  hb_serialize_context_t serializer (storage, size);
+  populate_serializer_with_dedup_overflow (&serializer);
+
+  graph_t g (serializer.object_graph ());
+  assert (g.will_overflow (nullptr));
+
+  free (storage);
+}
+
+/* hb_resolve_overflows () must repack the overflowing graph without an
+ * offset overflow. The output holds the three leaves (80000 bytes), the
+ * 3 byte root payload and the root's three 16 bit offsets. */
+static void test_resolve_overflows_via_sort ()
+{
+  size_t buffer_size = 160000;
+  void* buffer = malloc (buffer_size);
+  hb_serialize_context_t c (buffer, buffer_size);
+  populate_serializer_with_overflow (&c);
+
+  void* out_buffer = malloc (buffer_size);
+  hb_serialize_context_t out (out_buffer, buffer_size);
+
+  // hb_resolve_overflows () builds its own graph from the packed objects, so
+  // no separate graph_t is needed here.
+  hb_resolve_overflows (c.object_graph (), &out);
+  assert (!out.offset_overflow ());
+  hb_bytes_t result = out.copy_bytes ();
+  assert (result.length == (80000 + 3 + 3 * 2));
+
+  result.free ();
+  free (buffer);
+  free (out_buffer);
+}
+
+/* hb_resolve_overflows () must repack the graph with a shared leaf without an
+ * offset overflow. The expected length is the 10000 byte root with its two
+ * offsets, the 60000 byte object with its one offset, and 3 * 2 further
+ * bytes, which matches two copies of the 3 byte leaf. */
+static void test_resolve_overflows_via_duplication ()
+{
+  size_t buffer_size = 160000;
+  void* buffer = malloc (buffer_size);
+  hb_serialize_context_t c (buffer, buffer_size);
+  populate_serializer_with_dedup_overflow (&c);
+
+  void* out_buffer = malloc (buffer_size);
+  hb_serialize_context_t out (out_buffer, buffer_size);
+
+  // hb_resolve_overflows () builds its own graph from the packed objects, so
+  // no separate graph_t is needed here.
+  hb_resolve_overflows (c.object_graph (), &out);
+  assert (!out.offset_overflow ());
+  hb_bytes_t result = out.copy_bytes ();
+  assert (result.length == (10000 + 2 * 2 + 60000 + 2 + 3 * 2));
+
+  result.free ();
+  free (buffer);
+  free (out_buffer);
+}
+
+// TODO(garretrieger): update will_overflow tests to check the overflows array.
+// TODO(garretrieger): add a test(s) using a real font.
+// TODO(garretrieger): add tests for priority raising.
+
+/* Runs every repacker test in a fixed order; a failing assert aborts the
+ * program. */
+int
+main (int argc, char **argv)
+{
+  // Serialization and sorting.
+  test_serialize ();
+  test_sort_kahn_1 ();
+  test_sort_kahn_2 ();
+  test_sort_shortest ();
+
+  // Overflow detection and resolution.
+  test_will_overflow_1 ();
+  test_will_overflow_2 ();
+  test_will_overflow_3 ();
+  test_resolve_overflows_via_sort ();
+  test_resolve_overflows_via_duplication ();
+
+  // Graph modification.
+  test_duplicate_leaf ();
+  test_duplicate_interior ();
+  return 0;
+}
--- a/test/subset/Makefile.am
+++ b/test/subset/Makefile.am
@ -13,7 +13,9 @@ libs:
 EXTRA_DIST += \
 	meson.build \
 	run-tests.py \
+	run-repack-tests.py \
 	subset_test_suite.py \
+	repack_test.py \
 	$(NULL)

 CLEANFILES += \
--- a/test/subset/data/Makefile.am
+++ b/test/subset/data/Makefile.am
@ -3,7 +3,7 @@
 NULL =
 EXTRA_DIST =
 CLEANFILES =
-SUBDIRS =
+SUBDIRS = repack_tests

 EXTRA_DIST += \
 	$(TESTS) \
--- a/test/subset/data/fonts/NotoNastaliqUrdu-Bold.ttf
+++ b/test/subset/data/fonts/NotoNastaliqUrdu-Bold.ttf
--- a/test/subset/data/repack_tests/Makefile.am
+++ b/test/subset/data/repack_tests/Makefile.am
@ -0,0 +1,21 @@
+# Process this file with automake to produce Makefile.in
+
+NULL =
+EXTRA_DIST =
+CLEANFILES =
+SUBDIRS =
+
+# Convenience targets:
+lib: libs # Always build subsetter lib in this subdir
+libs:
+	@$(MAKE) $(AM_MAKEFLAGS) -C $(top_builddir)/src libs
+
+# Every *.tests file in TESTS is run through run-repack-tests.py, which is
+# given the hb-subset binary as its first argument.
+TEST_EXTENSIONS = .tests
+TESTS_LOG_COMPILER = $(srcdir)/../../run-repack-tests.py $(top_builddir)/util/hb-subset$(EXEEXT)
+include Makefile.sources
+
+# Distribute every test definition named in Makefile.sources, including the
+# expected-failure and disabled ones, which are not part of TESTS.
+EXTRA_DIST += \
+	$(TESTS) \
+	$(XFAIL_TESTS) \
+	$(DISABLED_TESTS) \
+	$(NULL)
+
+-include $(top_srcdir)/git.mk
--- a/test/subset/data/repack_tests/Makefile.sources
+++ b/test/subset/data/repack_tests/Makefile.sources
@ -0,0 +1,12 @@
+# Repacking test definitions that are run as part of the test suite.
+TESTS = \
+	basic.tests \
+	prioritization.tests \
+	table_duplication.tests \
+	$(NULL)
+
+# Test definitions that are expected to fail.
+# NOTE(review): advanced_prioritization.tests is not also listed in TESTS, so
+# automake presumably never runs it -- confirm that this is intended.
+XFAIL_TESTS = \
+	advanced_prioritization.tests \
+	$(NULL)
+
+# Test definitions that are currently switched off (none at the moment).
+DISABLED_TESTS = \
+	$(NULL)
--- a/test/subset/data/repack_tests/advanced_prioritization.tests
+++ b/test/subset/data/repack_tests/advanced_prioritization.tests
@ -0,0 +1,72 @@
+NotoNastaliqUrdu-Bold.ttf
+0x0020
+0x0028
+0x0029
+0x002C
+0x002D
+0x002E
+0x0030
+0x0031
+0x0032
+0x0033
+0x0034
+0x0035
+0x0036
+0x0037
+0x0038
+0x0039
+0x003A
+0x060C
+0x061F
+0x0621
+0x0622
+0x0623
+0x0624
+0x0625
+0x0626
+0x0627
+0x0628
+0x0629
+0x062A
+0x062B
+0x062C
+0x062D
+0x062E
+0x062F
+0x0630
+0x0631
+0x0632
+0x0633
+0x0634
+0x0635
+0x0636
+0x0637
+0x0638
+0x0639
+0x063A
+0x0640
+0x0641
+0x0642
+0x0643
+0x0644
+0x0645
+0x0646
+0x0647
+0x0648
+0x0649
+0x064A
+0x064B
+0x064C
+0x064F
+0x0651
+0x067E
+0x0686
+0x0698
+0x06A9
+0x06AF
+0x06BE
+0x06CC
+0x200C
+0x200D
+0x200E
+
--- a/test/subset/data/repack_tests/basic.tests
+++ b/test/subset/data/repack_tests/basic.tests
@ -0,0 +1,52 @@
+NotoNastaliqUrdu-Bold.ttf
+0x060C
+0x061F
+0x0621
+0x0622
+0x0623
+0x0624
+0x0625
+0x0626
+0x0627
+0x0628
+0x0629
+0x062A
+0x062B
+0x062C
+0x062D
+0x062E
+0x062F
+0x0630
+0x0631
+0x0632
+0x0633
+0x0634
+0x0635
+0x0636
+0x0637
+0x0638
+0x0639
+0x063A
+0x0640
+0x0641
+0x0642
+0x0643
+0x0644
+0x0645
+0x0646
+0x0647
+0x0648
+0x0649
+0x064A
+0x064B
+0x064F
+0x0651
+0x067E
+0x0686
+0x0698
+0x06A9
+0x06AF
+0x06CC
+0x200C
+0x200D
+0x200E
--- a/test/subset/data/repack_tests/prioritization.tests
+++ b/test/subset/data/repack_tests/prioritization.tests
@ -0,0 +1,77 @@
+NotoNastaliqUrdu-Bold.ttf
+0x0020
+0x0028
+0x0029
+0x002C
+0x002D
+0x002E
+0x0030
+0x0031
+0x0032
+0x0033
+0x0034
+0x0035
+0x0036
+0x0037
+0x0038
+0x0039
+0x003A
+0x060C
+0x061F
+0x0621
+0x0622
+0x0623
+0x0624
+0x0625
+0x0626
+0x0627
+0x0628
+0x0629
+0x062A
+0x062B
+0x062C
+0x062D
+0x062E
+0x062F
+0x0630
+0x0631
+0x0632
+0x0633
+0x0634
+0x0635
+0x0636
+0x0637
+0x0638
+0x0639
+0x063A
+0x0640
+0x0641
+0x0642
+0x0643
+0x0644
+0x0645
+0x0646
+0x0647
+0x0648
+0x0649
+0x064A
+0x064B
+0x064F
+0x0651
+0x0653
+0x0679
+0x067E
+0x0686
+0x0688
+0x0691
+0x0698
+0x06A9
+0x06AF
+0x06BA
+0x06BE
+0x06C1
+0x06CC
+0x06D2
+0x200C
+0x200D
+0x200E
--- a/test/subset/data/repack_tests/table_duplication.tests
+++ b/test/subset/data/repack_tests/table_duplication.tests
@ -0,0 +1,97 @@
+NotoNastaliqUrdu-Bold.ttf
+0x0028
+0x0029
+0x002C
+0x002D
+0x002E
+0x0030
+0x0031
+0x0032
+0x0033
+0x0034
+0x0035
+0x0036
+0x0037
+0x0038
+0x0039
+0x003A
+0x0041
+0x0042
+0x0043
+0x0044
+0x0045
+0x0046
+0x0047
+0x0048
+0x0049
+0x004C
+0x004D
+0x004E
+0x004F
+0x0050
+0x0052
+0x0053
+0x0054
+0x0055
+0x0056
+0x0057
+0x0061
+0x0062
+0x0063
+0x0064
+0x0065
+0x0066
+0x0067
+0x0068
+0x0069
+0x006B
+0x006C
+0x006D
+0x006E
+0x006F
+0x0070
+0x0072
+0x0073
+0x0074
+0x0075
+0x0076
+0x0077
+0x0078
+0x0079
+0x060C
+0x0626
+0x0627
+0x0628
+0x062A
+0x062C
+0x062D
+0x062E
+0x062F
+0x0631
+0x0632
+0x0633
+0x0634
+0x0635
+0x0636
+0x0637
+0x0638
+0x0639
+0x0641
+0x0642
+0x0644
+0x0645
+0x0646
+0x0648
+0x0653
+0x0679
+0x067E
+0x0686
+0x0688
+0x0691
+0x06A9
+0x06AF
+0x06BA
+0x06BE
+0x06C1
+0x06CC
+0x06D2
--- a/test/subset/meson.build
+++ b/test/subset/meson.build
@ -28,6 +28,13 @@ tests = [
  'cbdt',
 ]

+# Repacker tests; each entry names a data/repack_tests/<name>.tests file.
+repack_tests = [
+  'basic',
+  'prioritization',
+  'table_duplication',
+]
+
+
 run_test = find_program('run-tests.py')

 foreach t : tests
@ -45,3 +52,18 @@ foreach t : tests
    suite: ['subset', 'slow'],
  )
 endforeach
+
+# Register one test per entry of repack_tests. Each test runs
+# run-repack-tests.py with hb_subset (presumably the hb-subset executable
+# declared elsewhere in the build) and the absolute path of the .tests file.
+run_repack_test = find_program('run-repack-tests.py')
+
+foreach t : repack_tests
+  fname = '@0@.tests'.format(t)
+
+  test(t, run_repack_test,
+    args: [
+      hb_subset,
+      join_paths(meson.current_source_dir(), 'data', 'repack_tests', fname),
+    ],
+    # Run from two directory levels above this build directory.
+    workdir: join_paths(meson.current_build_dir(), '..', '..'),
+    suite: ['subset', 'repack'],
+  )
+endforeach
--- a/test/subset/repack_test.py
+++ b/test/subset/repack_test.py
@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+import os
+
+# Parses a single repacking test file. The first line of the file is
+# the name of the font to use and the remaining lines define the set of
+# codepoints in the subset.
+class RepackTest:
+
+	# test_path:  path of the .tests file; used to locate the fonts directory.
+	# definition: the complete text of the .tests file.
+	# Raises ValueError if the definition does not start with a font name.
+	def __init__(self, test_path, definition):
+		self.test_path = test_path
+		self.font_name = None
+		self.codepoints = set ()
+		self._parse(definition)
+
+	# Path of the test font: <directory of the test file>/../fonts/<font_name>.
+	def font_path(self):
+		return os.path.join (self._base_path (), "fonts", self.font_name)
+
+	# Comma separated list of the codepoints. The entries are sorted so that
+	# the result is the same on every run; set iteration order is not.
+	def codepoints_string (self):
+		return ",".join (sorted (self.codepoints))
+
+	# Parent of the directory that holds the test file.
+	def _base_path(self):
+		return os.path.join (os.path.dirname (self.test_path), "../")
+
+	# Fills in font_name from the first line and codepoints from the
+	# remaining non-blank lines. Surrounding whitespace is ignored.
+	def _parse(self, definition):
+		lines = definition.splitlines ()
+		font_name = lines[0].strip () if lines else ""
+		if not font_name:
+			raise ValueError (
+				"%s: the first line must name the font to use." % self.test_path)
+		self.font_name = font_name
+
+		for line in lines[1:]:
+			line = line.strip ()
+			if not line:
+				continue
+
+			self.codepoints.add (line)
--- a/test/subset/run-repack-tests.py
+++ b/test/subset/run-repack-tests.py
@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+
+# Runs a single repacking test: subsets the test's font with hb-subset using
+# the test's codepoints and checks that fontTools can open the result.
+
+from difflib import unified_diff
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import shutil
+import io
+
+from repack_test import RepackTest
+
+try:
+	from fontTools.ttLib import TTFont
+except ImportError:
+	print ("fonttools is not present, skipping test.")
+	sys.exit (77)
+
+ots_sanitize = shutil.which ("ots-sanitize")
+
+# Runs `command` (an argument list) to completion and returns a
+# (stdout text, exit code) pair. Whatever the child wrote to stderr is
+# forwarded to this process's stderr.
+def cmd (command):
+	completed = subprocess.run (
+		command,
+		stdout=subprocess.PIPE,
+		stderr=subprocess.PIPE,
+		universal_newlines=True)
+	print (completed.stderr, end="", file=sys.stderr)
+	return completed.stdout, completed.returncode
+
+# Prints `message` together with the font name and absolute path of `test`,
+# then returns 1 so that callers can add the result to a failure count.
+# `cli_args` is accepted but not used.
+def fail_test (test, cli_args, message):
+	print ('ERROR: %s' % message)
+	print ('Test State:')
+	font_name = test.font_name
+	print ('  test.font_name    %s' % font_name)
+	test_file = os.path.abspath (test.test_path)
+	print ('  test.test_path %s' % test_file)
+	return 1
+
+# Subsets test.font_path () to test.codepoints_string () with hb-subset and
+# checks the output. Returns 0 on success and 1 (via fail_test) on failure.
+#
+# The output is written into a fresh temporary directory. That directory is
+# removed again when the test passes; on failure it is kept so that the file
+# named in the printed command line can be inspected.
+def run_test (test, should_check_ots):
+	out_dir = tempfile.mkdtemp ()
+	passed = False
+	try:
+		out_file = os.path.join (out_dir, test.font_name + '-subset.ttf')
+		# NOTE(review): "--drop-tables-=" presumably removes the listed tables
+		# from hb-subset's drop list so that they are kept -- confirm against
+		# the hb-subset option documentation.
+		cli_args = [hb_subset,
+			    "--font-file=" + test.font_path (),
+			    "--output-file=" + out_file,
+			    "--unicodes=%s" % test.codepoints_string (),
+			    "--drop-tables-=GPOS,GSUB,GDEF",]
+		print (' '.join (cli_args))
+		_, return_code = cmd (cli_args)
+
+		if return_code:
+			return fail_test (test, cli_args, "%s returned %d" % (' '.join (cli_args), return_code))
+
+		try:
+			# Successfully opening the font is the whole check.
+			with TTFont (out_file):
+				pass
+		except Exception as e:
+			print (e)
+			return fail_test (test, cli_args, "ttx failed to parse the result")
+
+		if should_check_ots:
+			print ("Checking output with ots-sanitize.")
+			if not check_ots (out_file):
+				return fail_test (test, cli_args, 'ots for subsetted file fails.')
+
+		passed = True
+		return 0
+	finally:
+		if passed:
+			shutil.rmtree (out_dir, ignore_errors=True)
+
+# Returns True when an ots-sanitize executable was found at start-up.
+# Otherwise prints a notice that OTS checks are skipped and returns False.
+def has_ots ():
+	if ots_sanitize:
+		return True
+	print ("OTS is not present, skipping all ots checks.")
+	return False
+
+# Runs ots-sanitize on the file at `path`. Returns True when it exits with
+# status 0; otherwise prints its standard output and returns False.
+def check_ots (path):
+	report, status = cmd ([ots_sanitize, path])
+	if not status:
+		return True
+	print ("OTS Failure: %s" % report)
+	return False
+
+# Command line: <path to hb-subset> <path to a single .tests file>
+args = sys.argv[1:]
+if not args or sys.argv[1].find ('hb-subset') == -1 or not os.path.exists (sys.argv[1]):
+	sys.exit ("First argument does not seem to point to usable hb-subset.")
+hb_subset, args = args[0], args[1:]
+
+if not args:
+	sys.exit ("No tests supplied.")
+if len (args) != 1:
+	sys.exit ("Expected exactly one test file, got %d." % len (args))
+
+# The result is stored under its own name so that the has_ots function is not
+# overwritten. It is unused for now because OTS checking is switched off
+# below; the call still prints the notice when ots-sanitize is missing.
+ots_available = has_ots ()
+
+fails = 0
+
+path = args[0]
+if not path.endswith (".tests"):
+	sys.exit ("Not a valid test case path.")
+
+with open (path, mode="r", encoding="utf-8") as f:
+	# TODO(garretrieger): re-enable OTS checking.
+	fails += run_test (RepackTest (path, f.read ()), False)
+
+
+if fails != 0:
+	sys.exit ("%d test(s) failed." % fails)
+else:
+	print ("All tests passed.")