Merge pull request #3779 from harfbuzz/split_pair_pos_2

[repacker] Add PairPosFormat2 table splitting in the repacker.
2022-08-09 22:23:23 -06:00 · 2022-08-09 22:23:23 -06:00 · f4f7d691af
parent 7fde6ab025 fa46dbca9d
commit f4f7d691af
13 changed files with 1329 additions and 133 deletions
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -376,6 +376,7 @@ COMPILED_TESTS = \
 	test-unicode-ranges \
 	test-vector \
 	test-repacker \
+	test-classdef-graph \
 	$(NULL)
 COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG
 COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS)
@ -422,6 +423,10 @@ test_repacker_SOURCES = test-repacker.cc hb-static.cc graph/gsubgpos-context.cc
 test_repacker_CPPFLAGS = $(HBCFLAGS)
 test_repacker_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS)

+test_classdef_graph_SOURCES = graph/test-classdef-graph.cc hb-static.cc graph/gsubgpos-context.cc
+test_classdef_graph_CPPFLAGS = $(HBCFLAGS)
+test_classdef_graph_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS)
+
 test_set_SOURCES = test-set.cc hb-static.cc
 test_set_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
 test_set_LDADD = $(COMPILED_TESTS_LDADD)
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@ -353,7 +353,9 @@ HB_SUBSET_sources = \
 	graph/gsubgpos-context.cc \
 	graph/pairpos-graph.hh \
 	graph/coverage-graph.hh \
+	graph/classdef-graph.hh \
 	graph/pairpos-graph.hh \
+	graph/split-helpers.hh \
 	graph/serialize.hh \
 	$(NULL)

--- a/src/OT/Layout/GPOS/ValueFormat.hh
+++ b/src/OT/Layout/GPOS/ValueFormat.hh
@ -59,6 +59,24 @@ struct ValueFormat : HBUINT16
  unsigned int get_len () const  { return hb_popcount ((unsigned int) *this); }
  unsigned int get_size () const { return get_len () * Value::static_size; }

+  hb_vector_t<unsigned> get_device_table_indices () const {
+    unsigned i = 0;
+    hb_vector_t<unsigned> result;
+    unsigned format = *this;
+
+    if (format & xPlacement) i++;
+    if (format & yPlacement) i++;
+    if (format & xAdvance)   i++;
+    if (format & yAdvance)   i++;
+
+    if (format & xPlaDevice) result.push (i++);
+    if (format & yPlaDevice) result.push (i++);
+    if (format & xAdvDevice) result.push (i++);
+    if (format & yAdvDevice) result.push (i++);
+
+    return result;
+  }
+
  bool apply_value (hb_ot_apply_context_t *c,
                    const void            *base,
                    const Value           *values,
--- a/src/graph/classdef-graph.hh
+++ b/src/graph/classdef-graph.hh
@ -0,0 +1,216 @@
+/*
+ * Copyright © 2022  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include "graph.hh"
+#include "../hb-ot-layout-common.hh"
+
+#ifndef GRAPH_CLASSDEF_GRAPH_HH
+#define GRAPH_CLASSDEF_GRAPH_HH
+
+namespace graph {
+
+struct ClassDefFormat1 : public OT::ClassDefFormat1_3<SmallTypes>
+{
+  bool sanitize (graph_t::vertex_t& vertex) const
+  {
+    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
+    constexpr unsigned min_size = OT::ClassDefFormat1_3<SmallTypes>::min_size;
+    if (vertex_len < min_size) return false;
+    return vertex_len >= min_size + classValue.get_size () - classValue.len.get_size ();
+  }
+};
+
+struct ClassDefFormat2 : public OT::ClassDefFormat2_4<SmallTypes>
+{
+  bool sanitize (graph_t::vertex_t& vertex) const
+  {
+    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
+    constexpr unsigned min_size = OT::ClassDefFormat2_4<SmallTypes>::min_size;
+    if (vertex_len < min_size) return false;
+    return vertex_len >= min_size + rangeRecord.get_size () - rangeRecord.len.get_size ();
+  }
+};
+
+struct ClassDef : public OT::ClassDef
+{
+  template<typename It>
+  static bool add_class_def (gsubgpos_graph_context_t& c,
+                             unsigned parent_id,
+                             unsigned link_position,
+                             It glyph_and_class,
+                             unsigned max_size)
+  {
+    unsigned class_def_prime_id = c.graph.new_node (nullptr, nullptr);
+    auto& class_def_prime_vertex = c.graph.vertices_[class_def_prime_id];
+    if (!make_class_def (c, glyph_and_class, class_def_prime_id, max_size))
+      return false;
+
+    auto* class_def_link = c.graph.vertices_[parent_id].obj.real_links.push ();
+    class_def_link->width = SmallTypes::size;
+    class_def_link->objidx = class_def_prime_id;
+    class_def_link->position = link_position;
+    class_def_prime_vertex.parents.push (parent_id);
+
+    return true;
+  }
+
+  template<typename It>
+  static bool make_class_def (gsubgpos_graph_context_t& c,
+                              It glyph_and_class,
+                              unsigned dest_obj,
+                              unsigned max_size)
+  {
+    char* buffer = (char*) hb_calloc (1, max_size);
+    hb_serialize_context_t serializer (buffer, max_size);
+    OT::ClassDef_serialize (&serializer, glyph_and_class);
+    serializer.end_serialize ();
+    if (serializer.in_error ())
+    {
+      hb_free (buffer);
+      return false;
+    }
+
+    hb_bytes_t class_def_copy = serializer.copy_bytes ();
+    c.add_buffer ((char *) class_def_copy.arrayZ); // Give ownership to the context, it will cleanup the buffer.
+
+    auto& obj = c.graph.vertices_[dest_obj].obj;
+    obj.head = (char *) class_def_copy.arrayZ;
+    obj.tail = obj.head + class_def_copy.length;
+
+    hb_free (buffer);
+    return true;
+  }
+
+  bool sanitize (graph_t::vertex_t& vertex) const
+  {
+    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
+    if (vertex_len < OT::ClassDef::min_size) return false;
+    switch (u.format)
+    {
+    case 1: return ((ClassDefFormat1*)this)->sanitize (vertex);
+    case 2: return ((ClassDefFormat2*)this)->sanitize (vertex);
+#ifndef HB_NO_BORING_EXPANSION
+    // Not currently supported
+    case 3:
+    case 4:
+#endif
+    default: return false;
+    }
+  }
+};
+
+
+struct class_def_size_estimator_t
+{
+  template<typename It>
+  class_def_size_estimator_t (It glyph_and_class)
+      : gids_consecutive (true), num_ranges_per_class (), glyphs_per_class ()
+  {
+    unsigned last_gid = (unsigned) -1;
+    for (auto p : + glyph_and_class)
+    {
+      unsigned gid = p.first;
+      unsigned klass = p.second;
+
+      if (last_gid != (unsigned) -1 && gid != last_gid + 1)
+        gids_consecutive = false;
+      last_gid = gid;
+
+      hb_set_t* glyphs;
+      if (glyphs_per_class.has (klass, &glyphs) && glyphs) {
+        glyphs->add (gid);
+        continue;
+      }
+
+      hb_set_t new_glyphs;
+      new_glyphs.add (gid);
+      glyphs_per_class.set (klass, std::move (new_glyphs));
+    }
+
+    if (in_error ()) return;
+
+    for (unsigned klass : glyphs_per_class.keys ())
+    {
+      if (!klass) continue; // class 0 doesn't get encoded.
+
+      const hb_set_t& glyphs = glyphs_per_class.get (klass);
+      hb_codepoint_t start = HB_SET_VALUE_INVALID;
+      hb_codepoint_t end = HB_SET_VALUE_INVALID;
+
+      unsigned count = 0;
+      while (glyphs.next_range (&start, &end))
+        count++;
+
+      num_ranges_per_class.set (klass, count);
+    }
+  }
+
+  // Incremental increase in the Coverage table size (worst case)
+  // if all glyphs associated with 'klass' were added.
+  unsigned incremental_coverage_size (unsigned klass) const
+  {
+    // Coverage takes 2 bytes per glyph worst case.
+    return 2 * glyphs_per_class.get (klass).get_population ();
+  }
+
+  // Incremental increase in the ClassDef table size (worst case)
+  // if all glyphs associated with 'klass' were added.
+  unsigned incremental_class_def_size (unsigned klass) const
+  {
+    // ClassDef2 takes 6 bytes per range.
+    unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass);
+    if (gids_consecutive)
+    {
+      // ClassDef1 takes 2 bytes per glyph, but can only be used
+      // when gids are consecutive.
+      return hb_min (2 * glyphs_per_class.get (klass).get_population (), class_def_2_size);
+    }
+
+    return class_def_2_size;
+  }
+
+  bool in_error ()
+  {
+    if (num_ranges_per_class.in_error ()) return true;
+    if (glyphs_per_class.in_error ()) return true;
+
+    for (const hb_set_t& s : glyphs_per_class.values ())
+    {
+      if (s.in_error ()) return true;
+    }
+    return false;
+  }
+
+ private:
+  bool gids_consecutive;
+  hb_hashmap_t<unsigned, unsigned> num_ranges_per_class;
+  hb_hashmap_t<unsigned, hb_set_t> glyphs_per_class;
+};
+
+
+}
+
+#endif  // GRAPH_CLASSDEF_GRAPH_HH
--- a/src/graph/coverage-graph.hh
+++ b/src/graph/coverage-graph.hh
@ -56,6 +56,78 @@ struct CoverageFormat2 : public OT::Layout::Common::CoverageFormat2_4<SmallTypes

 struct Coverage : public OT::Layout::Common::Coverage
 {
+  static Coverage* clone_coverage (gsubgpos_graph_context_t& c,
+                                   unsigned coverage_id,
+                                   unsigned new_parent_id,
+                                   unsigned link_position,
+                                   unsigned start, unsigned end)
+
+  {
+    unsigned coverage_size = c.graph.vertices_[coverage_id].table_size ();
+    auto& coverage_v = c.graph.vertices_[coverage_id];
+    Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
+    if (!coverage_table || !coverage_table->sanitize (coverage_v))
+      return nullptr;
+
+    auto new_coverage =
+        + hb_zip (coverage_table->iter (), hb_range ())
+        | hb_filter ([&] (hb_pair_t<unsigned, unsigned> p) {
+          return p.second >= start && p.second < end;
+        })
+        | hb_map_retains_sorting (hb_first)
+        ;
+
+    return add_coverage (c, new_parent_id, link_position, new_coverage, coverage_size);
+  }
+
+  template<typename It>
+  static Coverage* add_coverage (gsubgpos_graph_context_t& c,
+                                 unsigned parent_id,
+                                 unsigned link_position,
+                                 It glyphs,
+                                 unsigned max_size)
+  {
+    unsigned coverage_prime_id = c.graph.new_node (nullptr, nullptr);
+    auto& coverage_prime_vertex = c.graph.vertices_[coverage_prime_id];
+    if (!make_coverage (c, glyphs, coverage_prime_id, max_size))
+      return nullptr;
+
+    auto* coverage_link = c.graph.vertices_[parent_id].obj.real_links.push ();
+    coverage_link->width = SmallTypes::size;
+    coverage_link->objidx = coverage_prime_id;
+    coverage_link->position = link_position;
+    coverage_prime_vertex.parents.push (parent_id);
+
+    return (Coverage*) coverage_prime_vertex.obj.head;
+  }
+
+  template<typename It>
+  static bool make_coverage (gsubgpos_graph_context_t& c,
+                             It glyphs,
+                             unsigned dest_obj,
+                             unsigned max_size)
+  {
+    char* buffer = (char*) hb_calloc (1, max_size);
+    hb_serialize_context_t serializer (buffer, max_size);
+    Coverage_serialize (&serializer, glyphs);
+    serializer.end_serialize ();
+    if (serializer.in_error ())
+    {
+      hb_free (buffer);
+      return false;
+    }
+
+    hb_bytes_t coverage_copy = serializer.copy_bytes ();
+    c.add_buffer ((char *) coverage_copy.arrayZ); // Give ownership to the context, it will cleanup the buffer.
+
+    auto& obj = c.graph.vertices_[dest_obj].obj;
+    obj.head = (char *) coverage_copy.arrayZ;
+    obj.tail = obj.head + coverage_copy.length;
+
+    hb_free (buffer);
+    return true;
+  }
+
  bool sanitize (graph_t::vertex_t& vertex) const
  {
    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
--- a/src/graph/graph.hh
+++ b/src/graph/graph.hh
@ -345,7 +345,9 @@ struct graph_t
    }
  }

-  unsigned index_for_offset(unsigned node_idx, const void* offset) const
+  // Finds the object id of the object pointed to by the offset at 'offset'
+  // within object[node_idx].
+  unsigned index_for_offset (unsigned node_idx, const void* offset) const
  {
    const auto& node = object (node_idx);
    if (offset < node.head || offset >= node.tail) return -1;
@ -360,6 +362,24 @@ struct graph_t
    return -1;
  }

+  // Finds the object id of the object pointed to by the offset at 'offset'
+  // within object[node_idx]. Ensures that the returned object is safe to mutate.
+  // That is, if the original child object is shared by parents other than node_idx
+  // it will be duplicated and the duplicate will be returned instead.
+  unsigned mutable_index_for_offset (unsigned node_idx, const void* offset)
+  {
+    unsigned child_idx = index_for_offset (node_idx, offset);
+    auto& child = vertices_[child_idx];
+    for (unsigned p : child.parents)
+    {
+      if (p != node_idx) {
+        return duplicate (node_idx, child_idx);
+      }
+    }
+
+    return child_idx;
+  }
+

  /*
   * Assign unique space numbers to each connected subgraph of 24 bit and/or 32 bit offset(s).
--- a/src/graph/gsubgpos-graph.hh
+++ b/src/graph/gsubgpos-graph.hh
@ -124,7 +124,7 @@ struct Lookup : public OT::Lookup
    if (!is_ext && type != OT::Layout::GPOS_impl::PosLookupSubTable::Type::Pair)
      return true;

-    hb_vector_t<unsigned> all_new_subtables;
+    hb_vector_t<hb_pair_t<unsigned, hb_vector_t<unsigned>>> all_new_subtables;
    for (unsigned i = 0; i < subTable.len; i++)
    {
      unsigned subtable_index = c.graph.index_for_offset (this_index, &subTable[i]);
@ -133,7 +133,7 @@ struct Lookup : public OT::Lookup
        ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>* extension =
            (ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>*)
            c.graph.object (ext_subtable_index).head;
-        if (!extension->sanitize (c.graph.vertices_[ext_subtable_index]))
+        if (!extension || !extension->sanitize (c.graph.vertices_[ext_subtable_index]))
          continue;

        subtable_index = extension->get_subtable_index (c.graph, ext_subtable_index);
@ -143,15 +143,18 @@ struct Lookup : public OT::Lookup
      }

      PairPos* pairPos = (PairPos*) c.graph.object (subtable_index).head;
-      if (!pairPos->sanitize (c.graph.vertices_[subtable_index])) continue;
+      if (!pairPos || !pairPos->sanitize (c.graph.vertices_[subtable_index])) continue;

      hb_vector_t<unsigned> new_sub_tables = pairPos->split_subtables (c, subtable_index);
      if (new_sub_tables.in_error ()) return false;
-      + new_sub_tables.iter() | hb_sink (all_new_subtables);
+      hb_pair_t<unsigned, hb_vector_t<unsigned>>* entry = all_new_subtables.push ();
+      entry->first = i;
+      entry->second = std::move (new_sub_tables);
    }

-    if (all_new_subtables)
+    if (all_new_subtables) {
      add_sub_tables (c, this_index, type, all_new_subtables);
+    }

    return true;
  }
@ -159,13 +162,18 @@ struct Lookup : public OT::Lookup
  void add_sub_tables (gsubgpos_graph_context_t& c,
                       unsigned this_index,
                       unsigned type,
-                       hb_vector_t<unsigned>& subtable_indices)
+                       hb_vector_t<hb_pair_t<unsigned, hb_vector_t<unsigned>>>& subtable_ids)
  {
    bool is_ext = is_extension (c.table_tag);
    auto& v = c.graph.vertices_[this_index];
+    fix_existing_subtable_links (c, this_index, subtable_ids);
+
+    unsigned new_subtable_count = 0;
+    for (const auto& p : subtable_ids)
+      new_subtable_count += p.second.length;

    size_t new_size = v.table_size ()
-                      + subtable_indices.length * OT::Offset16::static_size;
+                      + new_subtable_count * OT::Offset16::static_size;
    char* buffer = (char*) hb_calloc (1, new_size);
    c.add_buffer (buffer);
    memcpy (buffer, v.obj.head, v.table_size());
@ -175,30 +183,61 @@ struct Lookup : public OT::Lookup

    Lookup* new_lookup = (Lookup*) buffer;

-    new_lookup->subTable.len = subTable.len + subtable_indices.length;
-    unsigned offset_index = subTable.len;
-    for (unsigned subtable_id : subtable_indices)
+    unsigned shift = 0;
+    new_lookup->subTable.len = subTable.len + new_subtable_count;
+    for (const auto& p : subtable_ids)
    {
-      if (is_ext)
-      {
-        unsigned ext_id = create_extension_subtable (c, subtable_id, type);
-        c.graph.vertices_[subtable_id].parents.push (ext_id);
-        subtable_id = ext_id;
-      }
+      unsigned offset_index = p.first + shift + 1;
+      shift += p.second.length;

-      auto* link = v.obj.real_links.push ();
-      link->width = 2;
-      link->objidx = subtable_id;
-      link->position = (char*) &new_lookup->subTable[offset_index++] -
-                       (char*) new_lookup;
-      c.graph.vertices_[subtable_id].parents.push (this_index);
+      for (unsigned subtable_id : p.second)
+      {
+        if (is_ext)
+        {
+          unsigned ext_id = create_extension_subtable (c, subtable_id, type);
+          c.graph.vertices_[subtable_id].parents.push (ext_id);
+          subtable_id = ext_id;
+        }
+
+        auto* link = v.obj.real_links.push ();
+        link->width = 2;
+        link->objidx = subtable_id;
+        link->position = (char*) &new_lookup->subTable[offset_index++] -
+                         (char*) new_lookup;
+        c.graph.vertices_[subtable_id].parents.push (this_index);
+      }
    }

+    // Repacker sort order depends on link order, which we've messed up, so re-sort it.
+    v.obj.real_links.qsort ();
+
    // The head location of the lookup has changed, invalidating the lookups map entry
    // in the context. Update the map.
    c.lookups.set (this_index, new_lookup);
  }

+  void fix_existing_subtable_links (gsubgpos_graph_context_t& c,
+                                    unsigned this_index,
+                                    hb_vector_t<hb_pair_t<unsigned, hb_vector_t<unsigned>>>& subtable_ids)
+  {
+    auto& v = c.graph.vertices_[this_index];
+    Lookup* lookup = (Lookup*) v.obj.head;
+
+    unsigned shift = 0;
+    for (const auto& p : subtable_ids)
+    {
+      unsigned insert_index = p.first + shift;
+      unsigned pos_offset = p.second.length * OT::Offset16::static_size;
+      unsigned insert_offset = (char*) &lookup->subTable[insert_index] - (char*) lookup;
+      shift += p.second.length;
+
+      for (auto& l : v.obj.all_links_writer ())
+      {
+        if (l.position > insert_offset) l.position += pos_offset;
+      }
+    }
+  }
+
  unsigned create_extension_subtable (gsubgpos_graph_context_t& c,
                                      unsigned subtable_index,
                                      unsigned type)
@ -281,7 +320,7 @@ struct GSTAR : public OT::GSUBGPOS
    const auto& r = graph.root ();

    GSTAR* gstar = (GSTAR*) r.obj.head;
-    if (!gstar->sanitize (r))
+    if (!gstar || !gstar->sanitize (r))
      return nullptr;

    return gstar;
@ -327,17 +366,16 @@ struct GSTAR : public OT::GSUBGPOS
                     hb_hashmap_t<unsigned, Lookup*>& lookups /* OUT */)
  {
    unsigned lookup_list_idx = get_lookup_list_index (graph);
-
    const LookupList<Types>* lookupList =
        (const LookupList<Types>*) graph.object (lookup_list_idx).head;
-    if (!lookupList->sanitize (graph.vertices_[lookup_list_idx]))
+    if (!lookupList || !lookupList->sanitize (graph.vertices_[lookup_list_idx]))
      return;

    for (unsigned i = 0; i < lookupList->len; i++)
    {
      unsigned lookup_idx = graph.index_for_offset (lookup_list_idx, &(lookupList->arrayZ[i]));
      Lookup* lookup = (Lookup*) graph.object (lookup_idx).head;
-      if (!lookup->sanitize (graph.vertices_[lookup_idx])) continue;
+      if (!lookup || !lookup->sanitize (graph.vertices_[lookup_idx])) continue;
      lookups.set (lookup_idx, lookup);
    }
  }
--- a/src/graph/pairpos-graph.hh
+++ b/src/graph/pairpos-graph.hh
@ -27,7 +27,9 @@
 #ifndef GRAPH_PAIRPOS_GRAPH_HH
 #define GRAPH_PAIRPOS_GRAPH_HH

+#include "split-helpers.hh"
 #include "coverage-graph.hh"
+#include "classdef-graph.hh"
 #include "../OT/Layout/GPOS/PairPos.hh"
 #include "../OT/Layout/GPOS/PosLookupSubTable.hh"

@ -51,68 +53,62 @@ struct PairPosFormat1 : public OT::Layout::GPOS_impl::PairPosFormat1_3<SmallType

    const unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
    const unsigned coverage_size = c.graph.vertices_[coverage_id].table_size ();
-    const unsigned base_size = OT::Layout::GPOS_impl::PairPosFormat1_3<SmallTypes>::min_size
-                               + coverage_size;
+    const unsigned base_size = OT::Layout::GPOS_impl::PairPosFormat1_3<SmallTypes>::min_size;

+    unsigned partial_coverage_size = 4;
    unsigned accumulated = base_size;
    hb_vector_t<unsigned> split_points;
    for (unsigned i = 0; i < pairSet.len; i++)
    {
      unsigned pair_set_index = pair_set_graph_index (c, this_index, i);
-      accumulated += c.graph.find_subgraph_size (pair_set_index, visited);
-      accumulated += SmallTypes::size; // for PairSet offset.
+      unsigned accumulated_delta =
+          c.graph.find_subgraph_size (pair_set_index, visited) +
+          SmallTypes::size; // for PairSet offset.
+      partial_coverage_size += OT::HBUINT16::static_size;

-      // TODO(garretrieger): don't count the size of the largest pairset against the limit, since
-      //                     it will be packed last in the order and does not contribute to
-      //                     the 64kb limit.
+      accumulated += accumulated_delta;
+      unsigned total = accumulated + hb_min (partial_coverage_size, coverage_size);

-      if (accumulated > (1 << 16))
+      if (total >= (1 << 16))
      {
        split_points.push (i);
-        accumulated = base_size;
-        visited.clear (); // Pretend node sharing isn't allowed between splits.
+        accumulated = base_size + accumulated_delta;
+        partial_coverage_size = 6;
+        visited.clear (); // node sharing isn't allowed between splits.
      }
    }

-    return do_split (c, this_index, split_points);
+    split_context_t split_context {
+      c,
+      this,
+      this_index,
+    };
+
+    return actuate_subtable_split<split_context_t> (split_context, split_points);
  }

 private:

-  // Split this PairPos into two or more PairPos's. split_points defines
-  // the indices (first index to include in the new table) to split at.
-  // Returns the object id's of the newly created PairPos subtables.
-  hb_vector_t<unsigned> do_split (gsubgpos_graph_context_t& c,
-                                  unsigned this_index,
-                                  const hb_vector_t<unsigned> split_points)
-  {
-    hb_vector_t<unsigned> new_objects;
-    if (!split_points)
-      return new_objects;
+  struct split_context_t {
+    gsubgpos_graph_context_t& c;
+    PairPosFormat1* thiz;
+    unsigned this_index;

-    for (unsigned i = 0; i < split_points.length; i++)
+    unsigned original_count ()
    {
-      unsigned start = split_points[i];
-      unsigned end = (i < split_points.length - 1) ? split_points[i + 1] : pairSet.len;
-      unsigned id = clone_range (c, this_index, start, end);
-
-      if (id == (unsigned) -1)
-      {
-        new_objects.reset ();
-        new_objects.allocated = -1; // mark error
-        return new_objects;
-      }
-      new_objects.push (id);
+      return thiz->pairSet.len;
    }

-    if (!shrink (c, this_index, split_points[0]))
+    unsigned clone_range (unsigned start, unsigned end)
    {
-      new_objects.reset ();
-      new_objects.allocated = -1; // mark error
+      return thiz->clone_range (this->c, this->this_index, start, end);
    }

-    return new_objects;
-  }
+    bool shrink (unsigned count)
+    {
+      return thiz->shrink (this->c, this->this_index, count);
+    }
+  };

  bool shrink (gsubgpos_graph_context_t& c,
               unsigned this_index,
@ -129,11 +125,12 @@ struct PairPosFormat1 : public OT::Layout::GPOS_impl::PairPosFormat1_3<SmallType
    pairSet.len = count;
    c.graph.vertices_[this_index].obj.tail -= (old_count - count) * SmallTypes::size;

-    unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
+    unsigned coverage_id = c.graph.mutable_index_for_offset (this_index, &coverage);
    unsigned coverage_size = c.graph.vertices_[coverage_id].table_size ();
    auto& coverage_v = c.graph.vertices_[coverage_id];
+
    Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
-    if (!coverage_table->sanitize (coverage_v))
+    if (!coverage_table || !coverage_table->sanitize (coverage_v))
      return false;

    auto new_coverage =
@ -144,7 +141,7 @@ struct PairPosFormat1 : public OT::Layout::GPOS_impl::PairPosFormat1_3<SmallType
        | hb_map_retains_sorting (hb_first)
        ;

-    return make_coverage (c, new_coverage, coverage_id, coverage_size);
+    return Coverage::make_coverage (c, new_coverage, coverage_id, coverage_size);
  }

  // Create a new PairPos including PairSet's from start (inclusive) to end (exclusive).
@ -178,60 +175,17 @@ struct PairPosFormat1 : public OT::Layout::GPOS_impl::PairPosFormat1_3<SmallType
    }

    unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
-    unsigned coverage_size = c.graph.vertices_[coverage_id].table_size ();
-    auto& coverage_v = c.graph.vertices_[coverage_id];
-    Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
-    if (!coverage_table->sanitize (coverage_v))
-      return false;
-
-    auto new_coverage =
-        + hb_zip (coverage_table->iter (), hb_range ())
-        | hb_filter ([&] (hb_pair_t<unsigned, unsigned> p) {
-          return p.second >= start && p.second < end;
-        })
-        | hb_map_retains_sorting (hb_first)
-        ;
-
-    unsigned coverage_prime_id = c.graph.new_node (nullptr, nullptr);
-    auto& coverage_prime_vertex = c.graph.vertices_[coverage_prime_id];
-    if (!make_coverage (c, new_coverage, coverage_prime_id, coverage_size))
+    if (!Coverage::clone_coverage (c,
+                                   coverage_id,
+                                   pair_pos_prime_id,
+                                   2,
+                                   start, end))
      return -1;

-    auto* coverage_link = c.graph.vertices_[pair_pos_prime_id].obj.real_links.push ();
-    coverage_link->width = SmallTypes::size;
-    coverage_link->objidx = coverage_prime_id;
-    coverage_link->position = 2;
-    coverage_prime_vertex.parents.push (pair_pos_prime_id);
-
    return pair_pos_prime_id;
  }

-  template<typename It>
-  bool make_coverage (gsubgpos_graph_context_t& c,
-                      It glyphs,
-                      unsigned dest_obj,
-                      unsigned max_size) const
-  {
-    char* buffer = (char*) hb_calloc (1, max_size);
-    hb_serialize_context_t serializer (buffer, max_size);
-    Coverage_serialize (&serializer, glyphs);
-    serializer.end_serialize ();
-    if (serializer.in_error ())
-    {
-      hb_free (buffer);
-      return false;
-    }

-    hb_bytes_t coverage_copy = serializer.copy_bytes ();
-    c.add_buffer ((char *) coverage_copy.arrayZ); // Give ownership to the context, it will cleanup the buffer.
-
-    auto& obj = c.graph.vertices_[dest_obj].obj;
-    obj.head = (char *) coverage_copy.arrayZ;
-    obj.tail = obj.head + coverage_copy.length;
-
-    hb_free (buffer);
-    return true;
-  }

  unsigned pair_set_graph_index (gsubgpos_graph_context_t& c, unsigned this_index, unsigned i) const
  {
@ -243,14 +197,415 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
 {
  bool sanitize (graph_t::vertex_t& vertex) const
  {
-    // TODO(garretrieger): implement me!
-    return true;
+    size_t vertex_len = vertex.table_size ();
+    unsigned min_size = OT::Layout::GPOS_impl::PairPosFormat2_4<SmallTypes>::min_size;
+    if (vertex_len < min_size) return false; // Shorter than the fixed header: reject before class1Count is read.
+
+    const unsigned class1_count = class1Count;
+    return vertex_len >= // The header plus class1Count complete class1 records must fit inside the vertex.
+        min_size + class1_count * get_class1_record_size ();
  }

  hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c, unsigned this_index)
  {
-    // TODO(garretrieger): implement me!
-    return hb_vector_t<unsigned> ();
+    const unsigned base_size = OT::Layout::GPOS_impl::PairPosFormat2_4<SmallTypes>::min_size; // Fixed header size; each split starts from this again.
+    const unsigned class_def_2_size = size_of (c, this_index, &classDef2);
+    const Coverage* coverage = get_coverage (c, this_index); // Null(Coverage) when the coverage vertex fails sanitize.
+    const ClassDef* class_def_1 = get_class_def_1 (c, this_index); // Null(ClassDef) when the classDef1 vertex fails sanitize.
+    auto gid_and_class = // One (glyph id, class1 value) pair per covered glyph.
+        + coverage->iter ()
+        | hb_map_retains_sorting ([&] (hb_codepoint_t gid) {
+          return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (gid, class_def_1->get_class (gid));
+        })
+        ;
+    class_def_size_estimator_t estimator (gid_and_class); // Queried below for the per-class growth of coverage and classDef1.
+
+    const unsigned class1_count = class1Count;
+    const unsigned class2_count = class2Count;
+    const unsigned class1_record_size = get_class1_record_size ();
+
+    const unsigned value_1_len = valueFormat1.get_len ();
+    const unsigned value_2_len = valueFormat2.get_len ();
+    const unsigned total_value_len = value_1_len + value_2_len; // Number of values[] entries per (class1, class2) pair.
+
+    unsigned accumulated = base_size; // Running size of the current split: header, records and device tables.
+    unsigned coverage_size = 4; // NOTE(review): 4 is presumably the smallest possible coverage table - confirm.
+    unsigned class_def_1_size = 4; // NOTE(review): same starting constant for classDef1 - confirm what it represents.
+    unsigned max_coverage_size = coverage_size; // Largest estimate seen so far; handed to split_context_t below.
+    unsigned max_class_def_1_size = class_def_1_size;
+
+    hb_vector_t<unsigned> split_points; // Class1 indices at which a new subtable begins.
+
+    hb_hashmap_t<unsigned, unsigned> device_tables = get_all_device_tables (c, this_index);
+    hb_vector_t<unsigned> format1_device_table_indices = valueFormat1.get_device_table_indices ();
+    hb_vector_t<unsigned> format2_device_table_indices = valueFormat2.get_device_table_indices ();
+    bool has_device_tables = bool(format1_device_table_indices) || bool(format2_device_table_indices);
+
+    hb_set_t visited; // Passed to find_subgraph_size; cleared whenever a new split starts.
+    for (unsigned i = 0; i < class1_count; i++)
+    {
+      unsigned accumulated_delta = class1_record_size; // Bytes class i adds: its record plus any device tables found below.
+      coverage_size += estimator.incremental_coverage_size (i);
+      class_def_1_size += estimator.incremental_class_def_size (i);
+      max_coverage_size = hb_max (max_coverage_size, coverage_size);
+      max_class_def_1_size = hb_max (max_class_def_1_size, class_def_1_size);
+
+      if (has_device_tables) {
+        for (unsigned j = 0; j < class2_count; j++)
+        {
+          unsigned value1_index = total_value_len * (class2_count * i + j); // Index into values[] of the first value for pair (i, j).
+          unsigned value2_index = value1_index + value_1_len; // The second value record follows the first directly.
+          accumulated_delta += size_of_value_record_children (c,
+                                                        device_tables,
+                                                        format1_device_table_indices,
+                                                        value1_index,
+                                                        visited);
+          accumulated_delta += size_of_value_record_children (c,
+                                                        device_tables,
+                                                        format2_device_table_indices,
+                                                        value2_index,
+                                                        visited);
+        }
+      }
+
+      accumulated += accumulated_delta;
+      unsigned total = accumulated
+                       + coverage_size + class_def_1_size + class_def_2_size
+                       // The largest object will pack last and can exceed the size limit.
+                       - hb_max (hb_max (coverage_size, class_def_1_size), class_def_2_size);
+      if (total >= (1 << 16)) // Estimated size reached 65536: begin a new subtable at class i.
+      {
+        split_points.push (i);
+        // split does not include i, so add the size for i when we reset the size counters.
+        accumulated = base_size + accumulated_delta;
+        coverage_size = 4 + estimator.incremental_coverage_size (i);
+        class_def_1_size = 4 + estimator.incremental_class_def_size (i);
+        visited.clear (); // node sharing isn't allowed between splits.
+      }
+    }
+
+    split_context_t split_context { // Positional initialization: order must follow the field order of split_context_t.
+      c,
+      this,
+      this_index,
+      class1_record_size,
+      total_value_len,
+      value_1_len,
+      value_2_len,
+      max_coverage_size,
+      max_class_def_1_size,
+      device_tables,
+      format1_device_table_indices,
+      format2_device_table_indices
+    };
+
+    return actuate_subtable_split<split_context_t> (split_context, split_points); // Clones one subtable per split point, then shrinks this one.
+  }
+ private:
+
+  struct split_context_t
+  { // State handed to actuate_subtable_split; filled positionally by the brace initializer in split_subtables.
+    gsubgpos_graph_context_t& c;
+    PairPosFormat2* thiz; // The subtable being split.
+    unsigned this_index; // Graph index of the subtable being split.
+    unsigned class1_record_size; // Bytes per class1 record (see get_class1_record_size).
+    unsigned value_record_len; // values[] entries per (class1, class2) pair: value1 + value2 lengths.
+    unsigned value1_record_len; // valueFormat1.get_len ()
+    unsigned value2_record_len; // valueFormat2.get_len ()
+    unsigned max_coverage_size; // Buffer size passed to Coverage::add_coverage when cloning.
+    unsigned max_class_def_size; // Buffer size passed to ClassDef::add_class_def when cloning.
+
+    const hb_hashmap_t<unsigned, unsigned>& device_tables; // Link position -> object index for the original subtable.
+    const hb_vector_t<unsigned>& format1_device_table_indices; // References: the vectors must outlive this context.
+    const hb_vector_t<unsigned>& format2_device_table_indices;
+
+    unsigned original_count () // Number of class1 records before any splitting.
+    {
+      return thiz->class1Count;
+    }
+
+    unsigned clone_range (unsigned start, unsigned end) // Forwards to PairPosFormat2::clone_range for classes [start, end).
+    {
+      return thiz->clone_range (*this, start, end);
+    }
+
+    bool shrink (unsigned count) // Forwards to PairPosFormat2::shrink, keeping classes [0, count).
+    {
+      return thiz->shrink (*this, count);
+    }
+  };
+
+  size_t get_class1_record_size () const
+  {
+    // A class1 record is class2Count pairs, each holding one value record
+    // of valueFormat1 followed by one of valueFormat2.
+    const size_t pair_size = valueFormat1.get_size () + valueFormat2.get_size ();
+    const size_t num_pairs = class2Count;
+    return num_pairs * pair_size;
+  }
+
+  unsigned clone_range (split_context_t& split_context, // Builds a new subtable holding class1 records [start, end); returns its node id or (unsigned) -1.
+                        unsigned start, unsigned end) const
+  {
+    DEBUG_MSG (SUBSET_REPACK, nullptr,
+               "  Cloning PairPosFormat2 (%u) range [%u, %u).", split_context.this_index, start, end);
+
+    graph_t& graph = split_context.c.graph;
+
+    unsigned num_records = end - start;
+    unsigned prime_size = OT::Layout::GPOS_impl::PairPosFormat2_4<SmallTypes>::min_size // Header plus the copied class1 records.
+                          + num_records * split_context.class1_record_size;
+
+    unsigned pair_pos_prime_id = split_context.c.create_node (prime_size);
+    if (pair_pos_prime_id == (unsigned) -1) return -1; // create_node signals failure with (unsigned) -1.
+
+    PairPosFormat2* pair_pos_prime =
+        (PairPosFormat2*) graph.object (pair_pos_prime_id).head;
+    pair_pos_prime->format = this->format;
+    pair_pos_prime->valueFormat1 = this->valueFormat1;
+    pair_pos_prime->valueFormat2 = this->valueFormat2;
+    pair_pos_prime->class1Count = num_records;
+    pair_pos_prime->class2Count = this->class2Count;
+    clone_class1_records (split_context, // Copies the record bytes and moves device table links to the new node.
+                          pair_pos_prime_id,
+                          start,
+                          end);
+
+    unsigned coverage_id =
+        graph.index_for_offset (split_context.this_index, &coverage);
+    unsigned class_def_1_id =
+        graph.index_for_offset (split_context.this_index, &classDef1);
+    auto& coverage_v = graph.vertices_[coverage_id];
+    auto& class_def_1_v = graph.vertices_[class_def_1_id];
+    Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
+    ClassDef* class_def_1_table = (ClassDef*) class_def_1_v.obj.head;
+    if (!coverage_table
+        || !coverage_table->sanitize (coverage_v)
+        || !class_def_1_table
+        || !class_def_1_table->sanitize (class_def_1_v))
+      return -1; // NOTE(review): the new node and any moved device tables are left in the graph on this path.
+
+    auto klass_map = // (glyph, class - start) for every covered glyph whose class1 value is in [start, end).
+    + coverage_table->iter ()
+    | hb_map_retains_sorting ([&] (hb_codepoint_t gid) {
+      return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (gid, class_def_1_table->get_class (gid));
+    })
+    | hb_filter ([&] (hb_codepoint_t klass) {
+      return klass >= start && klass < end;
+    }, hb_second)
+    | hb_map_retains_sorting ([&] (hb_pair_t<hb_codepoint_t, hb_codepoint_t> gid_and_class) {
+      // Classes must be from 0...N so subtract start
+      return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (gid_and_class.first, gid_and_class.second - start);
+    })
+    ;
+
+    if (!Coverage::add_coverage (split_context.c,
+                                 pair_pos_prime_id,
+                                 2, // NOTE(review): presumably the byte position of the coverage offset - confirm.
+                                 + klass_map | hb_map_retains_sorting (hb_first),
+                                 split_context.max_coverage_size))
+      return -1;
+
+    // classDef1
+    if (!ClassDef::add_class_def (split_context.c,
+                                  pair_pos_prime_id,
+                                  8, // NOTE(review): presumably the byte position of the classDef1 offset - confirm.
+                                  + klass_map,
+                                  split_context.max_class_def_size))
+      return -1;
+
+    // classDef2
+    unsigned class_def_2_id =
+        graph.index_for_offset (split_context.this_index, &classDef2);
+    auto* class_def_link = graph.vertices_[pair_pos_prime_id].obj.real_links.push ();
+    class_def_link->width = SmallTypes::size;
+    class_def_link->objidx = class_def_2_id;
+    class_def_link->position = 10; // Link is recorded at byte 10 of the new subtable.
+    graph.vertices_[class_def_2_id].parents.push (pair_pos_prime_id);
+    graph.duplicate (pair_pos_prime_id, class_def_2_id); // NOTE(review): presumably gives the new subtable its own classDef2 copy - confirm.
+
+    return pair_pos_prime_id;
+  }
+
+  void clone_class1_records (split_context_t& split_context, // Copies class1 records [start, end) into the new node and re-homes their device tables.
+                             unsigned pair_pos_prime_id,
+                             unsigned start, unsigned end) const
+  {
+    PairPosFormat2* pair_pos_prime =
+        (PairPosFormat2*) split_context.c.graph.object (pair_pos_prime_id).head;
+
+    char* start_addr = ((char*)&values[0]) + start * split_context.class1_record_size; // First byte of class1 record `start`.
+    unsigned num_records = end - start;
+    memcpy (&pair_pos_prime->values[0], // Raw byte copy; offsets inside are fixed up via the link moves below.
+            start_addr,
+            num_records * split_context.class1_record_size);
+
+    if (!split_context.format1_device_table_indices
+        && !split_context.format2_device_table_indices)
+      // No device tables to move over.
+      return;
+
+    unsigned class2_count = class2Count;
+    for (unsigned i = start; i < end; i++)
+    {
+      for (unsigned j = 0; j < class2_count; j++)
+      {
+        unsigned value1_index = split_context.value_record_len * (class2_count * i + j); // Position of pair (i, j) in this table's values[].
+        unsigned value2_index = value1_index + split_context.value1_record_len;
+
+        unsigned new_value1_index = split_context.value_record_len * (class2_count * (i - start) + j); // Same pair in the clone, where class1 indices restart at 0.
+        unsigned new_value2_index = new_value1_index + split_context.value1_record_len;
+
+        transfer_device_tables (split_context,
+                                pair_pos_prime_id,
+                                split_context.format1_device_table_indices,
+                                value1_index,
+                                new_value1_index);
+
+        transfer_device_tables (split_context,
+                                pair_pos_prime_id,
+                                split_context.format2_device_table_indices,
+                                value2_index,
+                                new_value2_index);
+      }
+    }
+  }
+
+  void transfer_device_tables (split_context_t& split_context,
+                               unsigned pair_pos_prime_id,
+                               const hb_vector_t<unsigned>& device_table_indices,
+                               unsigned old_value_record_index,
+                               unsigned new_value_record_index) const
+  {
+    // For each device table slot of one value record: if the original
+    // subtable has a link at that slot, move the child over to the matching
+    // slot of the cloned subtable.
+    PairPosFormat2* prime =
+        (PairPosFormat2*) split_context.c.graph.object (pair_pos_prime_id).head;
+
+    for (unsigned slot : device_table_indices)
+    {
+      OT::Offset16* old_offset = (OT::Offset16*) &values[old_value_record_index + slot];
+      unsigned position_in_table = ((char*) old_offset) - ((char*) this);
+
+      // Slots without a recorded link have nothing to move.
+      if (split_context.device_tables.has (position_in_table))
+      {
+        OT::Offset16* new_offset = (OT::Offset16*) &prime->values[new_value_record_index + slot];
+        split_context.c.graph.move_child (split_context.this_index,
+                                          old_offset,
+                                          pair_pos_prime_id,
+                                          new_offset);
+      }
+    }
+  }
+
+  bool shrink (split_context_t& split_context, // Truncates this subtable to class1 records [0, count) and rebuilds coverage and classDef1 to match.
+               unsigned count)
+  {
+    DEBUG_MSG (SUBSET_REPACK, nullptr,
+               "  Shrinking PairPosFormat2 (%u) to [0, %u).",
+               split_context.this_index,
+               count);
+    unsigned old_count = class1Count;
+    if (count >= old_count)
+      return true; // Nothing to drop.
+
+    graph_t& graph = split_context.c.graph;
+    class1Count = count;
+    graph.vertices_[split_context.this_index].obj.tail -= // Drop the trailing records by pulling the vertex's tail pointer back.
+        (old_count - count) * split_context.class1_record_size;
+
+    unsigned coverage_id =
+        graph.mutable_index_for_offset (split_context.this_index, &coverage); // NOTE(review): presumably yields a child that is safe to rewrite in place - confirm.
+    unsigned class_def_1_id =
+        graph.mutable_index_for_offset (split_context.this_index, &classDef1);
+    auto& coverage_v = graph.vertices_[coverage_id];
+    auto& class_def_1_v = graph.vertices_[class_def_1_id];
+    Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
+    ClassDef* class_def_1_table = (ClassDef*) class_def_1_v.obj.head;
+    if (!coverage_table
+        || !coverage_table->sanitize (coverage_v)
+        || !class_def_1_table
+        || !class_def_1_table->sanitize (class_def_1_v))
+      return false; // NOTE(review): class1Count and the tail pointer have already been changed on this path.
+
+    auto klass_map = // (glyph, class) for every covered glyph whose class1 value survives the truncation.
+    + coverage_table->iter ()
+    | hb_map_retains_sorting ([&] (hb_codepoint_t gid) {
+      return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (gid, class_def_1_table->get_class (gid));
+    })
+    | hb_filter ([&] (hb_codepoint_t klass) {
+      return klass < count;
+    }, hb_second)
+    ;
+
+    if (!Coverage::make_coverage (split_context.c,
+                                  + klass_map | hb_map_retains_sorting (hb_first),
+                                  coverage_id, // Written into the existing coverage node.
+                                  coverage_v.table_size ())) // The current table size is passed as the size argument.
+      return false;
+
+    return ClassDef::make_class_def (split_context.c,
+                                     + klass_map,
+                                     class_def_1_id, // Written into the existing classDef1 node.
+                                     class_def_1_v.table_size ());
+  }
+
+  hb_hashmap_t<unsigned, unsigned>
+  get_all_device_tables (gsubgpos_graph_context_t& c,
+                         unsigned this_index) const
+  {
+    // Maps the position of every real link on this object to the index of
+    // the object it points at.
+    const auto& obj = c.graph.object (this_index);
+
+    hb_hashmap_t<unsigned, unsigned> position_to_object;
+    for (const auto& link : obj.real_links)
+      position_to_object.set (link.position, link.objidx);
+    return position_to_object;
+  }
+
+  const Coverage* get_coverage (gsubgpos_graph_context_t& c,
+                          unsigned this_index) const
+  {
+    // Resolves the coverage offset to its vertex and returns the table, or
+    // the Null coverage when the vertex is empty or fails sanitize.
+    auto& vertex = c.graph.vertices_[c.graph.index_for_offset (this_index, &coverage)];
+    Coverage* table = (Coverage*) vertex.obj.head;
+
+    const bool usable = table && table->sanitize (vertex);
+    if (usable)
+      return table;
+    return &Null(Coverage);
+  }
+
+  const ClassDef* get_class_def_1 (gsubgpos_graph_context_t& c,
+                                   unsigned this_index) const
+  {
+    // Resolves the classDef1 offset to its vertex and returns the table, or
+    // the Null class def when the vertex is empty or fails sanitize.
+    auto& vertex = c.graph.vertices_[c.graph.index_for_offset (this_index, &classDef1)];
+    ClassDef* table = (ClassDef*) vertex.obj.head;
+
+    const bool usable = table && table->sanitize (vertex);
+    if (usable)
+      return table;
+    return &Null(ClassDef);
+  }
+
+  // Sums the subgraph sizes of the device tables linked from one value record.
+  //
+  // device_tables        maps link position within this subtable -> object index.
+  // device_table_indices entries of the value record (relative to
+  //                      value_record_index) that can hold a device table offset.
+  // value_record_index   index into values[] of the record's first value.
+  // visited              passed through to find_subgraph_size and shared by the
+  //                      caller across calls.
+  //
+  // The index vector is taken by const reference: this is called twice per
+  // (class1, class2) pair, so a by-value parameter would copy it every time.
+  unsigned size_of_value_record_children (gsubgpos_graph_context_t& c,
+                                          const hb_hashmap_t<unsigned, unsigned>& device_tables,
+                                          const hb_vector_t<unsigned>& device_table_indices,
+                                          unsigned value_record_index,
+                                          hb_set_t& visited)
+  {
+    unsigned size = 0;
+    for (unsigned i : device_table_indices)
+    {
+      OT::Layout::GPOS_impl::Value* record = &values[value_record_index + i];
+      unsigned record_position = ((char*) record) - ((char*) this);
+      unsigned* obj_idx;
+      // Slots with no link recorded at their position contribute nothing.
+      if (!device_tables.has (record_position, &obj_idx)) continue;
+      size += c.graph.find_subgraph_size (*obj_idx, visited);
+    }
+    return size;
+  }
+
+  unsigned size_of (gsubgpos_graph_context_t& c,
+                    unsigned this_index,
+                    const void* offset) const
+  {
+    // Follow the given offset field to its child vertex and report that
+    // vertex's table size.
+    auto& child = c.graph.vertices_[c.graph.index_for_offset (this_index, offset)];
+    return child.table_size ();
  }
 };

--- a/src/graph/split-helpers.hh
+++ b/src/graph/split-helpers.hh
@ -0,0 +1,69 @@
+/*
+ * Copyright © 2022  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef GRAPH_SPLIT_HELPERS_HH
+#define GRAPH_SPLIT_HELPERS_HH
+
+namespace graph {
+
+template<typename Context>
+HB_INTERNAL
+hb_vector_t<unsigned> actuate_subtable_split (Context& split_context,
+                                              const hb_vector_t<unsigned>& split_points)
+{
+  // Each split point starts a new subtable that runs up to the next split
+  // point (or the original count for the last one). Once every range has been
+  // cloned, the original is shrunk to end at the first split point. On any
+  // failure the returned vector is reset and flagged as in error.
+  hb_vector_t<unsigned> created;
+  if (!split_points)
+    return created;
+
+  const unsigned num_splits = split_points.length;
+  bool ok = true;
+  for (unsigned idx = 0; ok && idx < num_splits; idx++)
+  {
+    unsigned range_start = split_points[idx];
+    unsigned range_end;
+    if (idx + 1 < num_splits)
+      range_end = split_points[idx + 1];
+    else
+      range_end = split_context.original_count ();
+
+    unsigned new_id = split_context.clone_range (range_start, range_end);
+    if (new_id == (unsigned) -1)
+      ok = false;
+    else
+      created.push (new_id);
+  }
+
+  if (ok)
+    ok = split_context.shrink (split_points[0]);
+
+  if (!ok)
+  {
+    created.reset ();
+    created.allocated = -1; // mark error
+  }
+
+  return created;
+}
+
+}
+
+#endif  // GRAPH_SPLIT_HELPERS_HH
--- a/src/graph/test-classdef-graph.cc
+++ b/src/graph/test-classdef-graph.cc
@ -0,0 +1,119 @@
+/*
+ * Copyright © 2022  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include "gsubgpos-context.hh"
+#include "classdef-graph.hh"
+
+typedef hb_pair_t<hb_codepoint_t, hb_codepoint_t> gid_and_class_t;
+typedef hb_vector_t<gid_and_class_t> gid_and_class_list_t;
+
+
+static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klass,
+                                 unsigned cov_expected, unsigned class_def_expected)
+{
+  // Builds a fresh estimator over `list` and checks both incremental sizes
+  // reported for `klass`, printing a FAIL line for the first mismatch.
+  graph::class_def_size_estimator_t estimator (list.iter ());
+
+  const unsigned cov_actual = estimator.incremental_coverage_size (klass);
+  if (cov_actual != cov_expected)
+  {
+    printf ("FAIL: coverage expected size %u but was %u\n", cov_expected, cov_actual);
+    return false;
+  }
+
+  const unsigned class_def_actual = estimator.incremental_class_def_size (klass);
+  if (class_def_actual == class_def_expected)
+    return true;
+
+  printf ("FAIL: class def expected size %u but was %u\n", class_def_expected, class_def_actual);
+  return false;
+}
+
+static void test_class_and_coverage_size_estimates ()
+{ // Each assert is (list, class, expected coverage increment, expected class def increment).
+  gid_and_class_list_t empty = {
+  };
+  assert (incremental_size_is (empty, 0, 0, 0)); // No glyphs: nothing is added for any class.
+  assert (incremental_size_is (empty, 1, 0, 0));
+
+  gid_and_class_list_t class_zero = {
+    {5, 0},
+  };
+  assert (incremental_size_is (class_zero, 0, 2, 0)); // Class 0 adds to coverage but nothing to the class def.
+
+  gid_and_class_list_t consecutive = {
+    {4, 0},
+    {5, 0},
+    {6, 1},
+    {7, 1},
+    {8, 2},
+    {9, 2},
+    {10, 2},
+    {11, 2},
+  };
+  assert (incremental_size_is (consecutive, 0, 4, 0));
+  assert (incremental_size_is (consecutive, 1, 4, 4));
+  assert (incremental_size_is (consecutive, 2, 8, 6));
+
+  gid_and_class_list_t non_consecutive = { // Same shape as above, but with a gap at glyph 8 before class 2.
+    {4, 0},
+    {5, 0},
+
+    {6, 1},
+    {7, 1},
+
+    {9, 2},
+    {10, 2},
+    {11, 2},
+    {12, 2},
+  };
+  assert (incremental_size_is (non_consecutive, 0, 4, 0));
+  assert (incremental_size_is (non_consecutive, 1, 4, 6));
+  assert (incremental_size_is (non_consecutive, 2, 8, 6));
+
+  gid_and_class_list_t multiple_ranges = { // Class 1 is spread over three separate runs of glyphs.
+    {4, 0},
+    {5, 0},
+
+    {6, 1},
+    {7, 1},
+
+    {9, 1},
+
+    {11, 1},
+    {12, 1},
+    {13, 1},
+  };
+  assert (incremental_size_is (multiple_ranges, 0, 4, 0));
+  assert (incremental_size_is (multiple_ranges, 1, 2 * 6, 3 * 6));
+}
+
+int
+main (int argc, char **argv)
+{
+  // Single test group; a failed expectation aborts through assert.
+  test_class_and_coverage_size_estimates ();
+  return 0;
+}
--- a/src/hb-serialize.hh
+++ b/src/hb-serialize.hh
@ -139,6 +139,11 @@ struct hb_serialize_context_t
        objidx = o.objidx;
      }
 #endif
+
+      HB_INTERNAL static int cmp (const void* a, const void* b)
+      {
+        // Comparator over two link_t objects, keyed on their `position` field.
+        const link_t* lhs = (const link_t*) a;
+        const link_t* rhs = (const link_t*) b;
+        return lhs->position - rhs->position;
+      }
    };

    char *head;
--- a/src/meson.build
+++ b/src/meson.build
@ -350,6 +350,8 @@ hb_subset_sources = files(
  'graph/gsubgpos-graph.hh',
  'graph/pairpos-graph.hh',
  'graph/coverage-graph.hh',
+  'graph/classdef-graph.hh',
+  'graph/split-helpers.hh',
  'hb-subset.cc',
  'hb-subset.hh',
 )
@ -580,6 +582,7 @@ if get_option('tests').enabled()
    'test-ot-tag': ['hb-ot-tag.cc'],
    'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'],
    'test-repacker': ['test-repacker.cc', 'hb-static.cc', 'graph/gsubgpos-context.cc'],
+    'test-classdef-graph': ['graph/test-classdef-graph.cc', 'hb-static.cc', 'graph/gsubgpos-context.cc'],
    'test-set': ['test-set.cc', 'hb-static.cc'],
    'test-serialize': ['test-serialize.cc', 'hb-static.cc'],
    'test-unicode-ranges': ['test-unicode-ranges.cc'],
--- a/src/test-repacker.cc
+++ b/src/test-repacker.cc
@ -143,28 +143,68 @@ static unsigned add_extension (unsigned child,

 }

-static unsigned add_coverage (char start, char end,
+// Adds a coverage table for [start, end]
+static unsigned add_coverage (unsigned start, unsigned end,
                              hb_serialize_context_t* c)
 {
   if (end - start == 1)
   {
-    char coverage[] = {
+    uint8_t coverage[] = { // Two glyphs only: written as a format 1 glyph list.
       0, 1, // format
       0, 2, // count
-      0, start, // glyph[0]
-      0, end,   // glyph[1]
+
+      (uint8_t) ((start >> 8) & 0xFF), // High byte first, so glyph ids above 255 can be encoded.
+      (uint8_t) (start & 0xFF), // glyph[0]
+
+      (uint8_t) ((end >> 8) & 0xFF),
+      (uint8_t) (end & 0xFF), // glyph[1]
     };
-    return add_object (coverage, 8, c);
+    return add_object ((char*) coverage, 8, c);
   }
 
-  char coverage[] = {
+  uint8_t coverage[] = { // Otherwise: format 2 with a single range record.
     0, 2, // format
     0, 1, // range count
-    0, start, // start
-    0, end,   // end
+
+    (uint8_t) ((start >> 8) & 0xFF),
+    (uint8_t) (start & 0xFF), // start
+
+    (uint8_t) ((end >> 8) & 0xFF),
+    (uint8_t) (end & 0xFF), // end
+
     0, 0,
   };
-  return add_object (coverage, 10, c);
+  return add_object ((char*) coverage, 10, c);
+}
+
+// Serializes a format 1 class def for the glyphs [start_glyph, end_glyph):
+// the first glyph is given class 1, the next class 2, and so on.
+static unsigned add_class_def (uint16_t start_glyph,
+                               uint16_t end_glyph,
+                               hb_serialize_context_t* c)
+{
+  const unsigned num_glyphs = end_glyph - start_glyph;
+
+  // All 16-bit fields are written high byte first.
+  uint8_t header[] = {
+    0, 1, // format
+
+    (uint8_t) ((start_glyph >> 8) & 0xFF),
+    (uint8_t) (start_glyph & 0xFF), // start_glyph
+
+    (uint8_t) ((num_glyphs >> 8) & 0xFF),
+    (uint8_t) (num_glyphs & 0xFF), // count
+  };
+  start_object ((char*) header, 6, c);
+
+  uint16_t klass = 1;
+  while (klass <= num_glyphs)
+  {
+    const uint8_t hi = (uint8_t) ((klass >> 8) & 0xFF);
+    const uint8_t lo = (uint8_t) (klass & 0xFF);
+    uint8_t class_value[] = { hi, lo }; // class value for this glyph
+    extend ((char*) class_value, 2, c);
+    klass++;
+  }
+
+  return c->pop_pack (false);
 }

 static unsigned add_pair_pos_1 (unsigned* pair_sets,
@ -192,6 +232,72 @@ static unsigned add_pair_pos_1 (unsigned* pair_sets,
  return c->pop_pack (false);
 }

+// Serializes one PairPosFormat2 subtable and returns its object index.
+//
+// Every plain value in the records for class1 index i is written as the byte
+// (i + starting_class) repeated twice. When device_tables is non-null, each
+// (class1, class2) pair is followed by one offset taken in order from
+// device_tables, so class_def_1_count * class_def_2_count entries are read.
+static unsigned add_pair_pos_2 (unsigned starting_class,
+                                unsigned coverage,
+                                unsigned class_def_1, uint16_t class_def_1_count,
+                                unsigned class_def_2, uint16_t class_def_2_count,
+                                unsigned* device_tables,
+                                hb_serialize_context_t* c)
+{
+  uint8_t format[] = {
+    0, 2
+  };
+
+  start_object ((char*) format, 2, c);
+  add_offset (coverage, c);
+
+  // valueFormat1 sets three flag bits and valueFormat2 one, which is why the
+  // record loop below writes four plain 16-bit values per pair.
+  uint8_t format1 = 0x01 | 0x02 | 0x08;
+  uint8_t format2 = 0x04;
+  if (device_tables) {
+    // Extra flag bit for the device table offset appended to each pair.
+    format2 |= 0x20;
+  }
+  uint8_t value_format[] = {
+    0, format1,
+    0, format2,
+  };
+
+  extend ((char*) value_format, 4, c);
+
+  add_offset (class_def_1, c);
+  add_offset (class_def_2, c);
+
+  uint8_t class_counts[] = {
+    (uint8_t) ((class_def_1_count >> 8) & 0xFF),
+    (uint8_t) (class_def_1_count & 0xFF),
+    (uint8_t) ((class_def_2_count >> 8) & 0xFF),
+    (uint8_t) (class_def_2_count & 0xFF),
+  };
+  extend ((char*) class_counts, 4, c);
+
+  // Records are streamed straight into the serializer, so no scratch buffer
+  // is needed.
+  int device_index = 0;
+  for (uint16_t i = 0; i < class_def_1_count; i++)
+  {
+
+    for (uint16_t j = 0; j < class_def_2_count; j++)
+    {
+      for (int k = 0; k < 4; k++) {
+        uint8_t value[] = {
+          (uint8_t) (i + starting_class),
+          (uint8_t) (i + starting_class),
+        };
+        extend ((char*) value, 2, c);
+      }
+
+      if (device_tables) {
+        add_offset (device_tables[device_index++], c);
+      }
+    }
+  }
+
+  return c->pop_pack (false);
+}
+
+
 static void run_resolve_overflow_test (const char* name,
                                       hb_serialize_context_t& overflowing,
                                       hb_serialize_context_t& expected,
@ -1086,18 +1192,16 @@ populate_serializer_with_large_pair_pos_1 (hb_serialize_context_t* c,
  unsigned pair_pos_2 = add_object (large_string.c_str(), 200, c);

  if (as_extension) {
-
+    pair_pos_2 = add_extension (pair_pos_2, 2, c);
    for (int i = num_pair_pos_1 - 1; i >= 0; i--)
      pair_pos_1[i] = add_extension (pair_pos_1[i], 2, c);
-    pair_pos_2 = add_extension (pair_pos_2, 2, c);
  }

  start_lookup (as_extension ? 9 : 2, 1 + num_pair_pos_1, c);

-  add_offset (pair_pos_2, c);
  for (int i = 0; i < num_pair_pos_1; i++)
    add_offset (pair_pos_1[i], c);
-
+  add_offset (pair_pos_2, c);

  unsigned lookup = finish_lookup (c);

@ -1108,6 +1212,102 @@ populate_serializer_with_large_pair_pos_1 (hb_serialize_context_t* c,
  c->end_serialize();
 }

+template<int num_pair_pos_2, int num_class_1, int num_class_2>
+static void
+populate_serializer_with_large_pair_pos_2 (hb_serialize_context_t* c, // Builds a table with one lookup holding num_pair_pos_2 PairPosFormat2 subtables.
+                                           bool as_extension = false, // Wrap every subtable in an extension and use lookup type 9.
+                                           bool with_device_tables = false, // Give every (class1, class2) pair its own device table object.
+                                           bool extra_table = true) // Also add a 100000 byte filler subtable as the first subtable.
+{
+  std::string large_string(100000, 'a');
+  c->start_serialize<char> ();
+
+  unsigned coverage[num_pair_pos_2];
+  unsigned class_def_1[num_pair_pos_2];
+  unsigned class_def_2[num_pair_pos_2];
+  unsigned pair_pos_2[num_pair_pos_2];
+
+  unsigned* device_tables = (unsigned*) calloc (num_pair_pos_2 * num_class_1 * num_class_2, // One slot per (subtable, class1, class2); filled only when with_device_tables.
+                                                sizeof(unsigned));
+
+  // Total glyphs = num_class_1 * num_pair_pos_2
+  for (int i = num_pair_pos_2 - 1; i >= 0; i--)
+  {
+    unsigned start_glyph = 5 + i * num_class_1;
+    if (num_class_2 >= num_class_1) // The two branches differ only in the order the class defs are serialized.
+    {
+      class_def_2[i] = add_class_def (11,
+                                      10 + num_class_2, c);
+      class_def_1[i] = add_class_def (start_glyph + 1,
+                                      start_glyph + num_class_1,
+                                      c);
+    } else {
+      class_def_1[i] = add_class_def (start_glyph + 1,
+                                      start_glyph + num_class_1,
+                                      c);
+      class_def_2[i] = add_class_def (11,
+                                      10 + num_class_2, c);
+    }
+
+    coverage[i] = add_coverage (start_glyph, // Covers num_class_1 glyphs starting at start_glyph (inclusive end).
+                                start_glyph + num_class_1 - 1,
+                                c);
+
+    if (with_device_tables)
+    {
+      for(int j = (i + 1) * num_class_1 * num_class_2 - 1; // Walks this subtable's slice of device_tables from last to first.
+          j >= i * num_class_1 * num_class_2;
+          j--)
+      {
+        uint8_t table[] = { // A 2-byte object holding j, high byte first.
+          (uint8_t) ((j >> 8) & 0xFF),
+          (uint8_t) (j & 0xFF),
+        };
+        device_tables[j] = add_object ((char*) table, 2, c);
+      }
+    }
+
+    pair_pos_2[i] = add_pair_pos_2 (1 + i * num_class_1, // starting_class: value bytes continue across subtables.
+                                    coverage[i],
+                                    class_def_1[i], num_class_1,
+                                    class_def_2[i], num_class_2,
+                                    with_device_tables
+                                    ? &device_tables[i * num_class_1 * num_class_2]
+                                    : nullptr,
+                                    c);
+  }
+
+
+  unsigned pair_pos_1 = 0;
+  if (extra_table) pair_pos_1 = add_object (large_string.c_str(), 100000, c);
+
+  if (as_extension) {
+    for (int i = num_pair_pos_2 - 1; i >= 0; i--)
+      pair_pos_2[i] = add_extension (pair_pos_2[i], 2, c);
+
+    if (extra_table)
+      pair_pos_1 = add_extension (pair_pos_1, 2, c);
+  }
+
+  start_lookup (as_extension ? 9 : 2, 1 + num_pair_pos_2, c); // NOTE(review): count stays 1 + num_pair_pos_2 even when extra_table is false and only num_pair_pos_2 offsets follow - confirm this is intended.
+
+  if (extra_table)
+    add_offset (pair_pos_1, c);
+
+  for (int i = 0; i < num_pair_pos_2; i++)
+    add_offset (pair_pos_2[i], c);
+
+  unsigned lookup = finish_lookup (c);
+
+  unsigned lookup_list = add_lookup_list (&lookup, 1, c);
+
+  add_gsubgpos_header (lookup_list, c);
+
+  c->end_serialize();
+
+  free (device_tables);
+}
+
 static void test_sort_shortest ()
 {
  size_t buffer_size = 100;
@ -1523,6 +1723,74 @@ static void test_resolve_with_extension_pair_pos_1_split ()
  free (expected_buffer);
 }

+static void test_resolve_with_basic_pair_pos_2_split () // Builds an input graph and an expected graph, then hands both to run_resolve_overflow_test.
+{
+  size_t buffer_size = 300000; // Byte size used for both malloc'd serializer buffers below.
+  void* buffer = malloc (buffer_size);
+  assert (buffer);
+  hb_serialize_context_t c (buffer, buffer_size); // Serializer for the input table.
+  populate_serializer_with_large_pair_pos_2 <1, 4, 3000>(&c); // NOTE(review): template args presumably <num_pair_pos_2, num_class_1, num_class_2> - confirm against the declaration.
+
+  void* expected_buffer = malloc (buffer_size);
+  assert (expected_buffer);
+  hb_serialize_context_t e (expected_buffer, buffer_size); // Serializer for the expected table.
+  populate_serializer_with_large_pair_pos_2 <2, 2, 3000>(&e, true); // NOTE(review): 'true' is presumably as_extension; first two template args go from <1, 4> to <2, 2> - confirm.
+
+  run_resolve_overflow_test ("test_resolve_with_basic_pair_pos_2_split",
+                             c, // input serializer
+                             e, // expected serializer
+                             20, // NOTE(review): meaning is defined by run_resolve_overflow_test, not visible here.
+                             true, // NOTE(review): meaning is defined by run_resolve_overflow_test, not visible here.
+                             HB_TAG('G', 'P', 'O', 'S'));
+  free (buffer); // Release both malloc'd buffers once the check has run.
+  free (expected_buffer);
+}
+
+static void test_resolve_with_close_to_limit_pair_pos_2_split () // Same shape as the other pair_pos_2 split tests, using template args <1, 1596, 10> vs <2, 798, 10>.
+{
+  size_t buffer_size = 300000; // Byte size used for both malloc'd serializer buffers below.
+  void* buffer = malloc (buffer_size);
+  assert (buffer);
+  hb_serialize_context_t c (buffer, buffer_size); // Serializer for the input table.
+  populate_serializer_with_large_pair_pos_2 <1, 1596, 10>(&c, true, false, false); // NOTE(review): flags presumably (as_extension, with_device_tables, extra_table) - confirm order against the declaration.
+
+  void* expected_buffer = malloc (buffer_size);
+  assert (expected_buffer);
+  hb_serialize_context_t e (expected_buffer, buffer_size); // Serializer for the expected table.
+  populate_serializer_with_large_pair_pos_2 <2, 798, 10>(&e, true, false, false); // Same flags as the input; the second template arg is halved (1596 -> 798) while the first doubles (1 -> 2).
+
+  run_resolve_overflow_test ("test_resolve_with_close_to_limit_pair_pos_2_split",
+                             c, // input serializer
+                             e, // expected serializer
+                             20, // NOTE(review): meaning is defined by run_resolve_overflow_test, not visible here.
+                             true, // NOTE(review): meaning is defined by run_resolve_overflow_test, not visible here.
+                             HB_TAG('G', 'P', 'O', 'S'));
+  free (buffer); // Release both malloc'd buffers once the check has run.
+  free (expected_buffer);
+}
+
+static void test_resolve_with_pair_pos_2_split_with_device_tables () // Variant of the pair_pos_2 split test that passes 'true' as the third populate argument on both sides.
+{
+  size_t buffer_size = 300000; // Byte size used for both malloc'd serializer buffers below.
+  void* buffer = malloc (buffer_size);
+  assert (buffer);
+  hb_serialize_context_t c (buffer, buffer_size); // Serializer for the input table.
+  populate_serializer_with_large_pair_pos_2 <1, 4, 2000>(&c, false, true); // NOTE(review): args presumably (as_extension=false, with_device_tables=true) - confirm against the declaration.
+
+  void* expected_buffer = malloc (buffer_size);
+  assert (expected_buffer);
+  hb_serialize_context_t e (expected_buffer, buffer_size); // Serializer for the expected table.
+  populate_serializer_with_large_pair_pos_2 <2, 2, 2000>(&e, true, true); // Second argument flips to true for the expected side; first two template args go from <1, 4> to <2, 2>.
+
+  run_resolve_overflow_test ("test_resolve_with_pair_pos_2_split_with_device_tables",
+                             c, // input serializer
+                             e, // expected serializer
+                             20, // NOTE(review): meaning is defined by run_resolve_overflow_test, not visible here.
+                             true, // NOTE(review): meaning is defined by run_resolve_overflow_test, not visible here.
+                             HB_TAG('G', 'P', 'O', 'S'));
+  free (buffer); // Release both malloc'd buffers once the check has run.
+  free (expected_buffer);
+}

 static void test_resolve_overflows_via_splitting_spaces ()
 {
@ -1673,7 +1941,13 @@ main (int argc, char **argv)
  test_resolve_with_extension_promotion ();
  test_resolve_with_basic_pair_pos_1_split ();
  test_resolve_with_extension_pair_pos_1_split ();
+  test_resolve_with_basic_pair_pos_2_split ();
+  test_resolve_with_pair_pos_2_split_with_device_tables ();
+  test_resolve_with_close_to_limit_pair_pos_2_split ();

+  // TODO(grieger): have run overflow tests compare graph equality not final packed binary.
+  // TODO(grieger): split test where multiple subtables in one lookup are split to test link ordering.
+  // TODO(grieger): split test where coverage table in subtable that is being split is shared.
  // TODO(grieger): test with extensions already mixed in as well.
  // TODO(grieger): test two layer ext promotion setup.
  // TODO(grieger): test sorting by subtables per byte in ext. promotion.