diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 208c9f852..6b6393c8f 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -332,7 +332,6 @@ struct CmapSubtableFormat4
     if (unlikely (!c->extend_min (this))) return;
     this->format = 4;
 
-    // TODO(grieger): does pre-alloc make this faster?
     hb_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> cp_to_gid {
       format4_iter
     };
@@ -1664,13 +1663,7 @@ struct cmap
     if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false);
 
     auto it =
-    + hb_iter (c->plan->unicodes)
-    | hb_map ([&] (hb_codepoint_t _)
-	      {
-		hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID;
-		c->plan->new_gid_for_codepoint (_, &new_gid);
-		return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid);
-	      })
+    + c->plan->unicode_to_new_gid_list.iter ()
     | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
 		 { return (_.second != HB_MAP_VALUE_INVALID); })
     ;
diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc
index 74b7e3977..4cff60e54 100644
--- a/src/hb-subset-plan.cc
+++ b/src/hb-subset-plan.cc
@@ -287,6 +287,14 @@ _remove_invalid_gids (hb_set_t *glyphs,
   }
 }
 
+/* qsort comparator, ascending by codepoint; no `a - b`: unsigned wrap could flip the sign. */
+static inline int
+_compare_cp_gid_pair (const void* a, const void* b)
+{
+  const auto *x = (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> *) a, *y = (decltype (x)) b;
+  return (x->first > y->first) - (x->first < y->first);
+}
+
 static void
 _populate_unicodes_to_retain (const hb_set_t *unicodes,
                               const hb_set_t *glyphs,
@@ -294,12 +302,13 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
 {
   OT::cmap::accelerator_t cmap (plan->source);
 
-  constexpr static const int size_threshold = 4096;
-
+  unsigned size_threshold = plan->source->get_num_glyphs ();
   if (glyphs->is_empty () && unicodes->get_population () < size_threshold)
   {
-    /* This is the fast path if it's anticipated that size of unicodes
-     * is << than the number of codepoints in the font. */
+    // This approach to collection is faster, but can only be used if glyphs
+    // are not being explicitly added to the subset and the input unicodes set is
+    // not excessively large (e.g. an inverted set).
+    plan->unicode_to_new_gid_list.alloc (unicodes->get_population ());
     for (hb_codepoint_t cp : *unicodes)
     {
       hb_codepoint_t gid;
@@ -310,12 +319,19 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
       }
 
       plan->codepoint_to_glyph->set (cp, gid);
+      plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
     }
   }
   else
   {
+    // This approach is slower, but can handle adding in glyphs to the subset and will match
+    // them with cmap entries.
     hb_map_t unicode_glyphid_map;
     cmap.collect_mapping (hb_set_get_empty (), &unicode_glyphid_map);
+    plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population ()
+                                                + glyphs->get_population (),
+                                                unicode_glyphid_map.get_population ()));
+
 
     for (hb_pair_t<hb_codepoint_t, hb_codepoint_t> cp_gid :
 	 + unicode_glyphid_map.iter ())
@@ -324,8 +340,11 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
 	continue;
 
       plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second);
+      plan->unicode_to_new_gid_list.push (hb_pair (cp_gid.first, cp_gid.second));
     }
 
+    plan->unicode_to_new_gid_list.qsort (_compare_cp_gid_pair);
+
     /* Add gids which where requested, but not mapped in cmap */
     // TODO(garretrieger):
     // Once https://github.com/harfbuzz/harfbuzz/issues/3169
@@ -338,8 +357,13 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
     }
   }
 
-  + plan->codepoint_to_glyph->keys ()   | hb_sink (plan->unicodes);
-  + plan->codepoint_to_glyph->values () | hb_sink (plan->_glyphset_gsub);
+  for (unsigned i = 0; i < plan->unicode_to_new_gid_list.length; i++)
+  {
+    // Use raw array access for performance.
+    hb_pair_t<hb_codepoint_t, hb_codepoint_t> pair = plan->unicode_to_new_gid_list.arrayZ[i];
+    plan->unicodes->add(pair.first);
+    plan->_glyphset_gsub->add(pair.second);
+  }
 }
 
 static void
@@ -485,6 +509,9 @@ hb_subset_plan_create_or_fail (hb_face_t	 *face,
   plan->successful = true;
   plan->flags = input->flags;
   plan->unicodes = hb_set_create ();
+
+  plan->unicode_to_new_gid_list.init ();
+
   plan->name_ids = hb_set_copy (input->sets.name_ids);
   _nameid_closure (face, plan->name_ids);
   plan->name_languages = hb_set_copy (input->sets.name_languages);
@@ -536,6 +563,14 @@ hb_subset_plan_create_or_fail (hb_face_t	 *face,
 				  plan->reverse_glyph_map,
 				  &plan->_num_output_glyphs);
 
+  // Now that we have the old to new gid map, update the unicode to new gid list.
+  for (unsigned i = 0; i < plan->unicode_to_new_gid_list.length; i++)
+  {
+    // Use raw array access for performance.
+    plan->unicode_to_new_gid_list.arrayZ[i].second =
+        plan->glyph_map->get(plan->unicode_to_new_gid_list.arrayZ[i].second);
+  }
+
   if (unlikely (plan->in_error ())) {
     hb_subset_plan_destroy (plan);
     return nullptr;
@@ -558,6 +593,7 @@ hb_subset_plan_destroy (hb_subset_plan_t *plan)
   if (!hb_object_destroy (plan)) return;
 
   hb_set_destroy (plan->unicodes);
+  plan->unicode_to_new_gid_list.fini ();
   hb_set_destroy (plan->name_ids);
   hb_set_destroy (plan->name_languages);
   hb_set_destroy (plan->layout_features);
diff --git a/src/hb-subset-plan.hh b/src/hb-subset-plan.hh
index ab2c4c302..0f20d74db 100644
--- a/src/hb-subset-plan.hh
+++ b/src/hb-subset-plan.hh
@@ -44,6 +44,7 @@ struct hb_subset_plan_t
 
   // For each cp that we'd like to retain maps to the corresponding gid.
   hb_set_t *unicodes;
+  hb_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> unicode_to_new_gid_list;
 
   // name_ids we would like to retain
   hb_set_t *name_ids;