From aeb50b8942b92cda2b1d5bb03d685f97f79faf5d Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 10 May 2022 18:06:53 -0600 Subject: [PATCH 1/8] [subset] Retain buffer across table subset operations --- src/hb-subset.cc | 61 +++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/src/hb-subset.cc b/src/hb-subset.cc index 4588268b7..cc695111d 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -136,7 +136,7 @@ _try_subset (const TableType *table, return needed; } - buf_size += (buf_size >> 1) + 32; + buf_size = buf_size * 2 + 16; DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", HB_UNTAG (c->table_tag), buf_size); @@ -153,7 +153,7 @@ _try_subset (const TableType *table, template static bool -_subset (hb_subset_plan_t *plan) +_subset (hb_subset_plan_t *plan, hb_vector_t &buf) { hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table (plan->source); const TableType *table = source_blob->as (); @@ -167,7 +167,6 @@ _subset (hb_subset_plan_t *plan) return false; } - hb_vector_t buf; /* TODO Not all tables are glyph-related. 'name' table size for example should not be * affected by number of glyphs. Accommodate that. */ unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length); @@ -274,7 +273,9 @@ _passthrough (hb_subset_plan_t *plan, hb_tag_t tag) } static bool -_subset_table (hb_subset_plan_t *plan, hb_tag_t tag) +_subset_table (hb_subset_plan_t *plan, + hb_vector_t &buf, + hb_tag_t tag) { if (plan->no_subset_tables->has (tag)) { return _passthrough (plan, tag); @@ -283,42 +284,42 @@ _subset_table (hb_subset_plan_t *plan, hb_tag_t tag) DEBUG_MSG (SUBSET, nullptr, "subset %c%c%c%c", HB_UNTAG (tag)); switch (tag) { - case HB_OT_TAG_glyf: return _subset (plan); - case HB_OT_TAG_hdmx: return _subset (plan); - case HB_OT_TAG_name: return _subset (plan); + case HB_OT_TAG_glyf: return _subset (plan, buf); + case HB_OT_TAG_hdmx: return _subset (plan, buf); + case HB_OT_TAG_name: return _subset (plan, buf); case HB_OT_TAG_head: if (_is_table_present (plan->source, HB_OT_TAG_glyf) && !_should_drop_table (plan, HB_OT_TAG_glyf)) return true; /* skip head, handled by glyf */ - return _subset (plan); + return _subset (plan, buf); case HB_OT_TAG_hhea: return true; /* skip hhea, handled by hmtx */ - case HB_OT_TAG_hmtx: return _subset (plan); + case HB_OT_TAG_hmtx: return _subset (plan, buf); case HB_OT_TAG_vhea: return true; /* skip vhea, handled by vmtx */ - case HB_OT_TAG_vmtx: return _subset (plan); - case HB_OT_TAG_maxp: return _subset (plan); - case HB_OT_TAG_sbix: return _subset (plan); + case HB_OT_TAG_vmtx: return _subset (plan, buf); + case HB_OT_TAG_maxp: return _subset (plan, buf); + case HB_OT_TAG_sbix: return _subset (plan, buf); case HB_OT_TAG_loca: return true; /* skip loca, handled by glyf */ - case HB_OT_TAG_cmap: return _subset (plan); - case HB_OT_TAG_OS2 : return _subset (plan); - case HB_OT_TAG_post: return _subset (plan); - case HB_OT_TAG_COLR: return _subset (plan); - case HB_OT_TAG_CPAL: return _subset (plan); - case HB_OT_TAG_CBLC: return _subset (plan); + case HB_OT_TAG_cmap: return _subset (plan, buf); + case HB_OT_TAG_OS2 : return _subset (plan, buf); + case HB_OT_TAG_post: return _subset (plan, buf); + case HB_OT_TAG_COLR: return _subset (plan, buf); + case HB_OT_TAG_CPAL: return _subset (plan, buf); + case HB_OT_TAG_CBLC: return _subset (plan, buf); case HB_OT_TAG_CBDT: return true; /* skip CBDT, handled by CBLC */ - case HB_OT_TAG_MATH: return _subset (plan); + case HB_OT_TAG_MATH: return _subset (plan, buf); #ifndef HB_NO_SUBSET_CFF - case HB_OT_TAG_cff1: return _subset (plan); - case HB_OT_TAG_cff2: return _subset (plan); - case HB_OT_TAG_VORG: return _subset (plan); + case HB_OT_TAG_cff1: return _subset (plan, buf); + case HB_OT_TAG_cff2: return _subset (plan, buf); + case HB_OT_TAG_VORG: return _subset (plan, buf); #endif #ifndef HB_NO_SUBSET_LAYOUT - case HB_OT_TAG_GDEF: return _subset (plan); - case HB_OT_TAG_GSUB: return _subset (plan); - case HB_OT_TAG_GPOS: return _subset (plan); - case HB_OT_TAG_gvar: return _subset (plan); - case HB_OT_TAG_HVAR: return _subset (plan); - case HB_OT_TAG_VVAR: return _subset (plan); + case HB_OT_TAG_GDEF: return _subset (plan, buf); + case HB_OT_TAG_GSUB: return _subset (plan, buf); + case HB_OT_TAG_GPOS: return _subset (plan, buf); + case HB_OT_TAG_gvar: return _subset (plan, buf); + case HB_OT_TAG_HVAR: return _subset (plan, buf); + case HB_OT_TAG_VVAR: return _subset (plan, buf); #endif default: @@ -379,6 +380,8 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan) bool success = true; hb_tag_t table_tags[32]; unsigned offset = 0, num_tables = ARRAY_LENGTH (table_tags); + hb_vector_t buf; + buf.alloc (4096 - 16); while ((hb_face_get_table_tags (plan->source, offset, &num_tables, table_tags), num_tables)) { for (unsigned i = 0; i < num_tables; ++i) @@ -386,7 +389,7 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan) hb_tag_t tag = table_tags[i]; if (_should_drop_table (plan, tag) && !tags_set.has (tag)) continue; tags_set.add (tag); - success = _subset_table (plan, tag); + success = _subset_table (plan, buf, tag); if (unlikely (!success)) goto end; } offset += num_tables; From 7edd54f3ddadc10307577575f47e943b86198e9d Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 10 May 2022 18:44:14 -0600 Subject: [PATCH 2/8] [perf/benchmark-subset] Minor cleanup --- perf/benchmark-subset.cc | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/perf/benchmark-subset.cc b/perf/benchmark-subset.cc index 765a61c8d..9d80a675c 100644 --- a/perf/benchmark-subset.cc +++ b/perf/benchmark-subset.cc @@ -34,11 +34,12 @@ void AddCodepoints(const hb_set_t* codepoints_in_font, unsigned subset_size, hb_subset_input_t* input) { + auto *unicodes = hb_subset_input_unicode_set (input); hb_codepoint_t cp = HB_SET_VALUE_INVALID; for (unsigned i = 0; i < subset_size; i++) { // TODO(garretrieger): pick randomly. if (!hb_set_next (codepoints_in_font, &cp)) return; - hb_set_add (hb_subset_input_unicode_set (input), cp); + hb_set_add (unicodes, cp); } } @@ -46,9 +47,10 @@ void AddGlyphs(unsigned num_glyphs_in_font, unsigned subset_size, hb_subset_input_t* input) { + auto *glyphs = hb_subset_input_glyph_set (input); for (unsigned i = 0; i < subset_size && i < num_glyphs_in_font; i++) { // TODO(garretrieger): pick randomly. - hb_set_add (hb_subset_input_glyph_set (input), i); + hb_set_add (glyphs, i); } } @@ -70,19 +72,23 @@ static void BM_subset (benchmark::State &state, hb_subset_input_t* input = hb_subset_input_create_or_fail (); assert (input); + switch (operation) { - hb_set_t* all_codepoints = hb_set_create (); - switch (operation) { - case subset_codepoints: - hb_face_collect_unicodes (face, all_codepoints); - AddCodepoints(all_codepoints, subset_size, input); - break; - case subset_glyphs: - unsigned num_glyphs = hb_face_get_glyph_count (face); - AddGlyphs(num_glyphs, subset_size, input); - break; + case subset_codepoints: + { + hb_set_t* all_codepoints = hb_set_create (); + hb_face_collect_unicodes (face, all_codepoints); + AddCodepoints(all_codepoints, subset_size, input); + hb_set_destroy (all_codepoints); } - hb_set_destroy (all_codepoints); + break; + + case subset_glyphs: + { + unsigned num_glyphs = hb_face_get_glyph_count (face); + AddGlyphs(num_glyphs, subset_size, input); + } + break; } for (auto _ : state) From f08537963b5157cd9e7a02f6e1695ff6bd27cc1b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 11 May 2022 12:10:03 -0600 Subject: [PATCH 3/8] [cff-subset] Pre-alloc vector for operator decoding --- src/hb-cff-interp-common.hh | 5 +++++ src/hb-subset-cff-common.hh | 1 + 2 files changed, 6 insertions(+) diff --git a/src/hb-cff-interp-common.hh b/src/hb-cff-interp-common.hh index 058dc0cdd..1cd165615 100644 --- a/src/hb-cff-interp-common.hh +++ b/src/hb-cff-interp-common.hh @@ -520,6 +520,11 @@ struct parsed_values_t } void fini () { values.fini (); } + void alloc (unsigned n) + { + values.alloc (n); + } + void add_op (op_code_t op, const byte_str_ref_t& str_ref = byte_str_ref_t ()) { VAL *val = values.push (); diff --git a/src/hb-subset-cff-common.hh b/src/hb-subset-cff-common.hh index e3fae0850..0405b35bf 100644 --- a/src/hb-subset-cff-common.hh +++ b/src/hb-subset-cff-common.hh @@ -569,6 +569,7 @@ struct subr_subsetter_t ENV env (str, acc, fd); cs_interpreter_t interp (env); + parsed_charstrings[i].alloc (str.length); subr_subset_param_t param (&parsed_charstrings[i], &parsed_global_subrs, &parsed_local_subrs[fd], From 2e7f1ae48feaa2db8248b7ae01e46ef70e461a31 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 11 May 2022 12:49:16 -0600 Subject: [PATCH 4/8] [subset] Use vector.allocated size instead of tracking buf_size --- src/hb-subset.cc | 10 +++++----- src/hb-vector.hh | 3 +-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/hb-subset.cc b/src/hb-subset.cc index cc695111d..0cda2f498 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -123,7 +123,6 @@ static bool _try_subset (const TableType *table, hb_vector_t* buf, - unsigned buf_size, hb_subset_context_t* c /* OUT */) { c->serializer->start_serialize (); @@ -136,6 +135,7 @@ _try_subset (const TableType *table, return needed; } + unsigned buf_size = buf->allocated; buf_size = buf_size * 2 + 16; DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", HB_UNTAG (c->table_tag), buf_size); @@ -147,8 +147,8 @@ _try_subset (const TableType *table, return needed; } - c->serializer->reset (buf->arrayZ, buf_size); - return _try_subset (table, buf, buf_size, c); + c->serializer->reset (buf->arrayZ, buf->allocated); + return _try_subset (table, buf, c); } template @@ -180,10 +180,10 @@ _subset (hb_subset_plan_t *plan, hb_vector_t &buf) } bool needed = false; - hb_serialize_context_t serializer (buf.arrayZ, buf_size); + hb_serialize_context_t serializer (buf.arrayZ, buf.allocated); { hb_subset_context_t c (source_blob, plan, &serializer, tag); - needed = _try_subset (table, &buf, buf_size, &c); + needed = _try_subset (table, &buf, &c); } hb_blob_destroy (source_blob); diff --git a/src/hb-vector.hh b/src/hb-vector.hh index 6c7d32e49..eefae3834 100644 --- a/src/hb-vector.hh +++ b/src/hb-vector.hh @@ -70,9 +70,8 @@ struct hb_vector_t : std::conditional, hb_empty } ~hb_vector_t () { fini (); } - private: - int allocated = 0; /* == -1 means allocation failed. */ public: + int allocated = 0; /* == -1 means allocation failed. */ unsigned int length = 0; public: Type *arrayZ = nullptr; From c78d8ba60b49013e3ca98a2d7b030dc5d8c569d8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 11 May 2022 13:05:41 -0600 Subject: [PATCH 5/8] [subset] Allocate same size as source table for GSUB/GPOS/name --- src/hb-subset.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/hb-subset.cc b/src/hb-subset.cc index 0cda2f498..31ddb4f89 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -79,12 +79,14 @@ using OT::Layout::GSUB::GSUB; */ static unsigned -_plan_estimate_subset_table_size (hb_subset_plan_t *plan, unsigned table_len) +_plan_estimate_subset_table_size (hb_subset_plan_t *plan, + unsigned table_len, + bool same_size) { unsigned src_glyphs = plan->source->get_num_glyphs (); unsigned dst_glyphs = plan->glyphset ()->get_population (); - if (unlikely (!src_glyphs)) + if (unlikely (!src_glyphs) || same_size) return 512 + table_len; return 512 + (unsigned) (table_len * sqrt ((double) dst_glyphs / src_glyphs)); @@ -167,9 +169,13 @@ _subset (hb_subset_plan_t *plan, hb_vector_t &buf) return false; } - /* TODO Not all tables are glyph-related. 'name' table size for example should not be - * affected by number of glyphs. Accommodate that. */ - unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length); + /* Tables that we want to allocate same space as the source table. For GSUB/GPOS it's + * because those are expensive to subset, so giving them more room is fine. */ + bool same_size_table = TableType::tableTag == HB_OT_TAG_GSUB || + TableType::tableTag == HB_OT_TAG_GPOS || + TableType::tableTag == HB_OT_TAG_name; + + unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length, same_size_table); DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size); if (unlikely (!buf.alloc (buf_size))) From 3261e05bdbb067cb9411a38a585bb04be1fb2542 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 11 May 2022 13:16:31 -0600 Subject: [PATCH 6/8] [subset] Optimize ClassDef1::intersected_class_glyphs() for class0 --- src/hb-ot-layout-common.hh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/hb-ot-layout-common.hh b/src/hb-ot-layout-common.hh index f2a58028e..655bd39a4 100644 --- a/src/hb-ot-layout-common.hh +++ b/src/hb-ot-layout-common.hh @@ -2057,10 +2057,13 @@ struct ClassDefFormat1 unsigned count = classValue.len; if (klass == 0) { - hb_codepoint_t endGlyph = startGlyph + count -1; - for (hb_codepoint_t g : glyphs->iter ()) - if (g < startGlyph || g > endGlyph) - intersect_glyphs->add (g); + for (unsigned g = HB_SET_VALUE_INVALID; + hb_set_next (glyphs, &g) && g < startGlyph;) + intersect_glyphs->add (g); + + for (unsigned g = startGlyph + count - 1; + hb_set_next (glyphs, &g);) + intersect_glyphs->add (g); return; } From 137af3612bcf038103bfc315f445d6574cba8d2c Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 11 May 2022 13:39:30 -0600 Subject: [PATCH 7/8] [gsubgpos] Simplify OT::ClassDefFormat2::intersected_class_glyphs() --- src/hb-ot-layout-common.hh | 39 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/hb-ot-layout-common.hh b/src/hb-ot-layout-common.hh index 655bd39a4..c3550756a 100644 --- a/src/hb-ot-layout-common.hh +++ b/src/hb-ot-layout-common.hh @@ -2057,8 +2057,9 @@ struct ClassDefFormat1 unsigned count = classValue.len; if (klass == 0) { + unsigned start_glyph = startGlyph; for (unsigned g = HB_SET_VALUE_INVALID; - hb_set_next (glyphs, &g) && g < startGlyph;) + hb_set_next (glyphs, &g) && g < start_glyph;) intersect_glyphs->add (g); for (unsigned g = startGlyph + count - 1; @@ -2070,7 +2071,19 @@ struct ClassDefFormat1 for (unsigned i = 0; i < count; i++) if (classValue[i] == klass && glyphs->has (startGlyph + i)) - intersect_glyphs->add (startGlyph + i); + intersect_glyphs->add (startGlyph + i); + return; + +#if 0 + /* The following implementation is faster asymptotically, but slower + * in practice. */ + unsigned start_glyph = startGlyph; + unsigned end_glyph = start_glyph + count; + for (unsigned g = startGlyph - 1; + hb_set_next (glyphs, &g) && g < end_glyph;) + if (classValue.arrayZ[g - start_glyph] == klass) + intersect_glyphs->add (g); +#endif } void intersected_classes (const hb_set_t *glyphs, hb_set_t *intersect_classes) const @@ -2297,28 +2310,14 @@ struct ClassDefFormat2 return; } - hb_codepoint_t g = HB_SET_VALUE_INVALID; for (unsigned int i = 0; i < count; i++) { if (rangeRecord[i].value != klass) continue; - if (g != HB_SET_VALUE_INVALID) - { - if (g >= rangeRecord[i].first && - g <= rangeRecord[i].last) - intersect_glyphs->add (g); - if (g > rangeRecord[i].last) - continue; - } - - g = rangeRecord[i].first - 1; - while (hb_set_next (glyphs, &g)) - { - if (g >= rangeRecord[i].first && g <= rangeRecord[i].last) - intersect_glyphs->add (g); - else if (g > rangeRecord[i].last) - break; - } + unsigned end = rangeRecord[i].last + 1; + for (hb_codepoint_t g = rangeRecord[i].first - 1; + hb_set_next (glyphs, &g) && g < end;) + intersect_glyphs->add (g); } } From 175319cd89fbab431616eb83d4d7c569fe4e8800 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 11 May 2022 13:47:17 -0600 Subject: [PATCH 8/8] [gsubgpos] Clean up OT::ClassDefFormat2::intersected_class_glyphs 0 case --- src/hb-ot-layout-common.hh | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/hb-ot-layout-common.hh b/src/hb-ot-layout-common.hh index c3550756a..6f02c87bb 100644 --- a/src/hb-ot-layout-common.hh +++ b/src/hb-ot-layout-common.hh @@ -2295,17 +2295,19 @@ struct ClassDefFormat2 hb_codepoint_t g = HB_SET_VALUE_INVALID; for (unsigned int i = 0; i < count; i++) { - if (!hb_set_next (glyphs, &g)) - break; - while (g != HB_SET_VALUE_INVALID && g < rangeRecord[i].first) - { - intersect_glyphs->add (g); - hb_set_next (glyphs, &g); + if (!hb_set_next (glyphs, &g)) + goto done; + while (g < rangeRecord[i].first) + { + intersect_glyphs->add (g); + if (!hb_set_next (glyphs, &g)) + goto done; } g = rangeRecord[i].last; } - while (g != HB_SET_VALUE_INVALID && hb_set_next (glyphs, &g)) - intersect_glyphs->add (g); + while (hb_set_next (glyphs, &g)) + intersect_glyphs->add (g); + done: return; }