diff --git a/perf/benchmark-subset.cc b/perf/benchmark-subset.cc
index 2b40846dc..9bf3447a6 100644
--- a/perf/benchmark-subset.cc
+++ b/perf/benchmark-subset.cc
@@ -97,6 +97,21 @@ void AddGlyphs(unsigned num_glyphs_in_font,
   }
 }
 
+// Preprocess face and populate the subset accelerator on it to speed up
+// the subsetting operations. Takes ownership of face; when preprocessing is
+// not compiled in or fails, the original face is handed back unchanged.
+static hb_face_t* preprocess_face(hb_face_t* face)
+{
+#ifdef HB_EXPERIMENTAL_API
+  hb_face_t* new_face = hb_subset_preprocess(face);
+  if (!new_face) return face;
+  hb_face_destroy(face);
+  return new_face;
+#else
+  return face;
+#endif
+}
+
 /* benchmark for subsetting a font */
 static void BM_subset (benchmark::State &state,
                        operation_t operation,
@@ -110,6 +125,8 @@ static void BM_subset (benchmark::State &state,
     assert (blob);
     face = hb_face_create (blob, 0);
     hb_blob_destroy (blob);
+
+    face = preprocess_face (face);
   }
 
   hb_subset_input_t* input = hb_subset_input_create_or_fail ();
diff --git a/src/Makefile.sources b/src/Makefile.sources
index 37c83dc9f..6c891eac5 100644
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@@ -341,6 +341,7 @@ HB_SUBSET_sources = \
 	hb-subset-cff2.hh \
 	hb-subset-input.cc \
 	hb-subset-input.hh \
+	hb-subset-accelerator.hh \
 	hb-subset-plan.cc \
 	hb-subset-plan.hh \
 	hb-subset-repacker.cc \
diff --git a/src/gen-def.py b/src/gen-def.py
index 47b7b479d..e751f524e 100755
--- a/src/gen-def.py
+++ b/src/gen-def.py
@@ -21,7 +21,9 @@ if '--experimental-api' not in sys.argv:
 	experimental_symbols = \
 """hb_subset_repack_or_fail
 hb_subset_input_pin_axis_location
-hb_subset_input_pin_axis_to_default""".splitlines ()
+hb_subset_input_pin_axis_to_default
+hb_subset_preprocess
+""".splitlines ()
 	symbols = [x for x in symbols if x not in experimental_symbols]
 symbols = "\n".join (symbols)
 
diff --git a/src/hb-subset-accelerator.hh b/src/hb-subset-accelerator.hh
new file mode 100644
index 000000000..34bd0534f
--- /dev/null
+++ b/src/hb-subset-accelerator.hh
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2022 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef HB_SUBSET_ACCELERATOR_HH
+#define HB_SUBSET_ACCELERATOR_HH
+
+
+#include "hb.hh"
+
+#include "hb-map.hh"
+#include "hb-set.hh"
+
+// Precomputed data that is attached to a face (as user data) so that later
+// subsetting operations on that face can reuse it.
+struct hb_subset_accelerator_t
+{
+  // Key under which the accelerator is stored in the face's user data.
+  static hb_user_data_key_t* user_data_key()
+  {
+    static hb_user_data_key_t key;
+    return &key;
+  }
+
+  // Allocates an accelerator holding copies of the given map and set.
+  // Returns nullptr if the allocation fails.
+  static hb_subset_accelerator_t* create(const hb_map_t& unicode_to_gid_,
+                                         const hb_set_t& unicodes_) {
+    hb_subset_accelerator_t* accel =
+        (hb_subset_accelerator_t*) hb_malloc (sizeof(hb_subset_accelerator_t));
+    // hb_malloc can fail; constructing into a null pointer is undefined.
+    if (unlikely (!accel)) return nullptr;
+
+    new (accel) hb_subset_accelerator_t (unicode_to_gid_, unicodes_);
+    return accel;
+  }
+
+  // Destroys an accelerator made by create(); a null value is ignored.
+  // Takes void* so it can be passed as the user data destroy callback.
+  static void destroy(void* value) {
+    if (!value) return;
+
+    hb_subset_accelerator_t* accel = (hb_subset_accelerator_t*) value;
+    accel->~hb_subset_accelerator_t ();
+    hb_free (accel);
+  }
+
+  hb_subset_accelerator_t(const hb_map_t& unicode_to_gid_,
+                          const hb_set_t& unicodes_)
+      : unicode_to_gid(unicode_to_gid_), unicodes(unicodes_) {}
+
+  // Unicode codepoint -> glyph id map, used in place of cmap lookups.
+  const hb_map_t unicode_to_gid;
+  // Codepoints used in place of the set collected from cmap.
+  const hb_set_t unicodes;
+  // TODO(garretrieger): cumulative glyf checksum map
+  // TODO(garretrieger): sanitized table cache.
+
+  // True if either container is in an error state.
+  bool in_error () const
+  {
+    return unicode_to_gid.in_error() || unicodes.in_error ();
+  }
+};
+
+
+#endif /* HB_SUBSET_ACCELERATOR_HH */
diff --git a/src/hb-subset-input.cc b/src/hb-subset-input.cc
index 2c5e6daf1..fd250104b 100644
--- a/src/hb-subset-input.cc
+++ b/src/hb-subset-input.cc
@@ -49,7 +49,7 @@ hb_subset_input_create_or_fail (void)
     set = hb_set_create ();
 
   input->axes_location = hb_hashmap_create ();
-  
+
   if (!input->axes_location || input->in_error ())
   {
     hb_subset_input_destroy (input);
@@ -392,7 +392,7 @@ hb_subset_input_get_user_data (const hb_subset_input_t *input,
  *
  * Since: EXPERIMENTAL
  **/
-hb_bool_t
+HB_EXTERN hb_bool_t
 hb_subset_input_pin_axis_to_default (hb_subset_input_t *input,
                                      hb_face_t *face,
                                      hb_tag_t axis_tag)
@@ -416,7 +416,7 @@ hb_subset_input_pin_axis_to_default (hb_subset_input_t *input,
  *
  * Since: EXPERIMENTAL
  **/
-hb_bool_t
+HB_EXTERN hb_bool_t
 hb_subset_input_pin_axis_location (hb_subset_input_t *input,
                                    hb_face_t *face,
                                    hb_tag_t axis_tag,
@@ -431,3 +431,59 @@ hb_subset_input_pin_axis_location (hb_subset_input_t *input,
 }
 #endif
 #endif
+
+#ifdef HB_EXPERIMENTAL_API
+/**
+ * hb_subset_preprocess:
+ * @source: a #hb_face_t object.
+ *
+ * Preprocesses the face and attaches data that will be needed by the
+ * subsetter. Future subsetting operations can then use the precomputed data
+ * to speed up the subsetting operation.
+ *
+ * Return value: (transfer full): the preprocessed #hb_face_t, or `nullptr`
+ * if preprocessing failed. Destroy with hb_face_destroy().
+ *
+ * Since: EXPERIMENTAL
+ **/
+
+HB_EXTERN hb_face_t *
+hb_subset_preprocess (hb_face_t *source)
+{
+  hb_subset_input_t* input = hb_subset_input_create_or_fail ();
+  if (!input)
+    return nullptr;
+
+  // Clearing and then inverting a set selects everything, so the preprocessed
+  // face retains all unicodes, layout features, layout scripts and name ids.
+  hb_set_clear (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));
+  hb_set_invert (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));
+
+  hb_set_clear (hb_subset_input_set(input,
+                                    HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));
+  hb_set_invert (hb_subset_input_set(input,
+                                     HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));
+
+  hb_set_clear (hb_subset_input_set(input,
+                                    HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));
+  hb_set_invert (hb_subset_input_set(input,
+                                     HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));
+
+  hb_set_clear (hb_subset_input_set(input,
+                                    HB_SUBSET_SETS_NAME_ID));
+  hb_set_invert (hb_subset_input_set(input,
+                                     HB_SUBSET_SETS_NAME_ID));
+
+  hb_subset_input_set_flags(input,
+                            HB_SUBSET_FLAGS_NOTDEF_OUTLINE |
+                            HB_SUBSET_FLAGS_GLYPH_NAMES |
+                            HB_SUBSET_FLAGS_RETAIN_GIDS);
+  // Ask the subsetter to attach accelerator data to the face it produces.
+  input->attach_accelerator_data = true;
+
+  hb_face_t* new_source = hb_subset_or_fail (source, input);
+  hb_subset_input_destroy (input);
+
+  return new_source;
+}
+#endif
diff --git a/src/hb-subset-input.hh b/src/hb-subset-input.hh
index 2335f0634..dabb4918f 100644
--- a/src/hb-subset-input.hh
+++ b/src/hb-subset-input.hh
@@ -59,6 +59,7 @@ struct hb_subset_input_t
   };
 
   unsigned flags;
+  bool attach_accelerator_data = false;
   hb_hashmap_t *axes_location;
 
   inline unsigned num_sets () const
diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc
index 532336268..9cf7c9e43 100644
--- a/src/hb-subset-plan.cc
+++ b/src/hb-subset-plan.cc
@@ -25,6 +25,7 @@
  */
 
 #include "hb-subset-plan.hh"
+#include "hb-subset-accelerator.hh"
 #include "hb-map.hh"
 #include "hb-set.hh"
 
@@ -456,41 +457,73 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
                               hb_subset_plan_t *plan)
 {
   OT::cmap::accelerator_t cmap (plan->source);
-
   unsigned size_threshold = plan->source->get_num_glyphs ();
   if (glyphs->is_empty () && unicodes->get_population () < size_threshold)
   {
+
+    const hb_map_t* 
unicode_to_gid = nullptr;
+    if (plan->accelerator)
+      unicode_to_gid = &plan->accelerator->unicode_to_gid;
+
     // This is approach to collection is faster, but can only be used if glyphs
     // are not being explicitly added to the subset and the input unicodes set is
     // not excessively large (eg. an inverted set).
     plan->unicode_to_new_gid_list.alloc (unicodes->get_population ());
-    for (hb_codepoint_t cp : *unicodes)
-    {
-      hb_codepoint_t gid;
-      if (!cmap.get_nominal_glyph (cp, &gid))
+    if (!unicode_to_gid) {
+      for (hb_codepoint_t cp : *unicodes)
       {
-        DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
-        continue;
-      }
+        hb_codepoint_t gid;
+        if (!cmap.get_nominal_glyph (cp, &gid))
+        {
+          DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
+          continue;
+        }
 
-      plan->codepoint_to_glyph->set (cp, gid);
-      plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
+        plan->codepoint_to_glyph->set (cp, gid);
+        plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
+      }
+    } else {
+      // Use the in-memory unicode to gid map; it's faster than looking up
+      // from the cmap. This code is mostly duplicated from above to avoid
+      // doing conditionals on the presence of the unicode_to_gid map each
+      // iteration.
+      for (hb_codepoint_t cp : *unicodes)
+      {
+        hb_codepoint_t gid = unicode_to_gid->get (cp);
+        if (gid == HB_MAP_VALUE_INVALID)
+        {
+          DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
+          continue;
+        }
+
+        plan->codepoint_to_glyph->set (cp, gid);
+        plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
+      }
     }
   }
   else
   {
     // This approach is slower, but can handle adding in glyphs to the subset and will match
     // them with cmap entries.
-    hb_map_t unicode_glyphid_map;
-    hb_set_t cmap_unicodes;
-    cmap.collect_mapping (&cmap_unicodes, &unicode_glyphid_map);
-    plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population ()
-                                                + glyphs->get_population (),
-                                                cmap_unicodes.get_population ()));
-    for (hb_codepoint_t cp : cmap_unicodes)
+    hb_map_t unicode_glyphid_map_storage;
+    hb_set_t cmap_unicodes_storage;
+    const hb_map_t* unicode_glyphid_map = &unicode_glyphid_map_storage;
+    const hb_set_t* cmap_unicodes = &cmap_unicodes_storage;
+
+    if (!plan->accelerator) {
+      cmap.collect_mapping (&cmap_unicodes_storage, &unicode_glyphid_map_storage);
+    } else {
+      unicode_glyphid_map = &plan->accelerator->unicode_to_gid;
+      cmap_unicodes = &plan->accelerator->unicodes;
+    }
+    plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population ()
+                                                + glyphs->get_population (),
+                                                cmap_unicodes->get_population ()));
+
+    for (hb_codepoint_t cp : *cmap_unicodes)
     {
-      hb_codepoint_t gid = unicode_glyphid_map[cp];
+      hb_codepoint_t gid = (*unicode_glyphid_map)[cp];
       if (!unicodes->has (cp) && !glyphs->has (gid))
         continue;
 
@@ -729,7 +762,7 @@ _normalize_axes_location (hb_face_t *face, hb_subset_plan_t *plan)
     }
     if (has_avar)
       seg_maps = &StructAfter (*seg_maps);
-    
+
     old_axis_idx++;
   }
   plan->all_axes_pinned = !axis_not_pinned;
@@ -815,6 +848,13 @@ hb_subset_plan_create_or_fail (hb_face_t *face,
   plan->check_success (plan->vmtx_map = hb_hashmap_create> ());
   plan->check_success (plan->hmtx_map = hb_hashmap_create> ());
 
+  void* accel = hb_face_get_user_data(face, hb_subset_accelerator_t::user_data_key());
+
+  plan->attach_accelerator_data = input->attach_accelerator_data;
+  // Assign unconditionally (nullptr when the face carries no accelerator) so
+  // plan->accelerator never depends on how the plan's storage was initialized.
+  plan->accelerator = (const hb_subset_accelerator_t*) accel;
+
   if (unlikely (plan->in_error ())) {
     hb_subset_plan_destroy (plan);
     return nullptr;
diff --git a/src/hb-subset-plan.hh b/src/hb-subset-plan.hh
index 1172cb55f..15fabba9c 100644
--- a/src/hb-subset-plan.hh
+++ b/src/hb-subset-plan.hh
@@ -31,6 +31,7 @@
 
 #include "hb-subset.h"
 #include "hb-subset-input.hh"
+#include "hb-subset-accelerator.hh"
 
 #include "hb-map.hh"
 #include "hb-bimap.hh"
@@ -97,6 +98,7 @@ struct hb_subset_plan_t
 
   bool successful;
   unsigned flags;
+  bool attach_accelerator_data = false;
 
   // For each cp that we'd like to retain maps to the corresponding gid.
   hb_set_t *unicodes;
@@ -189,6 +191,8 @@ struct hb_subset_plan_t
   //vmtx metrics map: new gid->(advance, lsb)
   hb_hashmap_t> *vmtx_map;
 
+  const hb_subset_accelerator_t* accelerator = nullptr;
+
  public:
 
   template
diff --git a/src/hb-subset.cc b/src/hb-subset.cc
index 5e116be07..6026aa6ef 100644
--- a/src/hb-subset.cc
+++ b/src/hb-subset.cc
@@ -56,6 +56,7 @@
 #include "hb-ot-math-table.hh"
 #include "hb-ot-stat-table.hh"
 #include "hb-repacker.hh"
+#include "hb-subset-accelerator.hh"
 
 using OT::Layout::GSUB;
 using OT::Layout::GPOS;
@@ -494,6 +495,32 @@ _subset_table (hb_subset_plan_t *plan,
   }
 }
 
+// Builds a subset accelerator from the plan's codepoint to glyph map and
+// unicode set and attaches it to face as user data. Best effort: on any
+// failure the face is simply left without accelerator data.
+static void _attach_accelerator_data (const hb_subset_plan_t* plan,
+                                      hb_face_t* face /* IN/OUT */)
+{
+  hb_subset_accelerator_t* accel =
+      hb_subset_accelerator_t::create (*plan->codepoint_to_glyph,
+                                       *plan->unicodes);
+
+  // Guard against a null accelerator (allocation failure) as well as one
+  // whose containers are in error; destroy() ignores null.
+  if (!accel || accel->in_error ())
+  {
+    hb_subset_accelerator_t::destroy (accel);
+    return;
+  }
+
+  if (!hb_face_set_user_data(face,
+                             hb_subset_accelerator_t::user_data_key(),
+                             accel,
+                             hb_subset_accelerator_t::destroy,
+                             true))
+    hb_subset_accelerator_t::destroy (accel);
+}
+
 /**
  * hb_subset_or_fail:
  * @source: font face data to be subset.
@@ -576,6 +603,10 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan)
     offset += num_tables;
   }
 
+  if (success && plan->attach_accelerator_data) {
+    _attach_accelerator_data (plan, plan->dest);
+  }
+
 end:
   return success ? hb_face_reference (plan->dest) : nullptr;
 }
diff --git a/src/hb-subset.h b/src/hb-subset.h
index 08e52dbd2..6a2c5f611 100644
--- a/src/hb-subset.h
+++ b/src/hb-subset.h
@@ -70,6 +70,14 @@ typedef struct hb_subset_plan_t hb_subset_plan_t;
  * in the final subset.
  * @HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES: If set then the unicode ranges in
  * OS/2 will not be recalculated.
+ * @HB_SUBSET_FLAGS_PATCH_MODE: Not supported yet. If set the subsetter behaviour
+ * will be modified to produce a subset that is better suited to patching. For
+ * example cmap subtable format will be kept stable.
+ * @HB_SUBSET_FLAGS_OMIT_GLYF: Not supported yet. If set the subsetter won't actually
+ * produce the final glyf table bytes. The table directory will include an entry as
+ * if the table was there but the actual final font blob will be truncated prior to
+ * the glyf data. This is a useful performance optimization when a font aware binary
+ * patching algorithm is being used to diff two subsets.
  *
  * List of boolean properties that can be configured on the subset input.
  *
@@ -86,6 +94,8 @@ typedef enum { /*< flags >*/
   HB_SUBSET_FLAGS_NOTDEF_OUTLINE = 0x00000040u,
   HB_SUBSET_FLAGS_GLYPH_NAMES = 0x00000080u,
   HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES = 0x00000100u,
+  /* Not supported yet: HB_SUBSET_FLAGS_PATCH_MODE = 0x00000200u, */
+  /* Not supported yet: HB_SUBSET_FLAGS_OMIT_GLYF = 0x00000400u, */
 } hb_subset_flags_t;
 
 /**
@@ -169,6 +179,13 @@ hb_subset_input_pin_axis_location (hb_subset_input_t *input,
 #endif
 #endif
 
+#ifdef HB_EXPERIMENTAL_API
+
+HB_EXTERN hb_face_t *
+hb_subset_preprocess (hb_face_t *source);
+
+#endif
+
 HB_EXTERN hb_face_t *
 hb_subset_or_fail (hb_face_t *source,
                    const hb_subset_input_t *input);
diff --git a/src/meson.build b/src/meson.build
index 4cf3451e9..ba3470fff 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -334,6 +334,7 @@ hb_subset_sources = files(
   'hb-ot-cff1-table.cc',
   'hb-ot-cff2-table.cc',
   'hb-static.cc',
+  'hb-subset-accelerator.hh',
   'hb-subset-cff-common.cc',
   'hb-subset-cff-common.hh',
   'hb-subset-cff1.cc',
diff --git a/test/subset/run-tests.py b/test/subset/run-tests.py
index a0c9e2885..6a2146dbe 100755
--- a/test/subset/run-tests.py
+++ b/test/subset/run-tests.py
@@ -52,6 +52,7 @@ def run_test (test, should_check_ots):
 	cli_args = ["--font-file=" + 
test.font_path,
 		    "--output-file=" + out_file,
 		    "--unicodes=%s" % test.unicodes (),
+		    "--preprocess-face",
 		    "--drop-tables+=DSIG",
 		    "--drop-tables-=sbix"]
 	cli_args.extend (test.get_profile_flags ())
diff --git a/util/hb-subset.cc b/util/hb-subset.cc
index b27b82f8a..f2606c767 100644
--- a/util/hb-subset.cc
+++ b/util/hb-subset.cc
@@ -32,6 +32,17 @@
 
 #include 
 
+// Returns a new reference to a preprocessed version of face, or to face itself
+// when preprocessing is not compiled in or fails. Caller destroys the result.
+static hb_face_t* preprocess_face(hb_face_t* face)
+{
+#ifdef HB_EXPERIMENTAL_API
+  hb_face_t* preprocessed = hb_subset_preprocess (face);
+  if (preprocessed) return preprocessed;
+#endif
+  return hb_face_reference(face);
+}
+
 /*
  * Command line interface to the harfbuzz font subsetter.
  */
@@ -103,6 +114,13 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t
   {
     parse (argc, argv);
 
+    // NOTE(review): the subsetting call in the loop below is outside this hunk;
+    // confirm it is given orig_face, otherwise --preprocess-face has no effect.
+    // orig_face is an owned reference only when preprocess is set (see cleanup).
+    hb_face_t* orig_face = face;
+    if (preprocess)
+      orig_face = preprocess_face (face);
+
     hb_face_t *new_face = nullptr;
     for (unsigned i = 0; i < num_iterations; i++)
     {
@@ -119,6 +137,8 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t
     }
 
     hb_face_destroy (new_face);
+    if (preprocess)
+      hb_face_destroy (orig_face);
 
     return success ? 0 : 1;
   }
@@ -160,6 +180,7 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t
  public:
 
   unsigned num_iterations = 1;
+  gboolean preprocess = false;
   hb_subset_input_t *input = nullptr;
 };
 
@@ -657,7 +678,7 @@ parse_instance (const char *name,
                 GError **error)
 {
   subset_main_t *subset_main = (subset_main_t *) data;
-  
+
   char *s = strtok((char *) arg, "=");
   while (s)
   {
@@ -915,6 +936,8 @@ subset_main_t::add_options ()
     {"no-prune-unicode-ranges", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Don't change the 'OS/2 ulUnicodeRange*' bits.", nullptr},
     {"glyph-names", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Keep PS glyph names in TT-flavored fonts. ", nullptr},
     {"passthrough-tables", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Do not drop tables that the tool does not know how to subset.", nullptr},
+    {"preprocess-face", 0, 0, G_OPTION_ARG_NONE, &this->preprocess,
+     "If set preprocesses the face with the add accelerator option before actually subsetting.", nullptr},
     {nullptr}
   };
   add_group (flag_entries,