Merge pull request #3842 from harfbuzz/patch_mode

[subset] Begin implementing a subset accelerator
This commit is contained in:
Behdad Esfahbod 2022-10-14 15:08:22 -06:00 committed by GitHub
commit bda42fc34a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 270 additions and 24 deletions

View File

@ -97,6 +97,15 @@ void AddGlyphs(unsigned num_glyphs_in_font,
} }
} }
// Preprocess face and populate the subset accelerator on it to speed up
// the subsetting operations.
//
// Consumes the caller's reference to `face`. On success the preprocessed
// face is returned and `face` is destroyed. If preprocessing fails
// (hb_subset_preprocess returns null) the original `face` is returned
// untouched so the benchmark still has a usable face to operate on.
static hb_face_t* preprocess_face(hb_face_t* face)
{
  hb_face_t* new_face = hb_subset_preprocess(face);
  if (!new_face)
    return face;

  hb_face_destroy(face);
  return new_face;
}
/* benchmark for subsetting a font */ /* benchmark for subsetting a font */
static void BM_subset (benchmark::State &state, static void BM_subset (benchmark::State &state,
operation_t operation, operation_t operation,
@ -110,6 +119,8 @@ static void BM_subset (benchmark::State &state,
assert (blob); assert (blob);
face = hb_face_create (blob, 0); face = hb_face_create (blob, 0);
hb_blob_destroy (blob); hb_blob_destroy (blob);
face = preprocess_face (face);
} }
hb_subset_input_t* input = hb_subset_input_create_or_fail (); hb_subset_input_t* input = hb_subset_input_create_or_fail ();

View File

@ -341,6 +341,7 @@ HB_SUBSET_sources = \
hb-subset-cff2.hh \ hb-subset-cff2.hh \
hb-subset-input.cc \ hb-subset-input.cc \
hb-subset-input.hh \ hb-subset-input.hh \
hb-subset-accelerator.hh \
hb-subset-plan.cc \ hb-subset-plan.cc \
hb-subset-plan.hh \ hb-subset-plan.hh \
hb-subset-repacker.cc \ hb-subset-repacker.cc \

View File

@ -21,7 +21,9 @@ if '--experimental-api' not in sys.argv:
experimental_symbols = \ experimental_symbols = \
"""hb_subset_repack_or_fail """hb_subset_repack_or_fail
hb_subset_input_pin_axis_location hb_subset_input_pin_axis_location
hb_subset_input_pin_axis_to_default""".splitlines () hb_subset_input_pin_axis_to_default
hb_subset_preprocess
""".splitlines ()
symbols = [x for x in symbols if x not in experimental_symbols] symbols = [x for x in symbols if x not in experimental_symbols]
symbols = "\n".join (symbols) symbols = "\n".join (symbols)

View File

@ -0,0 +1,76 @@
/*
* Copyright © 2022 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Garret Rieger
*/
#ifndef HB_SUBSET_ACCELERATOR_HH
#define HB_SUBSET_ACCELERATOR_HH
#include "hb.hh"
#include "hb-map.hh"
#include "hb-set.hh"
// Holds copies of a unicode -> glyph id map and the matching set of unicodes
// so that later subsetting operations can reuse them.
struct hb_subset_accelerator_t
{
  // Returns the address of a function-local static key; the same address is
  // returned on every call, so it can serve as a user-data key.
  static hb_user_data_key_t* user_data_key()
  {
    static hb_user_data_key_t key;
    return &key;
  }

  // Allocates an accelerator with hb_malloc and copy-constructs its members
  // from the given map and set.
  //
  // Returns nullptr if the allocation fails. A non-null result may still be
  // in an error state (see in_error ()). Release with destroy ().
  static hb_subset_accelerator_t* create(const hb_map_t& unicode_to_gid_,
                                         const hb_set_t& unicodes_) {
    hb_subset_accelerator_t* accel =
        (hb_subset_accelerator_t*) hb_malloc (sizeof(hb_subset_accelerator_t));
    // Constructing into a null pointer is undefined behaviour; bail out.
    if (!accel) return nullptr;

    new (accel) hb_subset_accelerator_t (unicode_to_gid_, unicodes_);
    return accel;
  }

  // Destroys an accelerator previously returned by create (). Takes a void*
  // so it can be used directly as a user-data destroy callback. Null is a
  // no-op.
  static void destroy(void* value) {
    if (!value) return;

    hb_subset_accelerator_t* accel = (hb_subset_accelerator_t*) value;
    // Memory came from hb_malloc + placement new, so run the destructor
    // explicitly and release with hb_free.
    accel->~hb_subset_accelerator_t ();
    hb_free (accel);
  }

  // Copies the supplied map and set into this accelerator.
  hb_subset_accelerator_t(const hb_map_t& unicode_to_gid_,
                          const hb_set_t& unicodes_)
      : unicode_to_gid(unicode_to_gid_), unicodes(unicodes_) {}

  // Unicode codepoint -> glyph id.
  const hb_map_t unicode_to_gid;
  // Set of unicode codepoints stored alongside the map.
  const hb_set_t unicodes;
  // TODO(garretrieger): cumulative glyf checksum map
  // TODO(garretrieger): sanitized table cache.

  // True if either copied container reports an error.
  bool in_error () const
  {
    return unicode_to_gid.in_error() || unicodes.in_error ();
  }
};
#endif /* HB_SUBSET_ACCELERATOR_HH */

View File

@ -392,7 +392,7 @@ hb_subset_input_get_user_data (const hb_subset_input_t *input,
* *
* Since: EXPERIMENTAL * Since: EXPERIMENTAL
**/ **/
hb_bool_t HB_EXTERN hb_bool_t
hb_subset_input_pin_axis_to_default (hb_subset_input_t *input, hb_subset_input_pin_axis_to_default (hb_subset_input_t *input,
hb_face_t *face, hb_face_t *face,
hb_tag_t axis_tag) hb_tag_t axis_tag)
@ -416,7 +416,7 @@ hb_subset_input_pin_axis_to_default (hb_subset_input_t *input,
* *
* Since: EXPERIMENTAL * Since: EXPERIMENTAL
**/ **/
hb_bool_t HB_EXTERN hb_bool_t
hb_subset_input_pin_axis_location (hb_subset_input_t *input, hb_subset_input_pin_axis_location (hb_subset_input_t *input,
hb_face_t *face, hb_face_t *face,
hb_tag_t axis_tag, hb_tag_t axis_tag,
@ -431,3 +431,51 @@ hb_subset_input_pin_axis_location (hb_subset_input_t *input,
} }
#endif #endif
#endif #endif
#ifdef HB_EXPERIMENTAL_API
/**
 * hb_subset_preprocess:
 * @source: a #hb_face_t object.
 *
 * Preprocesses the face and attaches data that will be needed by the
 * subsetter. Future subsetting operations can then use the precomputed data
 * to speed up the subsetting operation.
 *
 * Return value: the face produced by hb_subset_or_fail() for @source, or
 * `nullptr` if the subset input could not be created or subsetting failed.
 *
 * Since: EXPERIMENTAL
 **/
HB_EXTERN hb_face_t *
hb_subset_preprocess (hb_face_t *source)
{
  hb_subset_input_t* input = hb_subset_input_create_or_fail ();
  /* Input creation can fail; everything below dereferences it. */
  if (!input)
    return nullptr;

  /* Clearing and then inverting a set makes it match everything, so all
   * unicodes, layout features, layout scripts and name ids are selected. */
  hb_set_clear (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));
  hb_set_invert (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));

  hb_set_clear (hb_subset_input_set(input,
                                    HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));
  hb_set_invert (hb_subset_input_set(input,
                                     HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));

  hb_set_clear (hb_subset_input_set(input,
                                    HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));
  hb_set_invert (hb_subset_input_set(input,
                                     HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));

  hb_set_clear (hb_subset_input_set(input,
                                    HB_SUBSET_SETS_NAME_ID));
  hb_set_invert (hb_subset_input_set(input,
                                     HB_SUBSET_SETS_NAME_ID));

  hb_subset_input_set_flags(input,
                            HB_SUBSET_FLAGS_NOTDEF_OUTLINE |
                            HB_SUBSET_FLAGS_GLYPH_NAMES |
                            HB_SUBSET_FLAGS_RETAIN_GIDS);

  /* Ask the subsetter to attach accelerator data to the face it produces. */
  input->attach_accelerator_data = true;

  hb_face_t* new_source = hb_subset_or_fail (source, input);
  hb_subset_input_destroy (input);

  return new_source;
}
#endif

View File

@ -59,6 +59,7 @@ struct hb_subset_input_t
}; };
unsigned flags; unsigned flags;
bool attach_accelerator_data = false;
hb_hashmap_t<hb_tag_t, float> *axes_location; hb_hashmap_t<hb_tag_t, float> *axes_location;
inline unsigned num_sets () const inline unsigned num_sets () const

View File

@ -25,6 +25,7 @@
*/ */
#include "hb-subset-plan.hh" #include "hb-subset-plan.hh"
#include "hb-subset-accelerator.hh"
#include "hb-map.hh" #include "hb-map.hh"
#include "hb-set.hh" #include "hb-set.hh"
@ -456,14 +457,19 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
hb_subset_plan_t *plan) hb_subset_plan_t *plan)
{ {
OT::cmap::accelerator_t cmap (plan->source); OT::cmap::accelerator_t cmap (plan->source);
unsigned size_threshold = plan->source->get_num_glyphs (); unsigned size_threshold = plan->source->get_num_glyphs ();
if (glyphs->is_empty () && unicodes->get_population () < size_threshold) if (glyphs->is_empty () && unicodes->get_population () < size_threshold)
{ {
const hb_map_t* unicode_to_gid = nullptr;
if (plan->accelerator)
unicode_to_gid = &plan->accelerator->unicode_to_gid;
// This is approach to collection is faster, but can only be used if glyphs // This is approach to collection is faster, but can only be used if glyphs
// are not being explicitly added to the subset and the input unicodes set is // are not being explicitly added to the subset and the input unicodes set is
// not excessively large (eg. an inverted set). // not excessively large (eg. an inverted set).
plan->unicode_to_new_gid_list.alloc (unicodes->get_population ()); plan->unicode_to_new_gid_list.alloc (unicodes->get_population ());
if (!unicode_to_gid) {
for (hb_codepoint_t cp : *unicodes) for (hb_codepoint_t cp : *unicodes)
{ {
hb_codepoint_t gid; hb_codepoint_t gid;
@ -476,21 +482,48 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
plan->codepoint_to_glyph->set (cp, gid); plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
} }
} else {
// Use the in-memory unicode to gid map; it's faster than looking up from
// the cmap. This code is mostly duplicated from above to avoid doing
// conditionals on the presence of the unicode_to_gid map each
// iteration.
for (hb_codepoint_t cp : *unicodes)
{
hb_codepoint_t gid = unicode_to_gid->get (cp);
if (gid == HB_MAP_VALUE_INVALID)
{
DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
continue;
}
plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
}
}
} }
else else
{ {
// This approach is slower, but can handle adding in glyphs to the subset and will match // This approach is slower, but can handle adding in glyphs to the subset and will match
// them with cmap entries. // them with cmap entries.
hb_map_t unicode_glyphid_map;
hb_set_t cmap_unicodes; hb_map_t unicode_glyphid_map_storage;
cmap.collect_mapping (&cmap_unicodes, &unicode_glyphid_map); hb_set_t cmap_unicodes_storage;
const hb_map_t* unicode_glyphid_map = &unicode_glyphid_map_storage;
const hb_set_t* cmap_unicodes = &cmap_unicodes_storage;
if (!plan->accelerator) {
cmap.collect_mapping (&cmap_unicodes_storage, &unicode_glyphid_map_storage);
plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population () plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population ()
+ glyphs->get_population (), + glyphs->get_population (),
cmap_unicodes.get_population ())); cmap_unicodes->get_population ()));
} else {
unicode_glyphid_map = &plan->accelerator->unicode_to_gid;
cmap_unicodes = &plan->accelerator->unicodes;
}
for (hb_codepoint_t cp : cmap_unicodes) for (hb_codepoint_t cp : *cmap_unicodes)
{ {
hb_codepoint_t gid = unicode_glyphid_map[cp]; hb_codepoint_t gid = (*unicode_glyphid_map)[cp];
if (!unicodes->has (cp) && !glyphs->has (gid)) if (!unicodes->has (cp) && !glyphs->has (gid))
continue; continue;
@ -815,6 +848,13 @@ hb_subset_plan_create_or_fail (hb_face_t *face,
plan->check_success (plan->vmtx_map = hb_hashmap_create<unsigned, hb_pair_t<unsigned, int>> ()); plan->check_success (plan->vmtx_map = hb_hashmap_create<unsigned, hb_pair_t<unsigned, int>> ());
plan->check_success (plan->hmtx_map = hb_hashmap_create<unsigned, hb_pair_t<unsigned, int>> ()); plan->check_success (plan->hmtx_map = hb_hashmap_create<unsigned, hb_pair_t<unsigned, int>> ());
void* accel = hb_face_get_user_data(face, hb_subset_accelerator_t::user_data_key());
plan->attach_accelerator_data = input->attach_accelerator_data;
if (accel)
plan->accelerator = (hb_subset_accelerator_t*) accel;
if (unlikely (plan->in_error ())) { if (unlikely (plan->in_error ())) {
hb_subset_plan_destroy (plan); hb_subset_plan_destroy (plan);
return nullptr; return nullptr;

View File

@ -31,6 +31,7 @@
#include "hb-subset.h" #include "hb-subset.h"
#include "hb-subset-input.hh" #include "hb-subset-input.hh"
#include "hb-subset-accelerator.hh"
#include "hb-map.hh" #include "hb-map.hh"
#include "hb-bimap.hh" #include "hb-bimap.hh"
@ -97,6 +98,7 @@ struct hb_subset_plan_t
bool successful; bool successful;
unsigned flags; unsigned flags;
bool attach_accelerator_data = false;
// For each cp that we'd like to retain maps to the corresponding gid. // For each cp that we'd like to retain maps to the corresponding gid.
hb_set_t *unicodes; hb_set_t *unicodes;
@ -189,6 +191,8 @@ struct hb_subset_plan_t
//vmtx metrics map: new gid->(advance, lsb) //vmtx metrics map: new gid->(advance, lsb)
hb_hashmap_t<unsigned, hb_pair_t<unsigned, int>> *vmtx_map; hb_hashmap_t<unsigned, hb_pair_t<unsigned, int>> *vmtx_map;
const hb_subset_accelerator_t* accelerator;
public: public:
template<typename T> template<typename T>

View File

@ -56,6 +56,7 @@
#include "hb-ot-math-table.hh" #include "hb-ot-math-table.hh"
#include "hb-ot-stat-table.hh" #include "hb-ot-stat-table.hh"
#include "hb-repacker.hh" #include "hb-repacker.hh"
#include "hb-subset-accelerator.hh"
using OT::Layout::GSUB; using OT::Layout::GSUB;
using OT::Layout::GPOS; using OT::Layout::GPOS;
@ -494,6 +495,27 @@ _subset_table (hb_subset_plan_t *plan,
} }
} }
/* Builds a subset accelerator from the plan's codepoint -> glyph map and
 * unicode set and stores it on @face as user data.
 *
 * This is best effort: if the accelerator cannot be created, is in an error
 * state, or cannot be stored on the face, it is released and @face is left
 * without accelerator data. */
static void _attach_accelerator_data (const hb_subset_plan_t* plan,
                                      hb_face_t* face /* IN/OUT */)
{
  hb_subset_accelerator_t* accel =
      hb_subset_accelerator_t::create (*plan->codepoint_to_glyph,
                                       *plan->unicodes);

  /* Creation may yield no object at all; nothing to attach or free then. */
  if (!accel)
    return;

  if (accel->in_error ())
  {
    hb_subset_accelerator_t::destroy (accel);
    return;
  }

  /* On success the destroy callback registered here releases the
   * accelerator; on failure it is still ours to release. */
  if (!hb_face_set_user_data(face,
                             hb_subset_accelerator_t::user_data_key(),
                             accel,
                             hb_subset_accelerator_t::destroy,
                             true))
    hb_subset_accelerator_t::destroy (accel);
}
/** /**
* hb_subset_or_fail: * hb_subset_or_fail:
* @source: font face data to be subset. * @source: font face data to be subset.
@ -576,6 +598,10 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan)
offset += num_tables; offset += num_tables;
} }
if (success && plan->attach_accelerator_data) {
_attach_accelerator_data (plan, plan->dest);
}
end: end:
return success ? hb_face_reference (plan->dest) : nullptr; return success ? hb_face_reference (plan->dest) : nullptr;
} }

View File

@ -70,6 +70,14 @@ typedef struct hb_subset_plan_t hb_subset_plan_t;
* in the final subset. * in the final subset.
* @HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES: If set then the unicode ranges in * @HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES: If set then the unicode ranges in
* OS/2 will not be recalculated. * OS/2 will not be recalculated.
* @HB_SUBSET_FLAGS_PATCH_MODE: If set the subsetter behaviour will be modified
* to produce a subset that is better suited to patching. For example cmap
* subtable format will be kept stable.
* @HB_SUBSET_FLAGS_OMIT_GLYF: If set the subsetter won't actually produce the final
* glyf table bytes. The table directory will include an entry as if the table was
* there but the actual final font blob will be truncated prior to the glyf data. This
* is a useful performance optimization when a font aware binary patching algorithm
* is being used to diff two subsets.
* *
* List of boolean properties that can be configured on the subset input. * List of boolean properties that can be configured on the subset input.
* *
@ -86,6 +94,8 @@ typedef enum { /*< flags >*/
HB_SUBSET_FLAGS_NOTDEF_OUTLINE = 0x00000040u, HB_SUBSET_FLAGS_NOTDEF_OUTLINE = 0x00000040u,
HB_SUBSET_FLAGS_GLYPH_NAMES = 0x00000080u, HB_SUBSET_FLAGS_GLYPH_NAMES = 0x00000080u,
HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES = 0x00000100u, HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES = 0x00000100u,
// Not supported yet: HB_SUBSET_FLAGS_PATCH_MODE = 0x00000200u,
// Not supported yet: HB_SUBSET_FLAGS_OMIT_GLYF = 0x00000400u,
} hb_subset_flags_t; } hb_subset_flags_t;
/** /**
@ -169,6 +179,13 @@ hb_subset_input_pin_axis_location (hb_subset_input_t *input,
#endif #endif
#endif #endif
#ifdef HB_EXPERIMENTAL_API
HB_EXTERN hb_face_t *
hb_subset_preprocess (hb_face_t *source);
#endif
HB_EXTERN hb_face_t * HB_EXTERN hb_face_t *
hb_subset_or_fail (hb_face_t *source, const hb_subset_input_t *input); hb_subset_or_fail (hb_face_t *source, const hb_subset_input_t *input);

View File

@ -334,6 +334,7 @@ hb_subset_sources = files(
'hb-ot-cff1-table.cc', 'hb-ot-cff1-table.cc',
'hb-ot-cff2-table.cc', 'hb-ot-cff2-table.cc',
'hb-static.cc', 'hb-static.cc',
'hb-subset-accelerator.hh',
'hb-subset-cff-common.cc', 'hb-subset-cff-common.cc',
'hb-subset-cff-common.hh', 'hb-subset-cff-common.hh',
'hb-subset-cff1.cc', 'hb-subset-cff1.cc',

View File

@ -52,6 +52,7 @@ def run_test (test, should_check_ots):
cli_args = ["--font-file=" + test.font_path, cli_args = ["--font-file=" + test.font_path,
"--output-file=" + out_file, "--output-file=" + out_file,
"--unicodes=%s" % test.unicodes (), "--unicodes=%s" % test.unicodes (),
"--preprocess-face",
"--drop-tables+=DSIG", "--drop-tables+=DSIG",
"--drop-tables-=sbix"] "--drop-tables-=sbix"]
cli_args.extend (test.get_profile_flags ()) cli_args.extend (test.get_profile_flags ())

View File

@ -32,6 +32,15 @@
#include <hb-subset.h> #include <hb-subset.h>
/* Returns the face to subset from: the result of hb_subset_preprocess ()
 * when the experimental API is compiled in, otherwise a new reference to
 * @face itself. */
static hb_face_t* preprocess_face(hb_face_t* face)
{
  hb_face_t* result;
#ifdef HB_EXPERIMENTAL_API
  result = hb_subset_preprocess (face);
#else
  result = hb_face_reference(face);
#endif
  return result;
}
/* /*
* Command line interface to the harfbuzz font subsetter. * Command line interface to the harfbuzz font subsetter.
*/ */
@ -103,6 +112,10 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
{ {
parse (argc, argv); parse (argc, argv);
hb_face_t* orig_face = face;
if (preprocess)
orig_face = preprocess_face (face);
hb_face_t *new_face = nullptr; hb_face_t *new_face = nullptr;
for (unsigned i = 0; i < num_iterations; i++) for (unsigned i = 0; i < num_iterations; i++)
{ {
@ -119,6 +132,8 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
} }
hb_face_destroy (new_face); hb_face_destroy (new_face);
if (preprocess)
hb_face_destroy (orig_face);
return success ? 0 : 1; return success ? 0 : 1;
} }
@ -160,6 +175,7 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
public: public:
unsigned num_iterations = 1; unsigned num_iterations = 1;
gboolean preprocess;
hb_subset_input_t *input = nullptr; hb_subset_input_t *input = nullptr;
}; };
@ -915,6 +931,8 @@ subset_main_t::add_options ()
{"no-prune-unicode-ranges", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES>, "Don't change the 'OS/2 ulUnicodeRange*' bits.", nullptr}, {"no-prune-unicode-ranges", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES>, "Don't change the 'OS/2 ulUnicodeRange*' bits.", nullptr},
{"glyph-names", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_GLYPH_NAMES>, "Keep PS glyph names in TT-flavored fonts. ", nullptr}, {"glyph-names", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_GLYPH_NAMES>, "Keep PS glyph names in TT-flavored fonts. ", nullptr},
{"passthrough-tables", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED>, "Do not drop tables that the tool does not know how to subset.", nullptr}, {"passthrough-tables", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED>, "Do not drop tables that the tool does not know how to subset.", nullptr},
{"preprocess-face", 0, 0, G_OPTION_ARG_NONE, &this->preprocess,
"If set preprocesses the face with the add accelerator option before actually subsetting.", nullptr},
{nullptr} {nullptr}
}; };
add_group (flag_entries, add_group (flag_entries,