Merge pull request #3842 from harfbuzz/patch_mode

[subset] Begin implementing a subset accelerator
This commit is contained in:
Behdad Esfahbod 2022-10-14 15:08:22 -06:00 committed by GitHub
commit bda42fc34a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 270 additions and 24 deletions

View File

@ -97,6 +97,15 @@ void AddGlyphs(unsigned num_glyphs_in_font,
}
}
// Preprocess face and populate the subset accelerator on it to speed up
// the subsetting operations.
//
// Takes ownership of `face`: the caller must stop using the pointer it passed
// in and use (and eventually destroy) the returned face instead.
//
// If preprocessing is unavailable (built without HB_EXPERIMENTAL_API) or
// fails, the original face is handed back untouched so the benchmark can
// still run, just without accelerator data.
static hb_face_t* preprocess_face(hb_face_t* face)
{
#ifdef HB_EXPERIMENTAL_API
  hb_face_t* new_face = hb_subset_preprocess(face);
  if (new_face)
  {
    // The preprocessed face replaces the original; drop our reference to it.
    hb_face_destroy(face);
    return new_face;
  }
#endif
  // Do not destroy `face` here: it is the only usable face we have left.
  return face;
}
/* benchmark for subsetting a font */
static void BM_subset (benchmark::State &state,
operation_t operation,
@ -110,6 +119,8 @@ static void BM_subset (benchmark::State &state,
assert (blob);
face = hb_face_create (blob, 0);
hb_blob_destroy (blob);
face = preprocess_face (face);
}
hb_subset_input_t* input = hb_subset_input_create_or_fail ();

View File

@ -341,6 +341,7 @@ HB_SUBSET_sources = \
hb-subset-cff2.hh \
hb-subset-input.cc \
hb-subset-input.hh \
hb-subset-accelerator.hh \
hb-subset-plan.cc \
hb-subset-plan.hh \
hb-subset-repacker.cc \

View File

@ -21,7 +21,9 @@ if '--experimental-api' not in sys.argv:
experimental_symbols = \
"""hb_subset_repack_or_fail
hb_subset_input_pin_axis_location
hb_subset_input_pin_axis_to_default""".splitlines ()
hb_subset_input_pin_axis_to_default
hb_subset_preprocess
""".splitlines ()
symbols = [x for x in symbols if x not in experimental_symbols]
symbols = "\n".join (symbols)

View File

@ -0,0 +1,76 @@
/*
* Copyright © 2022 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Garret Rieger
*/
#ifndef HB_SUBSET_ACCELERATOR_HH
#define HB_SUBSET_ACCELERATOR_HH
#include "hb.hh"
#include "hb-map.hh"
#include "hb-set.hh"
/*
 * Precomputed data that lets later subsetting operations skip work.
 *
 * Holds a copy of a unicode -> glyph id map and of the matching set of
 * unicodes. Instances are heap allocated through create() and released
 * through destroy(); destroy() takes a void* so it can be used as a
 * user-data destroy callback together with user_data_key().
 */
struct hb_subset_accelerator_t
{
  /* Returns the address of a function-local static key; the same pointer is
   * returned on every call, so it can identify this data when stored as
   * user data. */
  static hb_user_data_key_t* user_data_key()
  {
    static hb_user_data_key_t key;
    return &key;
  }

  /* Allocates a new accelerator holding copies of the given map and set.
   *
   * Returns nullptr if the allocation fails. A non-null result may still be
   * in_error() if copying the map or the set failed; callers should check.
   * Release the result with destroy(). */
  static hb_subset_accelerator_t* create(const hb_map_t& unicode_to_gid_,
                                         const hb_set_t& unicodes_) {
    hb_subset_accelerator_t* accel =
        (hb_subset_accelerator_t*) hb_malloc (sizeof(hb_subset_accelerator_t));
    // Never construct into a failed allocation.
    if (!accel) return nullptr;

    new (accel) hb_subset_accelerator_t (unicode_to_gid_, unicodes_);

    return accel;
  }

  /* Destroys and frees an accelerator obtained from create().
   * Accepts nullptr, in which case it does nothing. */
  static void destroy(void* value) {
    if (!value) return;

    hb_subset_accelerator_t* accel = (hb_subset_accelerator_t*) value;
    // Memory came from hb_malloc + placement new, so run the destructor
    // explicitly before handing the storage back with hb_free.
    accel->~hb_subset_accelerator_t ();
    hb_free (accel);
  }

  /* Copy-constructs both members from the arguments. */
  hb_subset_accelerator_t(const hb_map_t& unicode_to_gid_,
                          const hb_set_t& unicodes_)
      : unicode_to_gid(unicode_to_gid_), unicodes(unicodes_) {}

  const hb_map_t unicode_to_gid;
  const hb_set_t unicodes;
  // TODO(garretrieger): cumulative glyf checksum map
  // TODO(garretrieger): sanitized table cache.

  /* True if either member is in an error state. */
  bool in_error () const
  {
    return unicode_to_gid.in_error() || unicodes.in_error ();
  }
};
#endif /* HB_SUBSET_ACCELERATOR_HH */

View File

@ -49,7 +49,7 @@ hb_subset_input_create_or_fail (void)
set = hb_set_create ();
input->axes_location = hb_hashmap_create<hb_tag_t, float> ();
if (!input->axes_location || input->in_error ())
{
hb_subset_input_destroy (input);
@ -392,7 +392,7 @@ hb_subset_input_get_user_data (const hb_subset_input_t *input,
*
* Since: EXPERIMENTAL
**/
hb_bool_t
HB_EXTERN hb_bool_t
hb_subset_input_pin_axis_to_default (hb_subset_input_t *input,
hb_face_t *face,
hb_tag_t axis_tag)
@ -416,7 +416,7 @@ hb_subset_input_pin_axis_to_default (hb_subset_input_t *input,
*
* Since: EXPERIMENTAL
**/
hb_bool_t
HB_EXTERN hb_bool_t
hb_subset_input_pin_axis_location (hb_subset_input_t *input,
hb_face_t *face,
hb_tag_t axis_tag,
@ -431,3 +431,51 @@ hb_subset_input_pin_axis_location (hb_subset_input_t *input,
}
#endif
#endif
#ifdef HB_EXPERIMENTAL_API
/**
 * hb_subset_preprocess
 * @source: a #hb_face_t object.
 *
 * Preprocesses the face and attaches data that will be needed by the
 * subsetter. Future subsetting operations can then use the precomputed data
 * to speed up the subsetting operation.
 *
 * Return value: (transfer full): the face produced by subsetting @source
 * with accelerator data requested, or `nullptr` if the subset input could
 * not be created or the subsetting operation failed. @source itself is not
 * destroyed by this call.
 *
 * Since: EXPERIMENTAL
 **/
HB_EXTERN hb_face_t *
hb_subset_preprocess (hb_face_t *source)
{
  hb_subset_input_t* input = hb_subset_input_create_or_fail ();
  // Creation can fail; every call below dereferences or forwards `input`.
  if (!input)
    return nullptr;

  // Clear then invert each input set so that it selects everything.
  hb_set_clear (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));
  hb_set_invert (hb_subset_input_set(input, HB_SUBSET_SETS_UNICODE));

  hb_set_clear (hb_subset_input_set(input,
                                    HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));
  hb_set_invert (hb_subset_input_set(input,
                                     HB_SUBSET_SETS_LAYOUT_FEATURE_TAG));

  hb_set_clear (hb_subset_input_set(input,
                                    HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));
  hb_set_invert (hb_subset_input_set(input,
                                     HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG));

  hb_set_clear (hb_subset_input_set(input,
                                    HB_SUBSET_SETS_NAME_ID));
  hb_set_invert (hb_subset_input_set(input,
                                     HB_SUBSET_SETS_NAME_ID));

  hb_subset_input_set_flags(input,
                            HB_SUBSET_FLAGS_NOTDEF_OUTLINE |
                            HB_SUBSET_FLAGS_GLYPH_NAMES |
                            HB_SUBSET_FLAGS_RETAIN_GIDS);

  // Ask the subsetter to attach accelerator data to the face it produces.
  input->attach_accelerator_data = true;

  hb_face_t* new_source = hb_subset_or_fail (source, input);
  hb_subset_input_destroy (input);

  return new_source;
}
#endif

View File

@ -59,6 +59,7 @@ struct hb_subset_input_t
};
unsigned flags;
bool attach_accelerator_data = false;
hb_hashmap_t<hb_tag_t, float> *axes_location;
inline unsigned num_sets () const

View File

@ -25,6 +25,7 @@
*/
#include "hb-subset-plan.hh"
#include "hb-subset-accelerator.hh"
#include "hb-map.hh"
#include "hb-set.hh"
@ -456,41 +457,73 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
hb_subset_plan_t *plan)
{
OT::cmap::accelerator_t cmap (plan->source);
unsigned size_threshold = plan->source->get_num_glyphs ();
if (glyphs->is_empty () && unicodes->get_population () < size_threshold)
{
const hb_map_t* unicode_to_gid = nullptr;
if (plan->accelerator)
unicode_to_gid = &plan->accelerator->unicode_to_gid;
// This approach to collection is faster, but can only be used if glyphs
// are not being explicitly added to the subset and the input unicodes set is
// not excessively large (eg. an inverted set).
plan->unicode_to_new_gid_list.alloc (unicodes->get_population ());
for (hb_codepoint_t cp : *unicodes)
{
hb_codepoint_t gid;
if (!cmap.get_nominal_glyph (cp, &gid))
if (!unicode_to_gid) {
for (hb_codepoint_t cp : *unicodes)
{
DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
continue;
}
hb_codepoint_t gid;
if (!cmap.get_nominal_glyph (cp, &gid))
{
DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
continue;
}
plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
}
} else {
// Use the in-memory unicode to gid map; it's faster than looking up from
// the cmap. This code is mostly duplicated from above to avoid doing
// conditionals on the presence of the unicode_to_gid map each
// iteration.
for (hb_codepoint_t cp : *unicodes)
{
hb_codepoint_t gid = unicode_to_gid->get (cp);
if (gid == HB_MAP_VALUE_INVALID)
{
DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp);
continue;
}
plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
}
}
}
else
{
// This approach is slower, but can handle adding in glyphs to the subset and will match
// them with cmap entries.
hb_map_t unicode_glyphid_map;
hb_set_t cmap_unicodes;
cmap.collect_mapping (&cmap_unicodes, &unicode_glyphid_map);
plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population ()
+ glyphs->get_population (),
cmap_unicodes.get_population ()));
for (hb_codepoint_t cp : cmap_unicodes)
hb_map_t unicode_glyphid_map_storage;
hb_set_t cmap_unicodes_storage;
const hb_map_t* unicode_glyphid_map = &unicode_glyphid_map_storage;
const hb_set_t* cmap_unicodes = &cmap_unicodes_storage;
if (!plan->accelerator) {
cmap.collect_mapping (&cmap_unicodes_storage, &unicode_glyphid_map_storage);
plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population ()
+ glyphs->get_population (),
cmap_unicodes->get_population ()));
} else {
unicode_glyphid_map = &plan->accelerator->unicode_to_gid;
cmap_unicodes = &plan->accelerator->unicodes;
}
for (hb_codepoint_t cp : *cmap_unicodes)
{
hb_codepoint_t gid = unicode_glyphid_map[cp];
hb_codepoint_t gid = (*unicode_glyphid_map)[cp];
if (!unicodes->has (cp) && !glyphs->has (gid))
continue;
@ -729,7 +762,7 @@ _normalize_axes_location (hb_face_t *face, hb_subset_plan_t *plan)
}
if (has_avar)
seg_maps = &StructAfter<OT::SegmentMaps> (*seg_maps);
old_axis_idx++;
}
plan->all_axes_pinned = !axis_not_pinned;
@ -815,6 +848,13 @@ hb_subset_plan_create_or_fail (hb_face_t *face,
plan->check_success (plan->vmtx_map = hb_hashmap_create<unsigned, hb_pair_t<unsigned, int>> ());
plan->check_success (plan->hmtx_map = hb_hashmap_create<unsigned, hb_pair_t<unsigned, int>> ());
void* accel = hb_face_get_user_data(face, hb_subset_accelerator_t::user_data_key());
plan->attach_accelerator_data = input->attach_accelerator_data;
if (accel)
plan->accelerator = (hb_subset_accelerator_t*) accel;
if (unlikely (plan->in_error ())) {
hb_subset_plan_destroy (plan);
return nullptr;

View File

@ -31,6 +31,7 @@
#include "hb-subset.h"
#include "hb-subset-input.hh"
#include "hb-subset-accelerator.hh"
#include "hb-map.hh"
#include "hb-bimap.hh"
@ -97,6 +98,7 @@ struct hb_subset_plan_t
bool successful;
unsigned flags;
bool attach_accelerator_data = false;
// For each cp that we'd like to retain maps to the corresponding gid.
hb_set_t *unicodes;
@ -189,6 +191,8 @@ struct hb_subset_plan_t
//vmtx metrics map: new gid->(advance, lsb)
hb_hashmap_t<unsigned, hb_pair_t<unsigned, int>> *vmtx_map;
const hb_subset_accelerator_t* accelerator;
public:
template<typename T>

View File

@ -56,6 +56,7 @@
#include "hb-ot-math-table.hh"
#include "hb-ot-stat-table.hh"
#include "hb-repacker.hh"
#include "hb-subset-accelerator.hh"
using OT::Layout::GSUB;
using OT::Layout::GPOS;
@ -494,6 +495,27 @@ _subset_table (hb_subset_plan_t *plan,
}
}
/* Builds a subset accelerator from the plan's codepoint -> glyph map and
 * unicode set and stores it on `face` as user data under
 * hb_subset_accelerator_t::user_data_key().
 *
 * Best effort: if the accelerator cannot be allocated, is in an error state,
 * or cannot be stored on the face, nothing is attached and the accelerator
 * is released. No failure is reported to the caller. */
static void _attach_accelerator_data (const hb_subset_plan_t* plan,
                                      hb_face_t* face /* IN/OUT */)
{
  hb_subset_accelerator_t* accel =
      hb_subset_accelerator_t::create (*plan->codepoint_to_glyph,
                                       *plan->unicodes);

  // Guard against a failed allocation before touching the object.
  if (!accel)
    return;

  if (accel->in_error ())
  {
    hb_subset_accelerator_t::destroy (accel);
    return;
  }

  // On success the face takes ownership and releases the accelerator via the
  // destroy callback; on failure ownership stays here, so free it ourselves.
  if (!hb_face_set_user_data(face,
                             hb_subset_accelerator_t::user_data_key(),
                             accel,
                             hb_subset_accelerator_t::destroy,
                             true))
    hb_subset_accelerator_t::destroy (accel);
}
/**
* hb_subset_or_fail:
* @source: font face data to be subset.
@ -576,6 +598,10 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan)
offset += num_tables;
}
if (success && plan->attach_accelerator_data) {
_attach_accelerator_data (plan, plan->dest);
}
end:
return success ? hb_face_reference (plan->dest) : nullptr;
}

View File

@ -70,6 +70,14 @@ typedef struct hb_subset_plan_t hb_subset_plan_t;
* in the final subset.
* @HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES: If set then the unicode ranges in
* OS/2 will not be recalculated.
* @HB_SUBSET_FLAGS_PATCH_MODE: If set the subsetter behaviour will be modified
* to produce a subset that is better suited to patching. For example cmap
* subtable format will be kept stable.
* @HB_SUBSET_FLAGS_OMIT_GLYF: If set the subsetter won't actually produce the final
* glyf table bytes. The table directory will include an entry as if the table was
* there but the actual final font blob will be truncated prior to the glyf data. This
* is a useful performance optimization when a font aware binary patching algorithm
* is being used to diff two subsets.
*
* List of boolean properties that can be configured on the subset input.
*
@ -86,6 +94,8 @@ typedef enum { /*< flags >*/
HB_SUBSET_FLAGS_NOTDEF_OUTLINE = 0x00000040u,
HB_SUBSET_FLAGS_GLYPH_NAMES = 0x00000080u,
HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES = 0x00000100u,
// Not supported yet: HB_SUBSET_FLAGS_PATCH_MODE = 0x00000200u,
// Not supported yet: HB_SUBSET_FLAGS_OMIT_GLYF = 0x00000400u,
} hb_subset_flags_t;
/**
@ -169,6 +179,13 @@ hb_subset_input_pin_axis_location (hb_subset_input_t *input,
#endif
#endif
#ifdef HB_EXPERIMENTAL_API
HB_EXTERN hb_face_t *
hb_subset_preprocess (hb_face_t *source);
#endif
HB_EXTERN hb_face_t *
hb_subset_or_fail (hb_face_t *source, const hb_subset_input_t *input);

View File

@ -334,6 +334,7 @@ hb_subset_sources = files(
'hb-ot-cff1-table.cc',
'hb-ot-cff2-table.cc',
'hb-static.cc',
'hb-subset-accelerator.hh',
'hb-subset-cff-common.cc',
'hb-subset-cff-common.hh',
'hb-subset-cff1.cc',

View File

@ -52,6 +52,7 @@ def run_test (test, should_check_ots):
cli_args = ["--font-file=" + test.font_path,
"--output-file=" + out_file,
"--unicodes=%s" % test.unicodes (),
"--preprocess-face",
"--drop-tables+=DSIG",
"--drop-tables-=sbix"]
cli_args.extend (test.get_profile_flags ())

View File

@ -32,6 +32,15 @@
#include <hb-subset.h>
/* Returns a face to subset from, with accelerator data attached when
 * possible.
 *
 * The caller keeps its reference on `face` and additionally owns the
 * returned face, which must be released with hb_face_destroy().
 *
 * When built without HB_EXPERIMENTAL_API, or when preprocessing fails, the
 * result is simply a new reference to `face`, so the caller always receives
 * a usable, non-null face. */
static hb_face_t* preprocess_face(hb_face_t* face)
{
#ifdef HB_EXPERIMENTAL_API
  hb_face_t* preprocessed = hb_subset_preprocess (face);
  if (preprocessed)
    return preprocessed;
#endif
  return hb_face_reference(face);
}
/*
* Command line interface to the harfbuzz font subsetter.
*/
@ -103,6 +112,10 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
{
parse (argc, argv);
hb_face_t* orig_face = face;
if (preprocess)
orig_face = preprocess_face (face);
hb_face_t *new_face = nullptr;
for (unsigned i = 0; i < num_iterations; i++)
{
@ -119,6 +132,8 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
}
hb_face_destroy (new_face);
if (preprocess)
hb_face_destroy (orig_face);
return success ? 0 : 1;
}
@ -160,6 +175,7 @@ struct subset_main_t : option_parser_t, face_options_t, output_options_t<false>
public:
unsigned num_iterations = 1;
gboolean preprocess;
hb_subset_input_t *input = nullptr;
};
@ -657,7 +673,7 @@ parse_instance (const char *name,
GError **error)
{
subset_main_t *subset_main = (subset_main_t *) data;
char *s = strtok((char *) arg, "=");
while (s)
{
@ -915,6 +931,8 @@ subset_main_t::add_options ()
{"no-prune-unicode-ranges", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES>, "Don't change the 'OS/2 ulUnicodeRange*' bits.", nullptr},
{"glyph-names", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_GLYPH_NAMES>, "Keep PS glyph names in TT-flavored fonts. ", nullptr},
{"passthrough-tables", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag<HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED>, "Do not drop tables that the tool does not know how to subset.", nullptr},
{"preprocess-face", 0, 0, G_OPTION_ARG_NONE, &this->preprocess,
"If set preprocesses the face with the add accelerator option before actually subsetting.", nullptr},
{nullptr}
};
add_group (flag_entries,