Merge pull request #3579 from harfbuzz/subset-retain-buffer

Subset retain buffer
This commit is contained in:
Behdad Esfahbod 2022-05-11 16:45:40 -06:00 committed by GitHub
commit 8dc072d20d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 109 additions and 85 deletions

View File

@ -34,11 +34,12 @@ void AddCodepoints(const hb_set_t* codepoints_in_font,
unsigned subset_size,
hb_subset_input_t* input)
{
auto *unicodes = hb_subset_input_unicode_set (input);
hb_codepoint_t cp = HB_SET_VALUE_INVALID;
for (unsigned i = 0; i < subset_size; i++) {
// TODO(garretrieger): pick randomly.
if (!hb_set_next (codepoints_in_font, &cp)) return;
hb_set_add (hb_subset_input_unicode_set (input), cp);
hb_set_add (unicodes, cp);
}
}
@ -46,9 +47,10 @@ void AddGlyphs(unsigned num_glyphs_in_font,
unsigned subset_size,
hb_subset_input_t* input)
{
auto *glyphs = hb_subset_input_glyph_set (input);
for (unsigned i = 0; i < subset_size && i < num_glyphs_in_font; i++) {
// TODO(garretrieger): pick randomly.
hb_set_add (hb_subset_input_glyph_set (input), i);
hb_set_add (glyphs, i);
}
}
@ -70,19 +72,23 @@ static void BM_subset (benchmark::State &state,
hb_subset_input_t* input = hb_subset_input_create_or_fail ();
assert (input);
switch (operation)
{
hb_set_t* all_codepoints = hb_set_create ();
switch (operation) {
case subset_codepoints:
hb_face_collect_unicodes (face, all_codepoints);
AddCodepoints(all_codepoints, subset_size, input);
break;
case subset_glyphs:
unsigned num_glyphs = hb_face_get_glyph_count (face);
AddGlyphs(num_glyphs, subset_size, input);
break;
case subset_codepoints:
{
hb_set_t* all_codepoints = hb_set_create ();
hb_face_collect_unicodes (face, all_codepoints);
AddCodepoints(all_codepoints, subset_size, input);
hb_set_destroy (all_codepoints);
}
hb_set_destroy (all_codepoints);
break;
case subset_glyphs:
{
unsigned num_glyphs = hb_face_get_glyph_count (face);
AddGlyphs(num_glyphs, subset_size, input);
}
break;
}
for (auto _ : state)

View File

@ -520,6 +520,11 @@ struct parsed_values_t
}
void fini () { values.fini (); }
void alloc (unsigned n)
{
values.alloc (n);
}
void add_op (op_code_t op, const byte_str_ref_t& str_ref = byte_str_ref_t ())
{
VAL *val = values.push ();

View File

@ -2057,17 +2057,33 @@ struct ClassDefFormat1
unsigned count = classValue.len;
if (klass == 0)
{
hb_codepoint_t endGlyph = startGlyph + count -1;
for (hb_codepoint_t g : glyphs->iter ())
if (g < startGlyph || g > endGlyph)
intersect_glyphs->add (g);
unsigned start_glyph = startGlyph;
for (unsigned g = HB_SET_VALUE_INVALID;
hb_set_next (glyphs, &g) && g < start_glyph;)
intersect_glyphs->add (g);
for (unsigned g = startGlyph + count - 1;
hb_set_next (glyphs, &g);)
intersect_glyphs->add (g);
return;
}
for (unsigned i = 0; i < count; i++)
if (classValue[i] == klass && glyphs->has (startGlyph + i))
intersect_glyphs->add (startGlyph + i);
intersect_glyphs->add (startGlyph + i);
return;
#if 0
/* The following implementation is faster asymptotically, but slower
* in practice. */
unsigned start_glyph = startGlyph;
unsigned end_glyph = start_glyph + count;
for (unsigned g = startGlyph - 1;
hb_set_next (glyphs, &g) && g < end_glyph;)
if (classValue.arrayZ[g - start_glyph] == klass)
intersect_glyphs->add (g);
#endif
}
void intersected_classes (const hb_set_t *glyphs, hb_set_t *intersect_classes) const
@ -2279,43 +2295,31 @@ struct ClassDefFormat2
hb_codepoint_t g = HB_SET_VALUE_INVALID;
for (unsigned int i = 0; i < count; i++)
{
if (!hb_set_next (glyphs, &g))
break;
while (g != HB_SET_VALUE_INVALID && g < rangeRecord[i].first)
{
intersect_glyphs->add (g);
hb_set_next (glyphs, &g);
if (!hb_set_next (glyphs, &g))
goto done;
while (g < rangeRecord[i].first)
{
intersect_glyphs->add (g);
if (!hb_set_next (glyphs, &g))
goto done;
}
g = rangeRecord[i].last;
}
while (g != HB_SET_VALUE_INVALID && hb_set_next (glyphs, &g))
intersect_glyphs->add (g);
while (hb_set_next (glyphs, &g))
intersect_glyphs->add (g);
done:
return;
}
hb_codepoint_t g = HB_SET_VALUE_INVALID;
for (unsigned int i = 0; i < count; i++)
{
if (rangeRecord[i].value != klass) continue;
if (g != HB_SET_VALUE_INVALID)
{
if (g >= rangeRecord[i].first &&
g <= rangeRecord[i].last)
intersect_glyphs->add (g);
if (g > rangeRecord[i].last)
continue;
}
g = rangeRecord[i].first - 1;
while (hb_set_next (glyphs, &g))
{
if (g >= rangeRecord[i].first && g <= rangeRecord[i].last)
intersect_glyphs->add (g);
else if (g > rangeRecord[i].last)
break;
}
unsigned end = rangeRecord[i].last + 1;
for (hb_codepoint_t g = rangeRecord[i].first - 1;
hb_set_next (glyphs, &g) && g < end;)
intersect_glyphs->add (g);
}
}

View File

@ -569,6 +569,7 @@ struct subr_subsetter_t
ENV env (str, acc, fd);
cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp (env);
parsed_charstrings[i].alloc (str.length);
subr_subset_param_t param (&parsed_charstrings[i],
&parsed_global_subrs,
&parsed_local_subrs[fd],

View File

@ -79,12 +79,14 @@ using OT::Layout::GSUB::GSUB;
*/
static unsigned
_plan_estimate_subset_table_size (hb_subset_plan_t *plan, unsigned table_len)
_plan_estimate_subset_table_size (hb_subset_plan_t *plan,
unsigned table_len,
bool same_size)
{
unsigned src_glyphs = plan->source->get_num_glyphs ();
unsigned dst_glyphs = plan->glyphset ()->get_population ();
if (unlikely (!src_glyphs))
if (unlikely (!src_glyphs) || same_size)
return 512 + table_len;
return 512 + (unsigned) (table_len * sqrt ((double) dst_glyphs / src_glyphs));
@ -123,7 +125,6 @@ static
bool
_try_subset (const TableType *table,
hb_vector_t<char>* buf,
unsigned buf_size,
hb_subset_context_t* c /* OUT */)
{
c->serializer->start_serialize<TableType> ();
@ -136,7 +137,8 @@ _try_subset (const TableType *table,
return needed;
}
buf_size += (buf_size >> 1) + 32;
unsigned buf_size = buf->allocated;
buf_size = buf_size * 2 + 16;
DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.",
HB_UNTAG (c->table_tag), buf_size);
@ -147,13 +149,13 @@ _try_subset (const TableType *table,
return needed;
}
c->serializer->reset (buf->arrayZ, buf_size);
return _try_subset (table, buf, buf_size, c);
c->serializer->reset (buf->arrayZ, buf->allocated);
return _try_subset (table, buf, c);
}
template<typename TableType>
static bool
_subset (hb_subset_plan_t *plan)
_subset (hb_subset_plan_t *plan, hb_vector_t<char> &buf)
{
hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table<TableType> (plan->source);
const TableType *table = source_blob->as<TableType> ();
@ -167,10 +169,13 @@ _subset (hb_subset_plan_t *plan)
return false;
}
hb_vector_t<char> buf;
/* TODO Not all tables are glyph-related. 'name' table size for example should not be
* affected by number of glyphs. Accommodate that. */
unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length);
/* Tables that we want to allocate same space as the source table. For GSUB/GPOS it's
* because those are expensive to subset, so giving them more room is fine. */
bool same_size_table = TableType::tableTag == HB_OT_TAG_GSUB ||
TableType::tableTag == HB_OT_TAG_GPOS ||
TableType::tableTag == HB_OT_TAG_name;
unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length, same_size_table);
DEBUG_MSG (SUBSET, nullptr,
"OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size);
if (unlikely (!buf.alloc (buf_size)))
@ -181,10 +186,10 @@ _subset (hb_subset_plan_t *plan)
}
bool needed = false;
hb_serialize_context_t serializer (buf.arrayZ, buf_size);
hb_serialize_context_t serializer (buf.arrayZ, buf.allocated);
{
hb_subset_context_t c (source_blob, plan, &serializer, tag);
needed = _try_subset (table, &buf, buf_size, &c);
needed = _try_subset (table, &buf, &c);
}
hb_blob_destroy (source_blob);
@ -274,7 +279,9 @@ _passthrough (hb_subset_plan_t *plan, hb_tag_t tag)
}
static bool
_subset_table (hb_subset_plan_t *plan, hb_tag_t tag)
_subset_table (hb_subset_plan_t *plan,
hb_vector_t<char> &buf,
hb_tag_t tag)
{
if (plan->no_subset_tables->has (tag)) {
return _passthrough (plan, tag);
@ -283,42 +290,42 @@ _subset_table (hb_subset_plan_t *plan, hb_tag_t tag)
DEBUG_MSG (SUBSET, nullptr, "subset %c%c%c%c", HB_UNTAG (tag));
switch (tag)
{
case HB_OT_TAG_glyf: return _subset<const OT::glyf> (plan);
case HB_OT_TAG_hdmx: return _subset<const OT::hdmx> (plan);
case HB_OT_TAG_name: return _subset<const OT::name> (plan);
case HB_OT_TAG_glyf: return _subset<const OT::glyf> (plan, buf);
case HB_OT_TAG_hdmx: return _subset<const OT::hdmx> (plan, buf);
case HB_OT_TAG_name: return _subset<const OT::name> (plan, buf);
case HB_OT_TAG_head:
if (_is_table_present (plan->source, HB_OT_TAG_glyf) && !_should_drop_table (plan, HB_OT_TAG_glyf))
return true; /* skip head, handled by glyf */
return _subset<const OT::head> (plan);
return _subset<const OT::head> (plan, buf);
case HB_OT_TAG_hhea: return true; /* skip hhea, handled by hmtx */
case HB_OT_TAG_hmtx: return _subset<const OT::hmtx> (plan);
case HB_OT_TAG_hmtx: return _subset<const OT::hmtx> (plan, buf);
case HB_OT_TAG_vhea: return true; /* skip vhea, handled by vmtx */
case HB_OT_TAG_vmtx: return _subset<const OT::vmtx> (plan);
case HB_OT_TAG_maxp: return _subset<const OT::maxp> (plan);
case HB_OT_TAG_sbix: return _subset<const OT::sbix> (plan);
case HB_OT_TAG_vmtx: return _subset<const OT::vmtx> (plan, buf);
case HB_OT_TAG_maxp: return _subset<const OT::maxp> (plan, buf);
case HB_OT_TAG_sbix: return _subset<const OT::sbix> (plan, buf);
case HB_OT_TAG_loca: return true; /* skip loca, handled by glyf */
case HB_OT_TAG_cmap: return _subset<const OT::cmap> (plan);
case HB_OT_TAG_OS2 : return _subset<const OT::OS2 > (plan);
case HB_OT_TAG_post: return _subset<const OT::post> (plan);
case HB_OT_TAG_COLR: return _subset<const OT::COLR> (plan);
case HB_OT_TAG_CPAL: return _subset<const OT::CPAL> (plan);
case HB_OT_TAG_CBLC: return _subset<const OT::CBLC> (plan);
case HB_OT_TAG_cmap: return _subset<const OT::cmap> (plan, buf);
case HB_OT_TAG_OS2 : return _subset<const OT::OS2 > (plan, buf);
case HB_OT_TAG_post: return _subset<const OT::post> (plan, buf);
case HB_OT_TAG_COLR: return _subset<const OT::COLR> (plan, buf);
case HB_OT_TAG_CPAL: return _subset<const OT::CPAL> (plan, buf);
case HB_OT_TAG_CBLC: return _subset<const OT::CBLC> (plan, buf);
case HB_OT_TAG_CBDT: return true; /* skip CBDT, handled by CBLC */
case HB_OT_TAG_MATH: return _subset<const OT::MATH> (plan);
case HB_OT_TAG_MATH: return _subset<const OT::MATH> (plan, buf);
#ifndef HB_NO_SUBSET_CFF
case HB_OT_TAG_cff1: return _subset<const OT::cff1> (plan);
case HB_OT_TAG_cff2: return _subset<const OT::cff2> (plan);
case HB_OT_TAG_VORG: return _subset<const OT::VORG> (plan);
case HB_OT_TAG_cff1: return _subset<const OT::cff1> (plan, buf);
case HB_OT_TAG_cff2: return _subset<const OT::cff2> (plan, buf);
case HB_OT_TAG_VORG: return _subset<const OT::VORG> (plan, buf);
#endif
#ifndef HB_NO_SUBSET_LAYOUT
case HB_OT_TAG_GDEF: return _subset<const OT::GDEF> (plan);
case HB_OT_TAG_GSUB: return _subset<const GSUB> (plan);
case HB_OT_TAG_GPOS: return _subset<const OT::GPOS> (plan);
case HB_OT_TAG_gvar: return _subset<const OT::gvar> (plan);
case HB_OT_TAG_HVAR: return _subset<const OT::HVAR> (plan);
case HB_OT_TAG_VVAR: return _subset<const OT::VVAR> (plan);
case HB_OT_TAG_GDEF: return _subset<const OT::GDEF> (plan, buf);
case HB_OT_TAG_GSUB: return _subset<const GSUB> (plan, buf);
case HB_OT_TAG_GPOS: return _subset<const OT::GPOS> (plan, buf);
case HB_OT_TAG_gvar: return _subset<const OT::gvar> (plan, buf);
case HB_OT_TAG_HVAR: return _subset<const OT::HVAR> (plan, buf);
case HB_OT_TAG_VVAR: return _subset<const OT::VVAR> (plan, buf);
#endif
default:
@ -379,6 +386,8 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan)
bool success = true;
hb_tag_t table_tags[32];
unsigned offset = 0, num_tables = ARRAY_LENGTH (table_tags);
hb_vector_t<char> buf;
buf.alloc (4096 - 16);
while ((hb_face_get_table_tags (plan->source, offset, &num_tables, table_tags), num_tables))
{
for (unsigned i = 0; i < num_tables; ++i)
@ -386,7 +395,7 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan)
hb_tag_t tag = table_tags[i];
if (_should_drop_table (plan, tag) && !tags_set.has (tag)) continue;
tags_set.add (tag);
success = _subset_table (plan, tag);
success = _subset_table (plan, buf, tag);
if (unlikely (!success)) goto end;
}
offset += num_tables;

View File

@ -70,9 +70,8 @@ struct hb_vector_t : std::conditional<sorted, hb_vector_t<Type, false>, hb_empty
}
~hb_vector_t () { fini (); }
private:
int allocated = 0; /* == -1 means allocation failed. */
public:
int allocated = 0; /* == -1 means allocation failed. */
unsigned int length = 0;
public:
Type *arrayZ = nullptr;