Merge pull request #3579 from harfbuzz/subset-retain-buffer

Subset retain buffer
This commit is contained in:
Behdad Esfahbod 2022-05-11 16:45:40 -06:00 committed by GitHub
commit 8dc072d20d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 109 additions and 85 deletions

View File

@ -34,11 +34,12 @@ void AddCodepoints(const hb_set_t* codepoints_in_font,
unsigned subset_size, unsigned subset_size,
hb_subset_input_t* input) hb_subset_input_t* input)
{ {
auto *unicodes = hb_subset_input_unicode_set (input);
hb_codepoint_t cp = HB_SET_VALUE_INVALID; hb_codepoint_t cp = HB_SET_VALUE_INVALID;
for (unsigned i = 0; i < subset_size; i++) { for (unsigned i = 0; i < subset_size; i++) {
// TODO(garretrieger): pick randomly. // TODO(garretrieger): pick randomly.
if (!hb_set_next (codepoints_in_font, &cp)) return; if (!hb_set_next (codepoints_in_font, &cp)) return;
hb_set_add (hb_subset_input_unicode_set (input), cp); hb_set_add (unicodes, cp);
} }
} }
@ -46,9 +47,10 @@ void AddGlyphs(unsigned num_glyphs_in_font,
unsigned subset_size, unsigned subset_size,
hb_subset_input_t* input) hb_subset_input_t* input)
{ {
auto *glyphs = hb_subset_input_glyph_set (input);
for (unsigned i = 0; i < subset_size && i < num_glyphs_in_font; i++) { for (unsigned i = 0; i < subset_size && i < num_glyphs_in_font; i++) {
// TODO(garretrieger): pick randomly. // TODO(garretrieger): pick randomly.
hb_set_add (hb_subset_input_glyph_set (input), i); hb_set_add (glyphs, i);
} }
} }
@ -70,19 +72,23 @@ static void BM_subset (benchmark::State &state,
hb_subset_input_t* input = hb_subset_input_create_or_fail (); hb_subset_input_t* input = hb_subset_input_create_or_fail ();
assert (input); assert (input);
switch (operation)
{ {
hb_set_t* all_codepoints = hb_set_create (); case subset_codepoints:
switch (operation) { {
case subset_codepoints: hb_set_t* all_codepoints = hb_set_create ();
hb_face_collect_unicodes (face, all_codepoints); hb_face_collect_unicodes (face, all_codepoints);
AddCodepoints(all_codepoints, subset_size, input); AddCodepoints(all_codepoints, subset_size, input);
break; hb_set_destroy (all_codepoints);
case subset_glyphs:
unsigned num_glyphs = hb_face_get_glyph_count (face);
AddGlyphs(num_glyphs, subset_size, input);
break;
} }
hb_set_destroy (all_codepoints); break;
case subset_glyphs:
{
unsigned num_glyphs = hb_face_get_glyph_count (face);
AddGlyphs(num_glyphs, subset_size, input);
}
break;
} }
for (auto _ : state) for (auto _ : state)

View File

@ -520,6 +520,11 @@ struct parsed_values_t
} }
void fini () { values.fini (); } void fini () { values.fini (); }
void alloc (unsigned n)
{
values.alloc (n);
}
void add_op (op_code_t op, const byte_str_ref_t& str_ref = byte_str_ref_t ()) void add_op (op_code_t op, const byte_str_ref_t& str_ref = byte_str_ref_t ())
{ {
VAL *val = values.push (); VAL *val = values.push ();

View File

@ -2057,17 +2057,33 @@ struct ClassDefFormat1
unsigned count = classValue.len; unsigned count = classValue.len;
if (klass == 0) if (klass == 0)
{ {
hb_codepoint_t endGlyph = startGlyph + count -1; unsigned start_glyph = startGlyph;
for (hb_codepoint_t g : glyphs->iter ()) for (unsigned g = HB_SET_VALUE_INVALID;
if (g < startGlyph || g > endGlyph) hb_set_next (glyphs, &g) && g < start_glyph;)
intersect_glyphs->add (g); intersect_glyphs->add (g);
for (unsigned g = startGlyph + count - 1;
hb_set_next (glyphs, &g);)
intersect_glyphs->add (g);
return; return;
} }
for (unsigned i = 0; i < count; i++) for (unsigned i = 0; i < count; i++)
if (classValue[i] == klass && glyphs->has (startGlyph + i)) if (classValue[i] == klass && glyphs->has (startGlyph + i))
intersect_glyphs->add (startGlyph + i); intersect_glyphs->add (startGlyph + i);
return;
#if 0
/* The following implementation is faster asymptotically, but slower
* in practice. */
unsigned start_glyph = startGlyph;
unsigned end_glyph = start_glyph + count;
for (unsigned g = startGlyph - 1;
hb_set_next (glyphs, &g) && g < end_glyph;)
if (classValue.arrayZ[g - start_glyph] == klass)
intersect_glyphs->add (g);
#endif
} }
void intersected_classes (const hb_set_t *glyphs, hb_set_t *intersect_classes) const void intersected_classes (const hb_set_t *glyphs, hb_set_t *intersect_classes) const
@ -2279,43 +2295,31 @@ struct ClassDefFormat2
hb_codepoint_t g = HB_SET_VALUE_INVALID; hb_codepoint_t g = HB_SET_VALUE_INVALID;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
{ {
if (!hb_set_next (glyphs, &g)) if (!hb_set_next (glyphs, &g))
break; goto done;
while (g != HB_SET_VALUE_INVALID && g < rangeRecord[i].first) while (g < rangeRecord[i].first)
{ {
intersect_glyphs->add (g); intersect_glyphs->add (g);
hb_set_next (glyphs, &g); if (!hb_set_next (glyphs, &g))
goto done;
} }
g = rangeRecord[i].last; g = rangeRecord[i].last;
} }
while (g != HB_SET_VALUE_INVALID && hb_set_next (glyphs, &g)) while (hb_set_next (glyphs, &g))
intersect_glyphs->add (g); intersect_glyphs->add (g);
done:
return; return;
} }
hb_codepoint_t g = HB_SET_VALUE_INVALID;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
{ {
if (rangeRecord[i].value != klass) continue; if (rangeRecord[i].value != klass) continue;
if (g != HB_SET_VALUE_INVALID) unsigned end = rangeRecord[i].last + 1;
{ for (hb_codepoint_t g = rangeRecord[i].first - 1;
if (g >= rangeRecord[i].first && hb_set_next (glyphs, &g) && g < end;)
g <= rangeRecord[i].last) intersect_glyphs->add (g);
intersect_glyphs->add (g);
if (g > rangeRecord[i].last)
continue;
}
g = rangeRecord[i].first - 1;
while (hb_set_next (glyphs, &g))
{
if (g >= rangeRecord[i].first && g <= rangeRecord[i].last)
intersect_glyphs->add (g);
else if (g > rangeRecord[i].last)
break;
}
} }
} }

View File

@ -569,6 +569,7 @@ struct subr_subsetter_t
ENV env (str, acc, fd); ENV env (str, acc, fd);
cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp (env); cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp (env);
parsed_charstrings[i].alloc (str.length);
subr_subset_param_t param (&parsed_charstrings[i], subr_subset_param_t param (&parsed_charstrings[i],
&parsed_global_subrs, &parsed_global_subrs,
&parsed_local_subrs[fd], &parsed_local_subrs[fd],

View File

@ -79,12 +79,14 @@ using OT::Layout::GSUB::GSUB;
*/ */
static unsigned static unsigned
_plan_estimate_subset_table_size (hb_subset_plan_t *plan, unsigned table_len) _plan_estimate_subset_table_size (hb_subset_plan_t *plan,
unsigned table_len,
bool same_size)
{ {
unsigned src_glyphs = plan->source->get_num_glyphs (); unsigned src_glyphs = plan->source->get_num_glyphs ();
unsigned dst_glyphs = plan->glyphset ()->get_population (); unsigned dst_glyphs = plan->glyphset ()->get_population ();
if (unlikely (!src_glyphs)) if (unlikely (!src_glyphs) || same_size)
return 512 + table_len; return 512 + table_len;
return 512 + (unsigned) (table_len * sqrt ((double) dst_glyphs / src_glyphs)); return 512 + (unsigned) (table_len * sqrt ((double) dst_glyphs / src_glyphs));
@ -123,7 +125,6 @@ static
bool bool
_try_subset (const TableType *table, _try_subset (const TableType *table,
hb_vector_t<char>* buf, hb_vector_t<char>* buf,
unsigned buf_size,
hb_subset_context_t* c /* OUT */) hb_subset_context_t* c /* OUT */)
{ {
c->serializer->start_serialize<TableType> (); c->serializer->start_serialize<TableType> ();
@ -136,7 +137,8 @@ _try_subset (const TableType *table,
return needed; return needed;
} }
buf_size += (buf_size >> 1) + 32; unsigned buf_size = buf->allocated;
buf_size = buf_size * 2 + 16;
DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.",
HB_UNTAG (c->table_tag), buf_size); HB_UNTAG (c->table_tag), buf_size);
@ -147,13 +149,13 @@ _try_subset (const TableType *table,
return needed; return needed;
} }
c->serializer->reset (buf->arrayZ, buf_size); c->serializer->reset (buf->arrayZ, buf->allocated);
return _try_subset (table, buf, buf_size, c); return _try_subset (table, buf, c);
} }
template<typename TableType> template<typename TableType>
static bool static bool
_subset (hb_subset_plan_t *plan) _subset (hb_subset_plan_t *plan, hb_vector_t<char> &buf)
{ {
hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table<TableType> (plan->source); hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table<TableType> (plan->source);
const TableType *table = source_blob->as<TableType> (); const TableType *table = source_blob->as<TableType> ();
@ -167,10 +169,13 @@ _subset (hb_subset_plan_t *plan)
return false; return false;
} }
hb_vector_t<char> buf; /* Tables that we want to allocate same space as the source table. For GSUB/GPOS it's
/* TODO Not all tables are glyph-related. 'name' table size for example should not be * because those are expensive to subset, so giving them more room is fine. */
* affected by number of glyphs. Accommodate that. */ bool same_size_table = TableType::tableTag == HB_OT_TAG_GSUB ||
unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length); TableType::tableTag == HB_OT_TAG_GPOS ||
TableType::tableTag == HB_OT_TAG_name;
unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length, same_size_table);
DEBUG_MSG (SUBSET, nullptr, DEBUG_MSG (SUBSET, nullptr,
"OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size); "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size);
if (unlikely (!buf.alloc (buf_size))) if (unlikely (!buf.alloc (buf_size)))
@ -181,10 +186,10 @@ _subset (hb_subset_plan_t *plan)
} }
bool needed = false; bool needed = false;
hb_serialize_context_t serializer (buf.arrayZ, buf_size); hb_serialize_context_t serializer (buf.arrayZ, buf.allocated);
{ {
hb_subset_context_t c (source_blob, plan, &serializer, tag); hb_subset_context_t c (source_blob, plan, &serializer, tag);
needed = _try_subset (table, &buf, buf_size, &c); needed = _try_subset (table, &buf, &c);
} }
hb_blob_destroy (source_blob); hb_blob_destroy (source_blob);
@ -274,7 +279,9 @@ _passthrough (hb_subset_plan_t *plan, hb_tag_t tag)
} }
static bool static bool
_subset_table (hb_subset_plan_t *plan, hb_tag_t tag) _subset_table (hb_subset_plan_t *plan,
hb_vector_t<char> &buf,
hb_tag_t tag)
{ {
if (plan->no_subset_tables->has (tag)) { if (plan->no_subset_tables->has (tag)) {
return _passthrough (plan, tag); return _passthrough (plan, tag);
@ -283,42 +290,42 @@ _subset_table (hb_subset_plan_t *plan, hb_tag_t tag)
DEBUG_MSG (SUBSET, nullptr, "subset %c%c%c%c", HB_UNTAG (tag)); DEBUG_MSG (SUBSET, nullptr, "subset %c%c%c%c", HB_UNTAG (tag));
switch (tag) switch (tag)
{ {
case HB_OT_TAG_glyf: return _subset<const OT::glyf> (plan); case HB_OT_TAG_glyf: return _subset<const OT::glyf> (plan, buf);
case HB_OT_TAG_hdmx: return _subset<const OT::hdmx> (plan); case HB_OT_TAG_hdmx: return _subset<const OT::hdmx> (plan, buf);
case HB_OT_TAG_name: return _subset<const OT::name> (plan); case HB_OT_TAG_name: return _subset<const OT::name> (plan, buf);
case HB_OT_TAG_head: case HB_OT_TAG_head:
if (_is_table_present (plan->source, HB_OT_TAG_glyf) && !_should_drop_table (plan, HB_OT_TAG_glyf)) if (_is_table_present (plan->source, HB_OT_TAG_glyf) && !_should_drop_table (plan, HB_OT_TAG_glyf))
return true; /* skip head, handled by glyf */ return true; /* skip head, handled by glyf */
return _subset<const OT::head> (plan); return _subset<const OT::head> (plan, buf);
case HB_OT_TAG_hhea: return true; /* skip hhea, handled by hmtx */ case HB_OT_TAG_hhea: return true; /* skip hhea, handled by hmtx */
case HB_OT_TAG_hmtx: return _subset<const OT::hmtx> (plan); case HB_OT_TAG_hmtx: return _subset<const OT::hmtx> (plan, buf);
case HB_OT_TAG_vhea: return true; /* skip vhea, handled by vmtx */ case HB_OT_TAG_vhea: return true; /* skip vhea, handled by vmtx */
case HB_OT_TAG_vmtx: return _subset<const OT::vmtx> (plan); case HB_OT_TAG_vmtx: return _subset<const OT::vmtx> (plan, buf);
case HB_OT_TAG_maxp: return _subset<const OT::maxp> (plan); case HB_OT_TAG_maxp: return _subset<const OT::maxp> (plan, buf);
case HB_OT_TAG_sbix: return _subset<const OT::sbix> (plan); case HB_OT_TAG_sbix: return _subset<const OT::sbix> (plan, buf);
case HB_OT_TAG_loca: return true; /* skip loca, handled by glyf */ case HB_OT_TAG_loca: return true; /* skip loca, handled by glyf */
case HB_OT_TAG_cmap: return _subset<const OT::cmap> (plan); case HB_OT_TAG_cmap: return _subset<const OT::cmap> (plan, buf);
case HB_OT_TAG_OS2 : return _subset<const OT::OS2 > (plan); case HB_OT_TAG_OS2 : return _subset<const OT::OS2 > (plan, buf);
case HB_OT_TAG_post: return _subset<const OT::post> (plan); case HB_OT_TAG_post: return _subset<const OT::post> (plan, buf);
case HB_OT_TAG_COLR: return _subset<const OT::COLR> (plan); case HB_OT_TAG_COLR: return _subset<const OT::COLR> (plan, buf);
case HB_OT_TAG_CPAL: return _subset<const OT::CPAL> (plan); case HB_OT_TAG_CPAL: return _subset<const OT::CPAL> (plan, buf);
case HB_OT_TAG_CBLC: return _subset<const OT::CBLC> (plan); case HB_OT_TAG_CBLC: return _subset<const OT::CBLC> (plan, buf);
case HB_OT_TAG_CBDT: return true; /* skip CBDT, handled by CBLC */ case HB_OT_TAG_CBDT: return true; /* skip CBDT, handled by CBLC */
case HB_OT_TAG_MATH: return _subset<const OT::MATH> (plan); case HB_OT_TAG_MATH: return _subset<const OT::MATH> (plan, buf);
#ifndef HB_NO_SUBSET_CFF #ifndef HB_NO_SUBSET_CFF
case HB_OT_TAG_cff1: return _subset<const OT::cff1> (plan); case HB_OT_TAG_cff1: return _subset<const OT::cff1> (plan, buf);
case HB_OT_TAG_cff2: return _subset<const OT::cff2> (plan); case HB_OT_TAG_cff2: return _subset<const OT::cff2> (plan, buf);
case HB_OT_TAG_VORG: return _subset<const OT::VORG> (plan); case HB_OT_TAG_VORG: return _subset<const OT::VORG> (plan, buf);
#endif #endif
#ifndef HB_NO_SUBSET_LAYOUT #ifndef HB_NO_SUBSET_LAYOUT
case HB_OT_TAG_GDEF: return _subset<const OT::GDEF> (plan); case HB_OT_TAG_GDEF: return _subset<const OT::GDEF> (plan, buf);
case HB_OT_TAG_GSUB: return _subset<const GSUB> (plan); case HB_OT_TAG_GSUB: return _subset<const GSUB> (plan, buf);
case HB_OT_TAG_GPOS: return _subset<const OT::GPOS> (plan); case HB_OT_TAG_GPOS: return _subset<const OT::GPOS> (plan, buf);
case HB_OT_TAG_gvar: return _subset<const OT::gvar> (plan); case HB_OT_TAG_gvar: return _subset<const OT::gvar> (plan, buf);
case HB_OT_TAG_HVAR: return _subset<const OT::HVAR> (plan); case HB_OT_TAG_HVAR: return _subset<const OT::HVAR> (plan, buf);
case HB_OT_TAG_VVAR: return _subset<const OT::VVAR> (plan); case HB_OT_TAG_VVAR: return _subset<const OT::VVAR> (plan, buf);
#endif #endif
default: default:
@ -379,6 +386,8 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan)
bool success = true; bool success = true;
hb_tag_t table_tags[32]; hb_tag_t table_tags[32];
unsigned offset = 0, num_tables = ARRAY_LENGTH (table_tags); unsigned offset = 0, num_tables = ARRAY_LENGTH (table_tags);
hb_vector_t<char> buf;
buf.alloc (4096 - 16);
while ((hb_face_get_table_tags (plan->source, offset, &num_tables, table_tags), num_tables)) while ((hb_face_get_table_tags (plan->source, offset, &num_tables, table_tags), num_tables))
{ {
for (unsigned i = 0; i < num_tables; ++i) for (unsigned i = 0; i < num_tables; ++i)
@ -386,7 +395,7 @@ hb_subset_plan_execute_or_fail (hb_subset_plan_t *plan)
hb_tag_t tag = table_tags[i]; hb_tag_t tag = table_tags[i];
if (_should_drop_table (plan, tag) && !tags_set.has (tag)) continue; if (_should_drop_table (plan, tag) && !tags_set.has (tag)) continue;
tags_set.add (tag); tags_set.add (tag);
success = _subset_table (plan, tag); success = _subset_table (plan, buf, tag);
if (unlikely (!success)) goto end; if (unlikely (!success)) goto end;
} }
offset += num_tables; offset += num_tables;

View File

@ -70,9 +70,8 @@ struct hb_vector_t : std::conditional<sorted, hb_vector_t<Type, false>, hb_empty
} }
~hb_vector_t () { fini (); } ~hb_vector_t () { fini (); }
private:
int allocated = 0; /* == -1 means allocation failed. */
public: public:
int allocated = 0; /* == -1 means allocation failed. */
unsigned int length = 0; unsigned int length = 0;
public: public:
Type *arrayZ = nullptr; Type *arrayZ = nullptr;