[repacker] estimate size of classDef1 and coverage during PairPos2 split point analysis.

This commit is contained in:
Garret Rieger 2022-08-05 20:19:11 +00:00
parent 2264df6da3
commit 0e48a65d32
4 changed files with 134 additions and 42 deletions

View File

@ -170,20 +170,26 @@ struct class_def_size_estimator_t
// Incremental increase in the Coverage and ClassDef table size // Incremental increase in the Coverage and ClassDef table size
// (worst case) if all glyphs associated with 'klass' were added. // (worst case) if all glyphs associated with 'klass' were added.
unsigned incremental_size_for_class (unsigned klass) const unsigned incremental_coverage_size (unsigned klass) const
{ {
// Coverage takes 2 bytes per glyph worst case, // Coverage takes 2 bytes per glyph worst case,
unsigned cov_size = 2 * glyphs_per_class.get (klass).get_population (); return 2 * glyphs_per_class.get (klass).get_population ();
}
// Incremental increase in the Coverage and ClassDef table size
// (worst case) if all glyphs associated with 'klass' were added.
unsigned incremental_class_def_size (unsigned klass) const
{
// ClassDef takes 6 bytes per range // ClassDef takes 6 bytes per range
unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass); unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass);
if (gids_consecutive) if (gids_consecutive)
{ {
// ClassDef1 takes 2 bytes per glyph, but only can be used // ClassDef1 takes 2 bytes per glyph, but only can be used
// when gids are consecutive. // when gids are consecutive.
return cov_size + hb_min (cov_size, class_def_2_size); return hb_min (2 * glyphs_per_class.get (klass).get_population (), class_def_2_size);
} }
return cov_size + class_def_2_size; return class_def_2_size;
} }
bool in_error () bool in_error ()

View File

@ -206,10 +206,17 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c, unsigned this_index) hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c, unsigned this_index)
{ {
const unsigned base_size = OT::Layout::GPOS_impl::PairPosFormat2_4<SmallTypes>::min_size const unsigned base_size = OT::Layout::GPOS_impl::PairPosFormat2_4<SmallTypes>::min_size;
+ size_of (c, this_index, &coverage) const unsigned class_def_2_size = size_of (c, this_index, &classDef2);
+ size_of (c, this_index, &classDef1) const Coverage* coverage = get_coverage (c, this_index);
+ size_of (c, this_index, &classDef2); const ClassDef* class_def_1 = get_class_def_1 (c, this_index);
auto gid_and_class =
+ coverage->iter ()
| hb_map_retains_sorting ([&] (hb_codepoint_t gid) {
return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (gid, class_def_1->get_class (gid));
})
;
class_def_size_estimator_t estimator (gid_and_class);
const unsigned class1_count = class1Count; const unsigned class1_count = class1Count;
const unsigned class2_count = class2Count; const unsigned class2_count = class2Count;
@ -220,6 +227,9 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
const unsigned total_value_len = value_1_len + value_2_len; const unsigned total_value_len = value_1_len + value_2_len;
unsigned accumulated = base_size; unsigned accumulated = base_size;
unsigned coverage_size = 4;
unsigned class_def_1_size = 4;
hb_vector_t<unsigned> split_points; hb_vector_t<unsigned> split_points;
hb_hashmap_t<void*, unsigned> device_tables = get_all_device_tables (c, this_index); hb_hashmap_t<void*, unsigned> device_tables = get_all_device_tables (c, this_index);
@ -231,6 +241,9 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
for (unsigned i = 0; i < class1_count; i++) for (unsigned i = 0; i < class1_count; i++)
{ {
accumulated += class1_record_size; accumulated += class1_record_size;
coverage_size += estimator.incremental_coverage_size (i);
class_def_1_size += estimator.incremental_class_def_size (i);
if (has_device_tables) { if (has_device_tables) {
for (unsigned j = 0; j < class2_count; j++) for (unsigned j = 0; j < class2_count; j++)
{ {
@ -249,14 +262,16 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
} }
} }
// TODO(garretrieger): don't count the size of the largest pairset against the limit, since unsigned total = accumulated
// it will be packed last in the order and does not contribute to + coverage_size + class_def_1_size + class_def_2_size
// the 64kb limit. // The largest object will pack last and can exceed the size limit.
- hb_max (hb_max (coverage_size, class_def_1_size), class_def_2_size);
if (accumulated > (1 << 16)) if (total > (1 << 16))
{ {
split_points.push (i); split_points.push (i);
accumulated = base_size; accumulated = base_size;
coverage_size = 4;
class_def_1_size = 4;
visited.clear (); // node sharing isn't allowed between splits. visited.clear (); // node sharing isn't allowed between splits.
} }
} }
@ -526,6 +541,30 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
return result; return result;
} }
const Coverage* get_coverage (gsubgpos_graph_context_t& c,
unsigned this_index) const
{
unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
auto& coverage_v = c.graph.vertices_[coverage_id];
Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
if (!coverage_table->sanitize (coverage_v))
return &Null(Coverage);
return coverage_table;
}
const ClassDef* get_class_def_1 (gsubgpos_graph_context_t& c,
unsigned this_index) const
{
unsigned class_def_1_id = c.graph.index_for_offset (this_index, &classDef1);
auto& class_def_1_v = c.graph.vertices_[class_def_1_id];
ClassDef* class_def_1_table = (ClassDef*) class_def_1_v.obj.head;
if (!class_def_1_table->sanitize (class_def_1_v))
return &Null(ClassDef);
return class_def_1_table;
}
unsigned size_of_value_record_children (gsubgpos_graph_context_t& c, unsigned size_of_value_record_children (gsubgpos_graph_context_t& c,
const hb_hashmap_t<void*, unsigned>& device_tables, const hb_hashmap_t<void*, unsigned>& device_tables,
const hb_vector_t<unsigned> device_table_indices, const hb_vector_t<unsigned> device_table_indices,

View File

@ -31,13 +31,22 @@ typedef hb_pair_t<hb_codepoint_t, hb_codepoint_t> gid_and_class_t;
typedef hb_vector_t<gid_and_class_t> gid_and_class_list_t; typedef hb_vector_t<gid_and_class_t> gid_and_class_list_t;
static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klass, unsigned expected) static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klass,
unsigned cov_expected, unsigned class_def_expected)
{ {
graph::class_def_size_estimator_t estimator (list.iter ()); graph::class_def_size_estimator_t estimator (list.iter ());
unsigned result = estimator.incremental_size_for_class (klass);
if (result != expected) unsigned result = estimator.incremental_coverage_size (klass);
if (result != cov_expected)
{ {
printf ("FAIL: expected size %u but was %u\n", expected, result); printf ("FAIL: coverage expected size %u but was %u\n", cov_expected, result);
return false;
}
result = estimator.incremental_class_def_size (klass);
if (result != class_def_expected)
{
printf ("FAIL: class def expected size %u but was %u\n", class_def_expected, result);
return false; return false;
} }
@ -48,13 +57,13 @@ static void test_class_and_coverage_size_estimates ()
{ {
gid_and_class_list_t empty = { gid_and_class_list_t empty = {
}; };
assert (incremental_size_is (empty, 0, 0)); assert (incremental_size_is (empty, 0, 0, 0));
assert (incremental_size_is (empty, 1, 0)); assert (incremental_size_is (empty, 1, 0, 0));
gid_and_class_list_t class_zero = { gid_and_class_list_t class_zero = {
{5, 0}, {5, 0},
}; };
assert (incremental_size_is (class_zero, 0, 2)); assert (incremental_size_is (class_zero, 0, 2, 0));
gid_and_class_list_t consecutive = { gid_and_class_list_t consecutive = {
{4, 0}, {4, 0},
@ -66,9 +75,9 @@ static void test_class_and_coverage_size_estimates ()
{10, 2}, {10, 2},
{11, 2}, {11, 2},
}; };
assert (incremental_size_is (consecutive, 0, 4)); assert (incremental_size_is (consecutive, 0, 4, 0));
assert (incremental_size_is (consecutive, 1, 4 + 4)); assert (incremental_size_is (consecutive, 1, 4, 4));
assert (incremental_size_is (consecutive, 2, 8 + 6)); assert (incremental_size_is (consecutive, 2, 8, 6));
gid_and_class_list_t non_consecutive = { gid_and_class_list_t non_consecutive = {
{4, 0}, {4, 0},
@ -82,9 +91,9 @@ static void test_class_and_coverage_size_estimates ()
{11, 2}, {11, 2},
{12, 2}, {12, 2},
}; };
assert (incremental_size_is (non_consecutive, 0, 4)); assert (incremental_size_is (non_consecutive, 0, 4, 0));
assert (incremental_size_is (non_consecutive, 1, 4 + 6)); assert (incremental_size_is (non_consecutive, 1, 4, 6));
assert (incremental_size_is (non_consecutive, 2, 8 + 6)); assert (incremental_size_is (non_consecutive, 2, 8, 6));
gid_and_class_list_t multiple_ranges = { gid_and_class_list_t multiple_ranges = {
{4, 0}, {4, 0},
@ -99,8 +108,8 @@ static void test_class_and_coverage_size_estimates ()
{12, 1}, {12, 1},
{13, 1}, {13, 1},
}; };
assert (incremental_size_is (multiple_ranges, 0, 4)); assert (incremental_size_is (multiple_ranges, 0, 4, 0));
assert (incremental_size_is (multiple_ranges, 1, 2 * 6 + 3 * 6)); assert (incremental_size_is (multiple_ranges, 1, 2 * 6, 3 * 6));
} }
int int

View File

@ -144,28 +144,37 @@ static unsigned add_extension (unsigned child,
} }
// Adds coverage table fro [start, end] // Adds coverage table fro [start, end]
static unsigned add_coverage (char start, char end, static unsigned add_coverage (unsigned start, unsigned end,
hb_serialize_context_t* c) hb_serialize_context_t* c)
{ {
if (end - start == 1) if (end - start == 1)
{ {
char coverage[] = { uint8_t coverage[] = {
0, 1, // format 0, 1, // format
0, 2, // count 0, 2, // count
0, start, // glyph[0]
0, end, // glyph[1] (uint8_t) ((start >> 8) & 0xFF),
(uint8_t) (start & 0xFF), // glyph[0]
(uint8_t) ((end >> 8) & 0xFF),
(uint8_t) (end & 0xFF), // glyph[1]
}; };
return add_object (coverage, 8, c); return add_object ((char*) coverage, 8, c);
} }
char coverage[] = { uint8_t coverage[] = {
0, 2, // format 0, 2, // format
0, 1, // range count 0, 1, // range count
0, start, // start
0, end, // end (uint8_t) ((start >> 8) & 0xFF),
(uint8_t) (start & 0xFF), // start
(uint8_t) ((end >> 8) & 0xFF),
(uint8_t) (end & 0xFF), // end
0, 0, 0, 0,
}; };
return add_object (coverage, 10, c); return add_object ((char*) coverage, 10, c);
} }
// Adds a class that maps glyphs from [start_glyph, end_glyph) // Adds a class that maps glyphs from [start_glyph, end_glyph)
@ -1207,7 +1216,8 @@ template<int num_pair_pos_2, int num_class_1, int num_class_2>
static void static void
populate_serializer_with_large_pair_pos_2 (hb_serialize_context_t* c, populate_serializer_with_large_pair_pos_2 (hb_serialize_context_t* c,
bool as_extension = false, bool as_extension = false,
bool with_device_tables = false) bool with_device_tables = false,
bool extra_table = true)
{ {
std::string large_string(100000, 'a'); std::string large_string(100000, 'a');
c->start_serialize<char> (); c->start_serialize<char> ();
@ -1267,22 +1277,26 @@ populate_serializer_with_large_pair_pos_2 (hb_serialize_context_t* c,
c); c);
} }
unsigned pair_pos_1 = add_object (large_string.c_str(), 100000, c);
unsigned pair_pos_1 = 0;
if (extra_table) pair_pos_1 = add_object (large_string.c_str(), 100000, c);
if (as_extension) { if (as_extension) {
for (int i = num_pair_pos_2 - 1; i >= 0; i--) for (int i = num_pair_pos_2 - 1; i >= 0; i--)
pair_pos_2[i] = add_extension (pair_pos_2[i], 2, c); pair_pos_2[i] = add_extension (pair_pos_2[i], 2, c);
if (extra_table)
pair_pos_1 = add_extension (pair_pos_1, 2, c); pair_pos_1 = add_extension (pair_pos_1, 2, c);
} }
start_lookup (as_extension ? 9 : 2, 1 + num_pair_pos_2, c); start_lookup (as_extension ? 9 : 2, 1 + num_pair_pos_2, c);
if (extra_table)
add_offset (pair_pos_1, c); add_offset (pair_pos_1, c);
for (int i = 0; i < num_pair_pos_2; i++) for (int i = 0; i < num_pair_pos_2; i++)
add_offset (pair_pos_2[i], c); add_offset (pair_pos_2[i], c);
unsigned lookup = finish_lookup (c); unsigned lookup = finish_lookup (c);
unsigned lookup_list = add_lookup_list (&lookup, 1, c); unsigned lookup_list = add_lookup_list (&lookup, 1, c);
@ -1732,6 +1746,29 @@ static void test_resolve_with_basic_pair_pos_2_split ()
free (expected_buffer); free (expected_buffer);
} }
static void test_resolve_with_close_to_limit_pair_pos_2_split ()
{
size_t buffer_size = 300000;
void* buffer = malloc (buffer_size);
assert (buffer);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_with_large_pair_pos_2 <1, 1596, 10>(&c, true, false, false);
void* expected_buffer = malloc (buffer_size);
assert (expected_buffer);
hb_serialize_context_t e (expected_buffer, buffer_size);
populate_serializer_with_large_pair_pos_2 <2, 798, 10>(&e, true, false, false);
run_resolve_overflow_test ("test_resolve_with_close_to_limit_pair_pos_2_split",
c,
e,
20,
true,
HB_TAG('G', 'P', 'O', 'S'));
free (buffer);
free (expected_buffer);
}
static void test_resolve_with_pair_pos_2_split_with_device_tables () static void test_resolve_with_pair_pos_2_split_with_device_tables ()
{ {
size_t buffer_size = 300000; size_t buffer_size = 300000;
@ -1906,6 +1943,7 @@ main (int argc, char **argv)
test_resolve_with_extension_pair_pos_1_split (); test_resolve_with_extension_pair_pos_1_split ();
test_resolve_with_basic_pair_pos_2_split (); test_resolve_with_basic_pair_pos_2_split ();
test_resolve_with_pair_pos_2_split_with_device_tables (); test_resolve_with_pair_pos_2_split_with_device_tables ();
test_resolve_with_close_to_limit_pair_pos_2_split ();
// TODO(grieger): have run overflow tests compare graph equality not final packed binary. // TODO(grieger): have run overflow tests compare graph equality not final packed binary.
// TODO(grieger): split test where multiple subtables in one lookup are split to test link ordering. // TODO(grieger): split test where multiple subtables in one lookup are split to test link ordering.