From 2264df6da3c25a803217338faf685f963972a68b Mon Sep 17 00:00:00 2001
From: Garret Rieger
Date: Fri, 5 Aug 2022 18:33:03 +0000
Subject: [PATCH] [repacker] add utility that can calculate the size of Coverage+ClassDef via incremental class inclusion.

---
 src/Makefile.am                  |   5 ++
 src/graph/classdef-graph.hh      |  92 ++++++++++++++++++++++++++
 src/graph/test-classdef-graph.cc | 110 +++++++++++++++++++++++++++++++
 src/meson.build                  |   1 +
 4 files changed, 208 insertions(+)
 create mode 100644 src/graph/test-classdef-graph.cc

diff --git a/src/Makefile.am b/src/Makefile.am
index 83cda8f85..6f080fc8f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -371,6 +371,7 @@ COMPILED_TESTS = \
 	test-unicode-ranges \
 	test-vector \
 	test-repacker \
+	test-classdef-graph \
 	$(NULL)
 COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG
 COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS)
@@ -417,6 +418,10 @@ test_repacker_SOURCES = test-repacker.cc hb-static.cc graph/gsubgpos-context.cc
 test_repacker_CPPFLAGS = $(HBCFLAGS)
 test_repacker_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS)
 
+test_classdef_graph_SOURCES = graph/test-classdef-graph.cc hb-static.cc graph/gsubgpos-context.cc
+test_classdef_graph_CPPFLAGS = $(HBCFLAGS)
+test_classdef_graph_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS)
+
 test_set_SOURCES = test-set.cc hb-static.cc
 test_set_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
 test_set_LDADD = $(COMPILED_TESTS_LDADD)
diff --git a/src/graph/classdef-graph.hh b/src/graph/classdef-graph.hh
index fd6472a0d..38130952f 100644
--- a/src/graph/classdef-graph.hh
+++ b/src/graph/classdef-graph.hh
@@ -123,6 +123,98 @@ struct ClassDef : public OT::ClassDef
 };
 
 
+// Estimates the size of the Coverage and ClassDef tables needed to encode
+// a set of (glyph, class) pairs, broken down per class so that classes
+// can be added incrementally.
+struct class_def_size_estimator_t
+{
+  // glyph_and_class iterates over (glyph id, class) pairs.
+  // NOTE(review): gids_consecutive is derived by comparing each gid with
+  // the one before it, so the pairs are presumably expected in ascending
+  // gid order -- confirm against callers.
+  template<typename It>
+  class_def_size_estimator_t (It glyph_and_class)
+      : gids_consecutive (true), num_ranges_per_class (), glyphs_per_class ()
+  {
+    unsigned last_gid = (unsigned) -1;
+    for (auto p : + glyph_and_class)
+    {
+      unsigned gid = p.first;
+      unsigned klass = p.second;
+
+      if (last_gid != (unsigned) -1 && gid != last_gid + 1)
+        gids_consecutive = false;
+      last_gid = gid;
+
+      hb_set_t* glyphs;
+      if (glyphs_per_class.has (klass, &glyphs) && glyphs) {
+        glyphs->add (gid);
+        continue;
+      }
+
+      hb_set_t new_glyphs;
+      new_glyphs.add (gid);
+      glyphs_per_class.set (klass, std::move (new_glyphs));
+    }
+
+    // Leave num_ranges_per_class empty if building the glyph sets failed.
+    if (in_error ()) return;
+
+    for (unsigned klass : glyphs_per_class.keys ())
+    {
+      if (!klass) continue; // class 0 doesn't get encoded.
+
+      const hb_set_t& glyphs = glyphs_per_class.get (klass);
+      hb_codepoint_t start = HB_SET_VALUE_INVALID;
+      hb_codepoint_t end = HB_SET_VALUE_INVALID;
+
+      // Each contiguous run of glyphs is one ClassDef range.
+      unsigned count = 0;
+      while (glyphs.next_range (&start, &end))
+        count++;
+
+      num_ranges_per_class.set (klass, count);
+    }
+  }
+
+  // Incremental increase in the Coverage and ClassDef table size
+  // (worst case) if all glyphs associated with 'klass' were added.
+  unsigned incremental_size_for_class (unsigned klass) const
+  {
+    // Coverage takes 2 bytes per glyph worst case,
+    unsigned cov_size = 2 * glyphs_per_class.get (klass).get_population ();
+    // ClassDef format 2 takes 6 bytes per range.
+    unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass);
+    if (gids_consecutive)
+    {
+      // ClassDef format 1 takes 2 bytes per glyph (the same amount as
+      // cov_size), but only can be used when gids are consecutive.
+      return cov_size + hb_min (cov_size, class_def_2_size);
+    }
+
+    return cov_size + class_def_2_size;
+  }
+
+  // True if either map, or any per-class glyph set, is in an error state.
+  bool in_error () const
+  {
+    if (num_ranges_per_class.in_error ()) return true;
+    if (glyphs_per_class.in_error ()) return true;
+
+    for (const hb_set_t& s : glyphs_per_class.values ())
+    {
+      if (s.in_error ()) return true;
+    }
+    return false;
+  }
+
+ private:
+  bool gids_consecutive;
+  hb_hashmap_t<unsigned, unsigned> num_ranges_per_class;
+  hb_hashmap_t<unsigned, hb_set_t> glyphs_per_class;
+};
+
+
 }
 
 #endif // GRAPH_CLASSDEF_GRAPH_HH
diff --git a/src/graph/test-classdef-graph.cc b/src/graph/test-classdef-graph.cc
new file mode 100644
index 000000000..908658446
--- /dev/null
+++ b/src/graph/test-classdef-graph.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2022 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include "gsubgpos-context.hh"
+#include "classdef-graph.hh"
+
+typedef hb_pair_t<hb_codepoint_t, hb_codepoint_t> gid_and_class_t; // (glyph id, class)
+typedef hb_vector_t<gid_and_class_t> gid_and_class_list_t;
+
+// Returns true iff the estimator built from 'list' reports 'expected' bytes for 'klass'.
+static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klass, unsigned expected)
+{
+  graph::class_def_size_estimator_t estimator (list.iter ());
+  unsigned result = estimator.incremental_size_for_class (klass);
+  if (result != expected)
+  {
+    printf ("FAIL: expected size %u but was %u\n", expected, result);
+    return false;
+  }
+
+  return true;
+}
+
+static void test_class_and_coverage_size_estimates ()
+{
+  gid_and_class_list_t empty = {
+  };
+  assert (incremental_size_is (empty, 0, 0));
+  assert (incremental_size_is (empty, 1, 0));
+
+  gid_and_class_list_t class_zero = {
+    {5, 0},
+  };
+  assert (incremental_size_is (class_zero, 0, 2)); // class 0 only costs Coverage bytes.
+
+  gid_and_class_list_t consecutive = {
+    {4, 0},
+    {5, 0},
+    {6, 1},
+    {7, 1},
+    {8, 2},
+    {9, 2},
+    {10, 2},
+    {11, 2},
+  };
+  assert (incremental_size_is (consecutive, 0, 4));
+  assert (incremental_size_is (consecutive, 1, 4 + 4)); // Coverage + ClassDef format 1 (4 < 6).
+  assert (incremental_size_is (consecutive, 2, 8 + 6)); // Coverage + one ClassDef format 2 range (6 < 8).
+
+  gid_and_class_list_t non_consecutive = {
+    {4, 0},
+    {5, 0},
+
+    {6, 1},
+    {7, 1},
+
+    {9, 2},
+    {10, 2},
+    {11, 2},
+    {12, 2},
+  };
+  assert (incremental_size_is (non_consecutive, 0, 4));
+  assert (incremental_size_is (non_consecutive, 1, 4 + 6)); // the gap at gid 8 rules out ClassDef format 1.
+  assert (incremental_size_is (non_consecutive, 2, 8 + 6));
+
+  gid_and_class_list_t multiple_ranges = {
+    {4, 0},
+    {5, 0},
+
+    {6, 1},
+    {7, 1},
+
+    {9, 1},
+
+    {11, 1},
+    {12, 1},
+    {13, 1},
+  };
+  assert (incremental_size_is (multiple_ranges, 0, 4));
+  assert (incremental_size_is (multiple_ranges, 1, 2 * 6 + 3 * 6)); // 6 glyphs of Coverage + 3 ranges.
+}
+
+int
+main (int argc, char **argv)
+{
+  test_class_and_coverage_size_estimates ();
+}
diff --git a/src/meson.build b/src/meson.build
index 5e0c172c6..9ee919dd1 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -581,6 +581,7 @@ if get_option('tests').enabled()
     'test-ot-tag': ['hb-ot-tag.cc'],
     'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'],
     'test-repacker': ['test-repacker.cc', 'hb-static.cc', 'graph/gsubgpos-context.cc'],
+    'test-classdef-graph': ['graph/test-classdef-graph.cc', 'hb-static.cc', 'graph/gsubgpos-context.cc'],
     'test-set': ['test-set.cc', 'hb-static.cc'],
     'test-serialize': ['test-serialize.cc', 'hb-static.cc'],
     'test-unicode-ranges': ['test-unicode-ranges.cc'],