From 1584d3cb8faf244ae439cd59eac5f3d006d7a106 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Wed, 28 Oct 2020 17:49:09 -0700 Subject: [PATCH] [subset] Start a proof of concept implementation of the GSUB/GPOS offset overflow resolver. --- src/Makefile.am | 6 +- src/Makefile.sources | 1 + src/hb-repacker.hh | 180 +++++++++++++++++++++++++++++++++++++++++++ src/hb-serialize.hh | 3 + src/test-repacker.cc | 87 +++++++++++++++++++++ 5 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 src/hb-repacker.hh create mode 100644 src/test-repacker.cc diff --git a/src/Makefile.am b/src/Makefile.am index e10068e94..20e9be32f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -342,7 +342,7 @@ test_gsub_would_substitute_SOURCES = test-gsub-would-substitute.cc test_gsub_would_substitute_CPPFLAGS = $(HBCFLAGS) $(FREETYPE_CFLAGS) test_gsub_would_substitute_LDADD = libharfbuzz.la $(HBLIBS) $(FREETYPE_LIBS) -COMPILED_TESTS = test-algs test-array test-iter test-meta test-number test-ot-tag test-unicode-ranges test-bimap +COMPILED_TESTS = test-algs test-array test-iter test-meta test-number test-ot-tag test-unicode-ranges test-bimap test-repacker COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS) check_PROGRAMS += $(COMPILED_TESTS) @@ -356,6 +356,10 @@ test_array_SOURCES = test-array.cc test_array_CPPFLAGS = $(HBCFLAGS) test_array_LDADD = libharfbuzz.la $(HBLIBS) +test_repacker_SOURCES = test-repacker.cc hb-static.cc +test_repacker_CPPFLAGS = $(HBCFLAGS) +test_repacker_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS) + test_iter_SOURCES = test-iter.cc hb-static.cc test_iter_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS) test_iter_LDADD = $(COMPILED_TESTS_LDADD) diff --git a/src/Makefile.sources b/src/Makefile.sources index 6a6d3018b..933ae8850 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -268,6 +268,7 @@ HB_SUBSET_sources = \ hb-subset-plan.hh \ hb-subset.cc \ hb-subset.hh \ + hb-repacker.hh \ $(NULL) 
HB_SUBSET_headers = \
diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh
new file mode 100644
index 000000000..4bcc2393a
--- /dev/null
+++ b/src/hb-repacker.hh
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef HB_REPACKER_HH
+#define HB_REPACKER_HH
+
+#include "hb-open-type.hh"
+#include "hb-serialize.hh"
+#include "hb-vector.hh"
+
+
+struct graph_t
+{
+  /*
+   * Holds a topological sorting of an object graph. Ordered
+   * in reverse serialization order (first object in the
+   * serialization is at the end of the graph). This matches
+   * the 'packed' object stack used internally in the
+   * serializer.
+   */
+  graph_t (const hb_vector_t<hb_serialize_context_t::object_t *>& objects)
+      : objects_ (objects)
+  {}
+
+  /*
+   * Serialize the graph into the provided serialization buffer.
+   * Null entries are skipped. If the buffer cannot hold an object
+   * this returns right away, without calling end_serialize ().
+   */
+  void serialize (hb_serialize_context_t* c)
+  {
+    c->start_serialize ();
+    for (unsigned i = 0; i < objects_.length; i++) {
+      if (!objects_[i]) continue;
+
+      c->push ();
+
+      size_t size = objects_[i]->tail - objects_[i]->head;
+      char* start = c->allocate_size<char> (size);
+      if (!start) return;
+
+      memcpy (start, objects_[i]->head, size);
+
+      for (const auto& link : objects_[i]->links)
+        serialize_link (link, start, c);
+
+      c->pop_pack (false);
+    }
+    c->end_serialize ();
+  }
+
+  /*
+   * Generates a new ordering of the graph using BFS, starting
+   * from the last object of the vector.
+   */
+  void sort_bfs ()
+  {
+    // Nothing to sort; also keeps 'length - 1' below from wrapping.
+    if (!objects_.length) return;
+
+    hb_vector_t<int> queue;
+    hb_vector_t<hb_serialize_context_t::object_t *> sorted_graph;
+    hb_set_t visited;
+
+    // Object graphs are in reverse order, the first object is at the end
+    // of the vector.
+    int root_id = objects_.length - 1;
+    queue.push (root_id);
+    visited.add (root_id);
+
+    // The queue is only appended to; 'cursor' is its front. This avoids
+    // shifting the whole vector to drop element 0 on every step.
+    for (unsigned cursor = 0; cursor < queue.length; cursor++)
+    {
+      hb_serialize_context_t::object_t* next = objects_[queue[cursor]];
+      if (!next) continue;
+
+      sorted_graph.push (next);
+
+      for (const auto& link : next->links) {
+        // Mark on enqueue so that an object linked from several
+        // parents is queued (and emitted) only once.
+        if (visited.has (link.objidx)) continue;
+        visited.add (link.objidx);
+        queue.push (link.objidx);
+      }
+    }
+
+    sorted_graph.as_array ().reverse ();
+    objects_ = sorted_graph;
+    // TODO(garretrieger): remap object ids on the links.
+    // TODO(garretrieger): what order should graphs be in (first object at the end? or the beginning)
+
+    // TODO(garretrieger): check that all objects made it over into the sorted copy
+    //                     (ie. all objects are connected in the original graph).
+  }
+
+  /*
+   * Will any offsets overflow on graph when it's serialized?
+   */
+  bool will_overflow()
+  {
+    // TODO(garretrieger): implement me.
+    return false;
+  }
+
+ private:
+
+  /*
+   * Zeroes the OT::Offset<O> field at head + link.position and
+   * registers it with the serializer as a link to link.objidx.
+   */
+  template <typename O> void
+  serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link,
+                          char* head,
+                          hb_serialize_context_t* c)
+  {
+    OT::Offset<O>* offset = reinterpret_cast<OT::Offset<O>*> (head + link.position);
+    *offset = 0;
+    c->add_link (*offset,
+                 link.objidx,
+                 (hb_serialize_context_t::whence_t) link.whence,
+                 link.bias);
+  }
+
+  /*
+   * Re-adds 'link' to the serializer using the offset type that
+   * matches the link's is_wide / is_signed flags.
+   */
+  void serialize_link (const hb_serialize_context_t::object_t::link_t& link,
+                       char* head,
+                       hb_serialize_context_t* c)
+  {
+    if (link.is_wide)
+    {
+      if (link.is_signed)
+      {
+        serialize_link_of_type<OT::HBINT32> (link, head, c);
+      } else {
+        serialize_link_of_type<OT::HBUINT32> (link, head, c);
+      }
+    } else {
+      if (link.is_signed)
+      {
+        serialize_link_of_type<OT::HBINT16> (link, head, c);
+      } else {
+        serialize_link_of_type<OT::HBUINT16> (link, head, c);
+      }
+    }
+  }
+
+  hb_vector_t<hb_serialize_context_t::object_t *> objects_;
+};
+
+
+/*
+ * Re-serialize the provided object graph into the serialization context
+ * using BFS (Breadth First Search) to produce the topological ordering.
+ */
+inline void
+hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& packed,
+                      hb_serialize_context_t* c) {
+  graph_t sorted_graph (packed);
+  sorted_graph.sort_bfs ();
+  if (sorted_graph.will_overflow ()) {
+    // TODO(garretrieger): additional offset resolution strategies
+    // - Promotion to extension lookups.
+    // - Table duplication.
+    // - Table splitting.
+  }
+
+  sorted_graph.serialize (c);
+}
+
+
+#endif /* HB_REPACKER_HH */
diff --git a/src/hb-serialize.hh b/src/hb-serialize.hh
index fe29bdf96..d863a2c3a 100644
--- a/src/hb-serialize.hh
+++ b/src/hb-serialize.hh
@@ -520,6 +520,9 @@ struct hb_serialize_context_t
                            (char *) b.arrayZ, free);
   }
 
+  const hb_vector_t<object_t *>& object_graph() const
+  { return packed; }
+
   private:
   template <typename T>
   void assign_offset (const object_t* parent, const object_t::link_t &link, unsigned offset)
diff --git a/src/test-repacker.cc b/src/test-repacker.cc
new file mode 100644
index 000000000..c94804054
--- /dev/null
+++ b/src/test-repacker.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include "hb-repacker.hh"
+#include "hb-open-type.hh"
+
+/*
+ * Fills the serializer with three packed objects: "ghi", "def" and
+ * "abc", where "abc" is followed by two Offset16 links pointing at
+ * "def" and "ghi" respectively.
+ */
+static void
+populate_serializer (hb_serialize_context_t* c)
+{
+  c->start_serialize ();
+  c->push ();
+  char* obj = c->allocate_size<char> (3);
+  memcpy (obj, "ghi", 3);
+  unsigned obj_3 = c->pop_pack ();
+
+  c->push ();
+  obj = c->allocate_size<char> (3);
+  memcpy (obj, "def", 3);
+  unsigned obj_2 = c->pop_pack ();
+
+  c->push ();
+  obj = c->allocate_size<char> (3);
+  memcpy (obj, "abc", 3);
+
+  OT::Offset16* offset = c->start_embed<OT::Offset16> ();
+  c->extend_min (offset);
+  c->add_link (*offset, obj_2);
+
+  offset = c->start_embed<OT::Offset16> ();
+  c->extend_min (offset);
+  c->add_link (*offset, obj_3);
+
+  c->pop_pack ();
+
+  c->end_serialize();
+}
+
+/*
+ * Re-serializing the object graph of one serializer into a second
+ * one must produce the same bytes.
+ */
+static void
+test_serialize ()
+{
+  size_t buffer_size = 100;
+  void* buffer_1 = malloc (buffer_size);
+  hb_serialize_context_t c1 (buffer_1, buffer_size);
+  populate_serializer (&c1);
+  hb_bytes_t expected = c1.copy_bytes ();
+
+  void* buffer_2 = malloc (buffer_size);
+  hb_serialize_context_t c2 (buffer_2, buffer_size);
+
+  graph_t graph (c1.object_graph ());
+  graph.serialize (&c2);
+  hb_bytes_t actual = c2.copy_bytes ();
+
+  assert (actual == expected);
+
+  // NOTE(review): copy_bytes () appears to return a heap copy owned by
+  // the caller (copy_blob releases it with free) -- confirm.
+  free ((char *) expected.arrayZ);
+  free ((char *) actual.arrayZ);
+  free (buffer_1);
+  free (buffer_2);
+}
+
+int
+main (int argc, char **argv)
+{
+  test_serialize ();
+}