diff --git a/configure.ac b/configure.ac index d7f1e2673..db596bc4c 100644 --- a/configure.ac +++ b/configure.ac @@ -432,6 +432,7 @@ test/shaping/data/in-house/Makefile test/shaping/data/text-rendering-tests/Makefile test/subset/Makefile test/subset/data/Makefile +test/subset/data/repack_tests/Makefile docs/Makefile docs/version.xml ]) diff --git a/src/Makefile.am b/src/Makefile.am index e10068e94..7a0ca2985 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -342,7 +342,7 @@ test_gsub_would_substitute_SOURCES = test-gsub-would-substitute.cc test_gsub_would_substitute_CPPFLAGS = $(HBCFLAGS) $(FREETYPE_CFLAGS) test_gsub_would_substitute_LDADD = libharfbuzz.la $(HBLIBS) $(FREETYPE_LIBS) -COMPILED_TESTS = test-algs test-array test-iter test-meta test-number test-ot-tag test-unicode-ranges test-bimap +COMPILED_TESTS = test-algs test-array test-iter test-meta test-number test-ot-tag test-priority-queue test-unicode-ranges test-bimap test-repacker COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS) check_PROGRAMS += $(COMPILED_TESTS) @@ -356,6 +356,14 @@ test_array_SOURCES = test-array.cc test_array_CPPFLAGS = $(HBCFLAGS) test_array_LDADD = libharfbuzz.la $(HBLIBS) +test_priority_queue_SOURCES = test-priority-queue.cc hb-static.cc +test_priority_queue_CPPFLAGS = $(HBCFLAGS) +test_priority_queue_LDADD = libharfbuzz.la $(HBLIBS) + +test_repacker_SOURCES = test-repacker.cc hb-static.cc +test_repacker_CPPFLAGS = $(HBCFLAGS) +test_repacker_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS) + test_iter_SOURCES = test-iter.cc hb-static.cc test_iter_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS) test_iter_LDADD = $(COMPILED_TESTS_LDADD) diff --git a/src/Makefile.sources b/src/Makefile.sources index 6a6d3018b..14c97996b 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -167,6 +167,7 @@ HB_BASE_sources = \ hb-unicode.hh \ hb-utf.hh \ hb-vector.hh \ + hb-priority-queue.hh \ hb.hh \ $(NULL) @@ -268,6 +269,7 @@ 
HB_SUBSET_sources = \ hb-subset-plan.hh \ hb-subset.cc \ hb-subset.hh \ + hb-repacker.hh \ $(NULL) HB_SUBSET_headers = \ diff --git a/src/hb-cff-interp-common.hh b/src/hb-cff-interp-common.hh index 91a9b7d0d..c251e2d0e 100644 --- a/src/hb-cff-interp-common.hh +++ b/src/hb-cff-interp-common.hh @@ -263,7 +263,7 @@ struct UnsizedByteStr : UnsizedArrayOf T *ip = c->allocate_size (T::static_size); if (unlikely (!ip)) return_trace (false); - return_trace (c->check_assign (*ip, value)); + return_trace (c->check_assign (*ip, value, HB_SERIALIZE_ERROR_INT_OVERFLOW)); } template diff --git a/src/hb-debug.hh b/src/hb-debug.hh index ec3a1ff21..a92614d01 100644 --- a/src/hb-debug.hh +++ b/src/hb-debug.hh @@ -438,6 +438,10 @@ struct hb_no_trace_t { #define TRACE_SUBSET(this) hb_no_trace_t trace #endif +#ifndef HB_DEBUG_SUBSET_REPACK +#define HB_DEBUG_SUBSET_REPACK (HB_DEBUG+0) +#endif + #ifndef HB_DEBUG_DISPATCH #define HB_DEBUG_DISPATCH ( \ HB_DEBUG_APPLY + \ diff --git a/src/hb-open-type.hh b/src/hb-open-type.hh index dc0ae1d98..297edf08e 100644 --- a/src/hb-open-type.hh +++ b/src/hb-open-type.hh @@ -209,7 +209,9 @@ struct Offset : Type void *serialize (hb_serialize_context_t *c, const void *base) { void *t = c->start_embed (); - c->check_assign (*this, (unsigned) ((char *) t - (char *) base)); + c->check_assign (*this, + (unsigned) ((char *) t - (char *) base), + HB_SERIALIZE_ERROR_OFFSET_OVERFLOW); return t; } @@ -621,7 +623,7 @@ struct ArrayOf { TRACE_SERIALIZE (this); if (unlikely (!c->extend_min (*this))) return_trace (false); - c->check_assign (len, items_len); + c->check_assign (len, items_len, HB_SERIALIZE_ERROR_ARRAY_OVERFLOW); if (unlikely (!c->extend (*this))) return_trace (false); return_trace (true); } @@ -656,7 +658,7 @@ struct ArrayOf TRACE_SERIALIZE (this); auto *out = c->start_embed (this); if (unlikely (!c->extend_min (out))) return_trace (nullptr); - c->check_assign (out->len, len); + c->check_assign (out->len, len, HB_SERIALIZE_ERROR_ARRAY_OVERFLOW); if 
(unlikely (!as_array ().copy (c))) return_trace (nullptr); return_trace (out); } @@ -787,7 +789,7 @@ struct HeadlessArrayOf { TRACE_SERIALIZE (this); if (unlikely (!c->extend_min (*this))) return_trace (false); - c->check_assign (lenP1, items_len + 1); + c->check_assign (lenP1, items_len + 1, HB_SERIALIZE_ERROR_ARRAY_OVERFLOW); if (unlikely (!c->extend (*this))) return_trace (false); return_trace (true); } diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 878e02ff1..97cd0f526 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -276,7 +276,9 @@ struct CmapSubtableFormat4 HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount); if (unlikely (!c->check_success (idRangeOffset))) return; - if (unlikely (!c->check_assign(this->length, c->length () - table_initpos))) return; + if (unlikely (!c->check_assign(this->length, + c->length () - table_initpos, + HB_SERIALIZE_ERROR_INT_OVERFLOW))) return; this->segCountX2 = segcount * 2; this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1; this->searchRange = 2 * (1u << this->entrySelector); @@ -850,7 +852,9 @@ struct DefaultUVS : SortedArrayOf } else { - if (unlikely (!c->check_assign (out->len, (c->length () - init_len) / UnicodeValueRange::static_size))) return nullptr; + if (unlikely (!c->check_assign (out->len, + (c->length () - init_len) / UnicodeValueRange::static_size, + HB_SERIALIZE_ERROR_INT_OVERFLOW))) return nullptr; return out; } } @@ -1112,10 +1116,12 @@ struct CmapSubtableFormat14 return; int tail_len = init_tail - c->tail; - c->check_assign (this->length, c->length () - table_initpos + tail_len); + c->check_assign (this->length, c->length () - table_initpos + tail_len, + HB_SERIALIZE_ERROR_INT_OVERFLOW); c->check_assign (this->record.len, (c->length () - table_initpos - CmapSubtableFormat14::min_size) / - VariationSelectorRecord::static_size); + VariationSelectorRecord::static_size, + 
HB_SERIALIZE_ERROR_INT_OVERFLOW); /* Correct the incorrect write order by reversing the order of the variation records array. */ @@ -1401,7 +1407,9 @@ struct cmap } } - c->check_assign(this->encodingRecord.len, (c->length () - cmap::min_size)/EncodingRecord::static_size); + c->check_assign(this->encodingRecord.len, + (c->length () - cmap::min_size)/EncodingRecord::static_size, + HB_SERIALIZE_ERROR_INT_OVERFLOW); } void closure_glyphs (const hb_set_t *unicodes, diff --git a/src/hb-ot-hdmx-table.hh b/src/hb-ot-hdmx-table.hh index c9c391bad..590fa154b 100644 --- a/src/hb-ot-hdmx-table.hh +++ b/src/hb-ot-hdmx-table.hh @@ -110,7 +110,7 @@ struct hdmx for (const hb_item_type& _ : +it) c->start_embed ()->serialize (c, _.first, _.second); - return_trace (c->successful); + return_trace (c->successful ()); } diff --git a/src/hb-ot-hmtx-table.hh b/src/hb-ot-hmtx-table.hh index d06c0fa4a..403832993 100644 --- a/src/hb-ot-hmtx-table.hh +++ b/src/hb-ot-hmtx-table.hh @@ -146,7 +146,7 @@ struct hmtxvmtx _mtx.fini (); - if (unlikely (c->serializer->ran_out_of_room || c->serializer->in_error ())) + if (unlikely (c->serializer->in_error ())) return_trace (false); // Amend header num hmetrics diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index 72a44bf69..7c8c85777 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -694,7 +694,7 @@ struct MarkArray : ArrayOf /* Array of MarkRecords--in Coverage orde { TRACE_SERIALIZE (this); if (unlikely (!c->extend_min (*this))) return_trace (false); - if (unlikely (!c->check_assign (len, it.len ()))) return_trace (false); + if (unlikely (!c->check_assign (len, it.len (), HB_SERIALIZE_ERROR_ARRAY_OVERFLOW))) return_trace (false); c->copy_all (it, base, c->to_bias (this), klass_mapping, layout_variation_idx_map); return_trace (true); } @@ -756,7 +756,7 @@ struct SinglePosFormat1 { auto out = c->extend_min (*this); if (unlikely (!out)) return; - if (unlikely (!c->check_assign (valueFormat, 
valFormat))) return; + if (unlikely (!c->check_assign (valueFormat, valFormat, HB_SERIALIZE_ERROR_INT_OVERFLOW))) return; + it | hb_map (hb_second) @@ -870,8 +870,8 @@ struct SinglePosFormat2 { auto out = c->extend_min (*this); if (unlikely (!out)) return; - if (unlikely (!c->check_assign (valueFormat, valFormat))) return; - if (unlikely (!c->check_assign (valueCount, it.len ()))) return; + if (unlikely (!c->check_assign (valueFormat, valFormat, HB_SERIALIZE_ERROR_INT_OVERFLOW))) return; + if (unlikely (!c->check_assign (valueCount, it.len (), HB_SERIALIZE_ERROR_ARRAY_OVERFLOW))) return; + it | hb_map (hb_second) diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh index ee95bbc00..062eea6ff 100644 --- a/src/hb-ot-layout-gsub-table.hh +++ b/src/hb-ot-layout-gsub-table.hh @@ -102,7 +102,7 @@ struct SingleSubstFormat1 TRACE_SERIALIZE (this); if (unlikely (!c->extend_min (*this))) return_trace (false); if (unlikely (!coverage.serialize (c, this).serialize (c, glyphs))) return_trace (false); - c->check_assign (deltaGlyphID, delta); + c->check_assign (deltaGlyphID, delta, HB_SERIALIZE_ERROR_INT_OVERFLOW); return_trace (true); } @@ -1551,7 +1551,7 @@ struct SubstLookup : Lookup template static inline typename context_t::return_t dispatch_recurse_func (context_t *c, unsigned int lookup_index); - + static inline typename hb_closure_context_t::return_t closure_glyphs_recurse_func (hb_closure_context_t *c, unsigned lookup_index, hb_set_t *covered_seq_indices, unsigned seq_index, unsigned end_index); static inline hb_closure_context_t::return_t dispatch_closure_recurse_func (hb_closure_context_t *c, unsigned lookup_index, hb_set_t *covered_seq_indices, unsigned seq_index, unsigned end_index) diff --git a/src/hb-ot-name-table.hh b/src/hb-ot-name-table.hh index ece3c2846..794a7cdb6 100644 --- a/src/hb-ot-name-table.hh +++ b/src/hb-ot-name-table.hh @@ -230,7 +230,8 @@ struct name c->copy_all (records, src_string_pool); free (records.arrayZ); - if 
(unlikely (c->ran_out_of_room)) return_trace (false); + + if (unlikely (c->ran_out_of_room ())) return_trace (false); this->stringOffset = c->length (); diff --git a/src/hb-priority-queue.hh b/src/hb-priority-queue.hh new file mode 100644 index 000000000..7d799ae90 --- /dev/null +++ b/src/hb-priority-queue.hh @@ -0,0 +1,151 @@ +/* + * Copyright © 2020 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger + */ + +#ifndef HB_PRIORITY_QUEUE_HH +#define HB_PRIORITY_QUEUE_HH + +#include "hb.hh" +#include "hb-vector.hh" + +/* + * hb_priority_queue_t + * + * Priority queue implemented as a binary heap. Supports extract minimum + * and insert operations. 
+ */ +struct hb_priority_queue_t +{ + HB_DELETE_COPY_ASSIGN (hb_priority_queue_t); + hb_priority_queue_t () { init (); } + ~hb_priority_queue_t () { fini (); } + + private: + typedef hb_pair_t item_t; + hb_vector_t heap; + + public: + void init () { heap.init (); } + + void fini () { heap.fini (); } + + void reset () { heap.resize (0); } + + bool in_error () const { return heap.in_error (); } + + void insert (int64_t priority, unsigned value) + { + heap.push (item_t (priority, value)); + bubble_up (heap.length - 1); + } + + item_t pop_minimum () + { + item_t result = heap[0]; + + heap[0] = heap[heap.length - 1]; + heap.shrink (heap.length - 1); + bubble_down (0); + + return result; + } + + const item_t& minimum () + { + return heap[0]; + } + + bool is_empty () const { return heap.length == 0; } + explicit operator bool () const { return !is_empty (); } + unsigned int get_population () const { return heap.length; } + + /* Sink interface. */ + hb_priority_queue_t& operator << (item_t item) + { insert (item.first, item.second); return *this; } + + private: + + static constexpr unsigned parent (unsigned index) + { + return (index - 1) / 2; + } + + static constexpr unsigned left_child (unsigned index) + { + return 2 * index + 1; + } + + static constexpr unsigned right_child (unsigned index) + { + return 2 * index + 2; + } + + void bubble_down (unsigned index) + { + unsigned left = left_child (index); + unsigned right = right_child (index); + + bool has_left = left < heap.length; + if (!has_left) + // If there's no left, then there's also no right. 
+ return; + + bool has_right = right < heap.length; + if (heap[index].first <= heap[left].first + && (!has_right || heap[index].first <= heap[right].first)) + return; + + if (!has_right || heap[left].first < heap[right].first) + { + swap (index, left); + bubble_down (left); + return; + } + + swap (index, right); + bubble_down (right); + } + + void bubble_up (unsigned index) + { + if (index == 0) return; + + unsigned parent_index = parent (index); + if (heap[parent_index].first <= heap[index].first) + return; + + swap (index, parent_index); + bubble_up (parent_index); + } + + void swap (unsigned a, unsigned b) + { + item_t temp = heap[a]; + heap[a] = heap[b]; + heap[b] = temp; + } +}; + +#endif /* HB_PRIORITY_QUEUE_HH */ diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh new file mode 100644 index 000000000..35e3566e5 --- /dev/null +++ b/src/hb-repacker.hh @@ -0,0 +1,754 @@ +/* + * Copyright © 2020 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ * + * Google Author(s): Garret Rieger + */ + +#ifndef HB_REPACKER_HH +#define HB_REPACKER_HH + +#include "hb-open-type.hh" +#include "hb-map.hh" +#include "hb-priority-queue.hh" +#include "hb-serialize.hh" +#include "hb-vector.hh" + + +struct graph_t +{ + struct vertex_t + { + vertex_t () : + distance (0), + incoming_edges (0), + start (0), + end (0), + priority(0) {} + + void fini () { obj.fini (); } + + hb_serialize_context_t::object_t obj; + int64_t distance; + unsigned incoming_edges; + unsigned start; + unsigned end; + unsigned priority; + + bool is_shared () const + { + return incoming_edges > 1; + } + + bool is_leaf () const + { + return !obj.links.length; + } + + void raise_priority () + { + priority++; + } + + int64_t modified_distance (unsigned order) const + { + // TODO(garretrieger): once priority is high enough, should try + // setting distance = 0 which will force to sort immediately after + // it's parent where possible. + + int64_t modified_distance = distance + distance_modifier (); + return (modified_distance << 24) | (0x00FFFFFF & order); + } + + int64_t distance_modifier () const + { + if (!priority) return 0; + int64_t table_size = obj.tail - obj.head; + return -(table_size - table_size / (1 << hb_min(priority, 16u))); + } + }; + + struct overflow_record_t + { + unsigned parent; + const hb_serialize_context_t::object_t::link_t* link; + }; + + struct clone_buffer_t + { + clone_buffer_t () : head (nullptr), tail (nullptr) {} + + bool copy (const hb_serialize_context_t::object_t& object) + { + fini (); + unsigned size = object.tail - object.head; + head = (char*) malloc (size); + if (!head) return false; + + memcpy (head, object.head, size); + tail = head + size; + return true; + } + + char* head; + char* tail; + + void fini () + { + if (!head) return; + free (head); + head = nullptr; + } + }; + + /* + * A topological sorting of an object graph. 
Ordered + * in reverse serialization order (first object in the + * serialization is at the end of the list). This matches + * the 'packed' object stack used internally in the + * serializer + */ + graph_t (const hb_vector_t& objects) + : edge_count_invalid (true), + distance_invalid (true), + positions_invalid (true), + successful (true) + { + bool removed_nil = false; + for (unsigned i = 0; i < objects.length; i++) + { + // TODO(grieger): check all links point to valid objects. + + // If this graph came from a serialization buffer object 0 is the + // nil object. We don't need it for our purposes here so drop it. + if (i == 0 && !objects[i]) + { + removed_nil = true; + continue; + } + + vertex_t* v = vertices_.push (); + v->obj = *objects[i]; + if (!removed_nil) continue; + for (unsigned i = 0; i < v->obj.links.length; i++) + // Fix indices to account for removed nil object. + v->obj.links[i].objidx--; + } + } + + ~graph_t () + { + vertices_.fini_deep (); + clone_buffers_.fini_deep (); + } + + bool in_error () const + { + return !successful || vertices_.in_error () || clone_buffers_.in_error (); + } + + const vertex_t& root () const + { + return vertices_[root_idx ()]; + } + + unsigned root_idx () const + { + // Object graphs are in reverse order, the first object is at the end + // of the vector. Since the graph is topologically sorted it's safe to + // assume the first object has no incoming edges. + return vertices_.length - 1; + } + + const hb_serialize_context_t::object_t& object(unsigned i) const + { + return vertices_[i].obj; + } + + /* + * serialize graph into the provided serialization buffer. 
+ */ + void serialize (hb_serialize_context_t* c) const + { + c->start_serialize (); + for (unsigned i = 0; i < vertices_.length; i++) { + c->push (); + + size_t size = vertices_[i].obj.tail - vertices_[i].obj.head; + char* start = c->allocate_size (size); + if (!start) return; + + memcpy (start, vertices_[i].obj.head, size); + + for (const auto& link : vertices_[i].obj.links) + serialize_link (link, start, c); + + // All duplications are already encoded in the graph, so don't + // enable sharing during packing. + c->pop_pack (false); + } + c->end_serialize (); + } + + /* + * Generates a new topological sorting of graph using Kahn's + * algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Algorithms + */ + void sort_kahn () + { + positions_invalid = true; + + if (vertices_.length <= 1) { + // Graph of 1 or less doesn't need sorting. + return; + } + + hb_vector_t queue; + hb_vector_t sorted_graph; + hb_vector_t id_map; + check_success (id_map.resize (vertices_.length)); + + hb_vector_t removed_edges; + check_success (removed_edges.resize (vertices_.length)); + update_incoming_edge_count (); + + queue.push (root_idx ()); + int new_id = vertices_.length - 1; + + while (!queue.in_error () && queue.length) + { + unsigned next_id = queue[0]; + queue.remove (0); + + vertex_t& next = vertices_[next_id]; + sorted_graph.push (next); + id_map[next_id] = new_id--; + + for (const auto& link : next.obj.links) { + removed_edges[link.objidx]++; + if (!(vertices_[link.objidx].incoming_edges - removed_edges[link.objidx])) + queue.push (link.objidx); + } + } + + check_success (!queue.in_error ()); + check_success (!sorted_graph.in_error ()); + if (!check_success (new_id == -1)) + DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected."); + + remap_obj_indices (id_map, &sorted_graph); + + sorted_graph.as_array ().reverse (); + + vertices_.fini_deep (); + vertices_ = sorted_graph; + sorted_graph.fini_deep (); + } + + /* + * Generates a new topological sorting of graph 
ordered by the shortest + * distance to each node. + */ + void sort_shortest_distance () + { + positions_invalid = true; + + if (vertices_.length <= 1) { + // Graph of 1 or less doesn't need sorting. + return; + } + + update_distances (); + + hb_priority_queue_t queue; + hb_vector_t sorted_graph; + hb_vector_t id_map; + check_success (id_map.resize (vertices_.length)); + + hb_vector_t removed_edges; + check_success (removed_edges.resize (vertices_.length)); + update_incoming_edge_count (); + + queue.insert (root ().modified_distance (0), root_idx ()); + int new_id = root_idx (); + unsigned order = 1; + while (!queue.in_error () && !queue.is_empty ()) + { + unsigned next_id = queue.pop_minimum().second; + + vertex_t& next = vertices_[next_id]; + sorted_graph.push (next); + id_map[next_id] = new_id--; + + for (const auto& link : next.obj.links) { + removed_edges[link.objidx]++; + if (!(vertices_[link.objidx].incoming_edges - removed_edges[link.objidx])) + // Add the order that the links were encountered to the priority. + // This ensures that ties between priorities objects are broken in a consistent + // way. More specifically this is set up so that if a set of objects have the same + // distance they'll be added to the topological order in the order that they are + // referenced from the parent object. + queue.insert (vertices_[link.objidx].modified_distance (order++), + link.objidx); + } + } + + check_success (!queue.in_error ()); + check_success (!sorted_graph.in_error ()); + if (!check_success (new_id == -1)) + DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected."); + + remap_obj_indices (id_map, &sorted_graph); + + sorted_graph.as_array ().reverse (); + + vertices_.fini_deep (); + vertices_ = sorted_graph; + sorted_graph.fini_deep (); + } + + /* + * Creates a copy of child and re-assigns the link from + * parent to the clone. The copy is a shallow copy, objects + * linked from child are not duplicated. 
+ */ + void duplicate (unsigned parent_idx, unsigned child_idx) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, " Duplicating %d => %d", + parent_idx, child_idx); + + positions_invalid = true; + + auto* clone = vertices_.push (); + auto& child = vertices_[child_idx]; + clone_buffer_t* buffer = clone_buffers_.push (); + if (!check_success (buffer->copy (child.obj))) { + return; + } + + clone->obj.head = buffer->head; + clone->obj.tail = buffer->tail; + clone->distance = child.distance; + + for (const auto& l : child.obj.links) + clone->obj.links.push (l); + + check_success (!clone->obj.links.in_error ()); + + auto& parent = vertices_[parent_idx]; + unsigned clone_idx = vertices_.length - 2; + for (unsigned i = 0; i < parent.obj.links.length; i++) + { + auto& l = parent.obj.links[i]; + if (l.objidx == child_idx) + { + l.objidx = clone_idx; + clone->incoming_edges++; + child.incoming_edges--; + } + } + + // The last object is the root of the graph, so swap back the root to the end. + // The root's obj idx does change, however since it's root nothing else refers to it. + // all other obj idx's will be unaffected. + vertex_t root = vertices_[vertices_.length - 2]; + vertices_[vertices_.length - 2] = *clone; + vertices_[vertices_.length - 1] = root; + } + + /* + * Raises the sorting priority of all children. + */ + void raise_childrens_priority (unsigned parent_idx) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, " Raising priority of all children of %d", + parent_idx); + // This operation doesn't change ordering until a sort is run, so no need + // to invalidate positions. It does not change graph structure so no need + // to update distances or edge counts. + auto& parent = vertices_[parent_idx].obj; + for (unsigned i = 0; i < parent.links.length; i++) + vertices_[parent.links[i].objidx].raise_priority (); + } + + /* + * Will any offsets overflow on graph when it's serialized? 
+ */ + bool will_overflow (hb_vector_t* overflows = nullptr) + { + if (overflows) overflows->resize (0); + update_positions (); + + for (int parent_idx = vertices_.length - 1; parent_idx >= 0; parent_idx--) + { + for (const auto& link : vertices_[parent_idx].obj.links) + { + int64_t offset = compute_offset (parent_idx, link); + if (is_valid_offset (offset, link)) + continue; + + if (!overflows) return true; + + overflow_record_t r; + r.parent = parent_idx; + r.link = &link; + overflows->push (r); + } + } + + if (!overflows) return false; + return overflows->length; + } + + void print_overflows (const hb_vector_t& overflows) + { + if (!DEBUG_ENABLED(SUBSET_REPACK)) return; + + update_incoming_edge_count (); + for (const auto& o : overflows) + { + const auto& child = vertices_[o.link->objidx]; + DEBUG_MSG (SUBSET_REPACK, nullptr, " overflow from %d => %d (%d incoming , %d outgoing)", + o.parent, + o.link->objidx, + child.incoming_edges, + child.obj.links.length); + } + } + + void err_other_error () { this->successful = false; } + + private: + + bool check_success (bool success) + { return this->successful && (success || (err_other_error (), false)); } + + /* + * Creates a map from objid to # of incoming edges. + */ + void update_incoming_edge_count () + { + if (!edge_count_invalid) return; + + for (unsigned i = 0; i < vertices_.length; i++) + vertices_[i].incoming_edges = 0; + + for (const vertex_t& v : vertices_) + { + for (auto& l : v.obj.links) + { + vertices_[l.objidx].incoming_edges++; + } + } + + edge_count_invalid = false; + } + + /* + * compute the serialized start and end positions for each vertex. 
+ */ + void update_positions () + { + if (!positions_invalid) return; + + unsigned current_pos = 0; + for (int i = root_idx (); i >= 0; i--) + { + auto& v = vertices_[i]; + v.start = current_pos; + current_pos += v.obj.tail - v.obj.head; + v.end = current_pos; + } + + positions_invalid = false; + } + + /* + * Finds the distance to each object in the graph + * from the initial node. + */ + void update_distances () + { + if (!distance_invalid) return; + + // Uses Dijkstra's algorithm to find all of the shortest distances. + // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm + // + // Implementation Note: + // Since our priority queue doesn't support fast priority decreases + // we instead just add new entries into the queue when a priority changes. + // Redundant ones are filtered out later on by the visited set. + // According to https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf + // for practical performance this is faster then using a more advanced queue + // (such as a fibonaacci queue) with a fast decrease priority. + for (unsigned i = 0; i < vertices_.length; i++) + { + if (i == vertices_.length - 1) + vertices_[i].distance = 0; + else + vertices_[i].distance = hb_int_max (int64_t); + } + + hb_priority_queue_t queue; + queue.insert (0, vertices_.length - 1); + + hb_set_t visited; + + while (!queue.in_error () && !queue.is_empty ()) + { + unsigned next_idx = queue.pop_minimum ().second; + if (visited.has (next_idx)) continue; + const auto& next = vertices_[next_idx]; + int64_t next_distance = vertices_[next_idx].distance; + visited.add (next_idx); + + for (const auto& link : next.obj.links) + { + if (visited.has (link.objidx)) continue; + + const auto& child = vertices_[link.objidx].obj; + int64_t child_weight = child.tail - child.head + + (!link.is_wide ? 
(1 << 16) : ((int64_t) 1 << 32)); + int64_t child_distance = next_distance + child_weight; + + if (child_distance < vertices_[link.objidx].distance) + { + vertices_[link.objidx].distance = child_distance; + queue.insert (child_distance, link.objidx); + } + } + } + + check_success (!queue.in_error ()); + if (!check_success (queue.is_empty ())) + { + DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected."); + return; + } + + distance_invalid = false; + } + + int64_t compute_offset ( + unsigned parent_idx, + const hb_serialize_context_t::object_t::link_t& link) const + { + const auto& parent = vertices_[parent_idx]; + const auto& child = vertices_[link.objidx]; + int64_t offset = 0; + switch ((hb_serialize_context_t::whence_t) link.whence) { + case hb_serialize_context_t::whence_t::Head: + offset = child.start - parent.start; break; + case hb_serialize_context_t::whence_t::Tail: + offset = child.start - parent.end; break; + case hb_serialize_context_t::whence_t::Absolute: + offset = child.start; break; + } + + assert (offset >= link.bias); + offset -= link.bias; + return offset; + } + + bool is_valid_offset (int64_t offset, + const hb_serialize_context_t::object_t::link_t& link) const + { + if (link.is_signed) + { + if (link.is_wide) + return offset >= -((int64_t) 1 << 31) && offset < ((int64_t) 1 << 31); + else + return offset >= -(1 << 15) && offset < (1 << 15); + } + else + { + if (link.is_wide) + return offset >= 0 && offset < ((int64_t) 1 << 32); + else + return offset >= 0 && offset < (1 << 16); + } + } + + /* + * Updates all objidx's in all links using the provided mapping. 
+ */ + void remap_obj_indices (const hb_vector_t& id_map, + hb_vector_t* sorted_graph) const + { + for (unsigned i = 0; i < sorted_graph->length; i++) + { + for (unsigned j = 0; j < (*sorted_graph)[i].obj.links.length; j++) + { + auto& link = (*sorted_graph)[i].obj.links[j]; + link.objidx = id_map[link.objidx]; + } + } + } + + template void + serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link, + char* head, + hb_serialize_context_t* c) const + { + OT::Offset* offset = reinterpret_cast*> (head + link.position); + *offset = 0; + c->add_link (*offset, + // serializer has an extra nil object at the start of the + // object array. So all id's are +1 of what our id's are. + link.objidx + 1, + (hb_serialize_context_t::whence_t) link.whence, + link.bias); + } + + void serialize_link (const hb_serialize_context_t::object_t::link_t& link, + char* head, + hb_serialize_context_t* c) const + { + if (link.is_wide) + { + if (link.is_signed) + { + serialize_link_of_type (link, head, c); + } else { + serialize_link_of_type (link, head, c); + } + } else { + if (link.is_signed) + { + serialize_link_of_type (link, head, c); + } else { + serialize_link_of_type (link, head, c); + } + } + } + + public: + // TODO(garretrieger): make private, will need to move most of offset overflow code into graph. + hb_vector_t vertices_; + private: + hb_vector_t clone_buffers_; + bool edge_count_invalid; + bool distance_invalid; + bool positions_invalid; + bool successful; +}; + + +/* + * Attempts to modify the topological sorting of the provided object graph to + * eliminate offset overflows in the links between objects of the graph. If a + * non-overflowing ordering is found the updated graph is serialized it into the + * provided serialization context. + * + * If necessary the structure of the graph may be modified in ways that do not + * affect the functionality of the graph. For example shared objects may be + * duplicated. 
+ */ +inline void +hb_resolve_overflows (const hb_vector_t& packed, + hb_serialize_context_t* c) { + // Kahn sort is ~twice as fast as shortest distance sort and works for many fonts + // so try it first to save time. + graph_t sorted_graph (packed); + sorted_graph.sort_kahn (); + if (!sorted_graph.will_overflow ()) + { + sorted_graph.serialize (c); + return; + } + + sorted_graph.sort_shortest_distance (); + + unsigned round = 0; + hb_vector_t overflows; + // TODO(garretrieger): select a good limit for max rounds. + while (!sorted_graph.in_error () + && sorted_graph.will_overflow (&overflows) + && round++ < 10) { + DEBUG_MSG (SUBSET_REPACK, nullptr, "=== Over flow resolution round %d ===", round); + sorted_graph.print_overflows (overflows); + + bool resolution_attempted = false; + hb_set_t priority_bumped_parents; + // Try resolving the furthest overflows first. + for (int i = overflows.length - 1; i >= 0; i--) + { + const graph_t::overflow_record_t& r = overflows[i]; + const auto& child = sorted_graph.vertices_[r.link->objidx]; + if (child.is_shared ()) + { + // The child object is shared, we may be able to eliminate the overflow + // by duplicating it. + sorted_graph.duplicate (r.parent, r.link->objidx); + resolution_attempted = true; + + // Stop processing overflows for this round so that object order can be + // updated to account for the newly added object. + break; + } + + if (child.is_leaf () && !priority_bumped_parents.has (r.parent)) + { + // This object is too far from it's parent, attempt to move it closer. + // + // TODO(garretrieger): initially limiting this to leaf's since they can be + // moved closer with fewer consequences. However, this can + // likely can be used for non-leafs as well. + // TODO(garretrieger): add a maximum priority, don't try to raise past this. + // TODO(garretrieger): also try lowering priority of the parent. Make it + // get placed further up in the ordering, closer to it's children. 
+ // this is probably preferable if the total size of the parent object + // is < then the total size of the children (and the parent can be moved). + // Since in that case moving the parent will cause a smaller increase in + // the length of other offsets. + sorted_graph.raise_childrens_priority (r.parent); + priority_bumped_parents.add (r.parent); + resolution_attempted = true; + continue; + } + + // TODO(garretrieger): add additional offset resolution strategies + // - Promotion to extension lookups. + // - Table splitting. + } + + if (resolution_attempted) + { + sorted_graph.sort_shortest_distance (); + continue; + } + + DEBUG_MSG (SUBSET_REPACK, nullptr, "No resolution available :("); + c->err (HB_SERIALIZE_ERROR_OFFSET_OVERFLOW); + return; + } + + if (sorted_graph.in_error ()) + { + c->err (HB_SERIALIZE_ERROR_OTHER); + return; + } + sorted_graph.serialize (c); +} + + +#endif /* HB_REPACKER_HH */ diff --git a/src/hb-serialize.hh b/src/hb-serialize.hh index fe29bdf96..87aafe0e2 100644 --- a/src/hb-serialize.hh +++ b/src/hb-serialize.hh @@ -41,6 +41,16 @@ * Serialize */ +enum hb_serialize_error_t { + HB_SERIALIZE_ERROR_NONE = 0x00000000u, + HB_SERIALIZE_ERROR_OTHER = 0x00000001u, + HB_SERIALIZE_ERROR_OFFSET_OVERFLOW = 0x00000002u, + HB_SERIALIZE_ERROR_OUT_OF_ROOM = 0x00000004u, + HB_SERIALIZE_ERROR_INT_OVERFLOW = 0x00000008u, + HB_SERIALIZE_ERROR_ARRAY_OVERFLOW = 0x00000010u +}; +HB_MARK_AS_FLAG_T (hb_serialize_error_t); + struct hb_serialize_context_t { typedef unsigned objidx_t; @@ -51,6 +61,8 @@ struct hb_serialize_context_t Absolute /* Absolute: from the start of the serialize buffer. 
*/ }; + + struct object_t { void fini () { links.fini (); } @@ -117,30 +129,54 @@ struct hb_serialize_context_t object_pool.fini (); } - bool in_error () const { return !this->successful; } + bool in_error () const { return bool (errors); } + + bool successful () const { return !bool (errors); } + + HB_NODISCARD bool ran_out_of_room () const { return errors & HB_SERIALIZE_ERROR_OUT_OF_ROOM; } + HB_NODISCARD bool offset_overflow () const { return errors & HB_SERIALIZE_ERROR_OFFSET_OVERFLOW; } + HB_NODISCARD bool only_offset_overflow () const { return errors == HB_SERIALIZE_ERROR_OFFSET_OVERFLOW; } + + void reset (void *start_, unsigned int size) + { + start = (char*) start_; + end = start + size; + reset (); + current = nullptr; + } void reset () { - this->successful = true; - this->ran_out_of_room = false; + this->errors = HB_SERIALIZE_ERROR_NONE; this->head = this->start; this->tail = this->end; this->debug_depth = 0; fini (); this->packed.push (nullptr); + this->packed_map.init (); } - bool check_success (bool success) - { return this->successful && (success || (err_other_error (), false)); } + bool check_success (bool success, + hb_serialize_error_t err_type = HB_SERIALIZE_ERROR_OTHER) + { + return successful () + && (success || err (err_type)); + } template - bool check_equal (T1 &&v1, T2 &&v2) - { return check_success ((long long) v1 == (long long) v2); } + bool check_equal (T1 &&v1, T2 &&v2, hb_serialize_error_t err_type) + { + if ((long long) v1 != (long long) v2) + { + return err (err_type); + } + return true; + } template - bool check_assign (T1 &v1, T2 &&v2) - { return check_equal (v1 = v2, v2); } + bool check_assign (T1 &v1, T2 &&v2, hb_serialize_error_t err_type) + { return check_equal (v1 = v2, v2, err_type); } template bool propagate_error (T &&obj) { return check_success (!hb_deref (obj).in_error ()); } @@ -167,12 +203,18 @@ struct hb_serialize_context_t "end [%p..%p] serialized %u bytes; %s", this->start, this->end, (unsigned) (this->head - 
this->start), - this->successful ? "successful" : "UNSUCCESSFUL"); + successful () ? "successful" : "UNSUCCESSFUL"); propagate_error (packed, packed_map); if (unlikely (!current)) return; - if (unlikely (in_error())) return; + if (unlikely (in_error())) + { + // Offset overflows that occur before link resolution cannot be handled + // by repacking, so set a more general error. + if (offset_overflow ()) err (HB_SERIALIZE_ERROR_OTHER); + return; + } assert (!current->next); @@ -351,7 +393,7 @@ struct hb_serialize_context_t for (const object_t::link_t &link : parent->links) { const object_t* child = packed[link.objidx]; - if (unlikely (!child)) { err_other_error(); return; } + if (unlikely (!child)) { err (HB_SERIALIZE_ERROR_OTHER); return; } unsigned offset = 0; switch ((whence_t) link.whence) { case Head: offset = child->head - parent->head; break; @@ -398,19 +440,19 @@ struct hb_serialize_context_t Type *start_embed (const Type &obj) const { return start_embed (hb_addressof (obj)); } - /* Following two functions exist to allow setting breakpoint on. */ - void err_ran_out_of_room () { this->ran_out_of_room = true; } - void err_other_error () { this->successful = false; } + bool err (hb_serialize_error_t err_type) + { + return bool ((errors = (errors | err_type))); + } template Type *allocate_size (unsigned int size) { - if (unlikely (!this->successful)) return nullptr; + if (unlikely (in_error ())) return nullptr; if (this->tail - this->head < ptrdiff_t (size)) { - err_ran_out_of_room (); - this->successful = false; + err (HB_SERIALIZE_ERROR_OUT_OF_ROOM); return nullptr; } memset (this->head, 0, size); @@ -497,7 +539,7 @@ struct hb_serialize_context_t /* Output routines. */ hb_bytes_t copy_bytes () const { - assert (this->successful); + assert (successful ()); /* Copy both items from head side and tail side... 
*/ unsigned int len = (this->head - this->start) + (this->end - this->tail); @@ -520,20 +562,22 @@ struct hb_serialize_context_t (char *) b.arrayZ, free); } + const hb_vector_t& object_graph() const + { return packed; } + private: template void assign_offset (const object_t* parent, const object_t::link_t &link, unsigned offset) { auto &off = * ((BEInt *) (parent->head + link.position)); assert (0 == off); - check_assign (off, offset); + check_assign (off, offset, HB_SERIALIZE_ERROR_OFFSET_OVERFLOW); } public: /* TODO Make private. */ char *start, *head, *tail, *end; unsigned int debug_depth; - bool successful; - bool ran_out_of_room; + hb_serialize_error_t errors; private: @@ -550,5 +594,4 @@ struct hb_serialize_context_t hb_hashmap_t packed_map; }; - #endif /* HB_SERIALIZE_HH */ diff --git a/src/hb-subset.cc b/src/hb-subset.cc index 8b77ecd45..57915f677 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -50,6 +50,7 @@ #include "hb-ot-layout-gpos-table.hh" #include "hb-ot-var-gvar-table.hh" #include "hb-ot-var-hvar-table.hh" +#include "hb-repacker.hh" static unsigned @@ -64,69 +65,129 @@ _plan_estimate_subset_table_size (hb_subset_plan_t *plan, unsigned table_len) return 512 + (unsigned) (table_len * sqrt ((double) dst_glyphs / src_glyphs)); } +/* + * Repack the serialization buffer if any offset overflows exist. + */ +static hb_blob_t* +_repack (hb_tag_t tag, const hb_serialize_context_t& c) +{ + if (tag != HB_OT_TAG_GPOS + && tag != HB_OT_TAG_GSUB) + return c.copy_blob (); + + if (!c.offset_overflow ()) + return c.copy_blob (); + + hb_vector_t buf; + int buf_size = c.end - c.start; + if (unlikely (!buf.alloc (buf_size))) + return nullptr; + + hb_serialize_context_t repacked ((void *) buf, buf_size); + hb_resolve_overflows (c.object_graph (), &repacked); + + if (unlikely (repacked.in_error ())) + // TODO(garretrieger): refactor so we can share the resize/retry logic with the subset + // portion. 
+ return nullptr; + + return repacked.copy_blob (); +} + +template +static +bool +_try_subset (const TableType *table, + hb_vector_t* buf, + unsigned buf_size, + hb_subset_context_t* c /* OUT */) +{ + c->serializer->start_serialize (); + + bool needed = table->subset (c); + if (!c->serializer->ran_out_of_room ()) + { + c->serializer->end_serialize (); + return needed; + } + + buf_size += (buf_size >> 1) + 32; + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", + HB_UNTAG (c->table_tag), buf_size); + + if (unlikely (!buf->alloc (buf_size))) + { + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to reallocate %u bytes.", + HB_UNTAG (c->table_tag), buf_size); + return needed; + } + + c->serializer->reset (buf->arrayZ, buf_size); + return _try_subset (table, buf, buf_size, c); +} + template static bool _subset (hb_subset_plan_t *plan) { - bool result = false; hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table (plan->source); const TableType *table = source_blob->as (); hb_tag_t tag = TableType::tableTag; - if (source_blob->data) + if (!source_blob->data) { - hb_vector_t buf; - /* TODO Not all tables are glyph-related. 'name' table size for example should not be - * affected by number of glyphs. Accommodate that. 
*/ - unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length); - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size); - if (unlikely (!buf.alloc (buf_size))) - { - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to allocate %u bytes.", HB_UNTAG (tag), buf_size); - hb_blob_destroy (source_blob); - return false; - } - retry: - hb_serialize_context_t serializer ((void *) buf, buf_size); - serializer.start_serialize (); - hb_subset_context_t c (source_blob, plan, &serializer, tag); - bool needed = table->subset (&c); - if (serializer.ran_out_of_room) - { - buf_size += (buf_size >> 1) + 32; - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", HB_UNTAG (tag), buf_size); - if (unlikely (!buf.alloc (buf_size))) - { - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to reallocate %u bytes.", HB_UNTAG (tag), buf_size); - hb_blob_destroy (source_blob); - return false; - } - goto retry; - } - serializer.end_serialize (); - - result = !serializer.in_error (); - - if (result) - { - if (needed) - { - hb_blob_t *dest_blob = serializer.copy_blob (); - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c final subset table size: %u bytes.", HB_UNTAG (tag), dest_blob->length); - result = c.plan->add_table (tag, dest_blob); - hb_blob_destroy (dest_blob); - } - else - { - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset table subsetted to empty.", HB_UNTAG (tag)); - } - } + DEBUG_MSG (SUBSET, nullptr, + "OT::%c%c%c%c::subset sanitize failed on source table.", HB_UNTAG (tag)); + hb_blob_destroy (source_blob); + return false; } - else - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset sanitize failed on source table.", HB_UNTAG (tag)); + hb_vector_t buf; + /* TODO Not all tables are glyph-related. 'name' table size for example should not be + * affected by number of glyphs. Accommodate that. 
*/ + unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length); + DEBUG_MSG (SUBSET, nullptr, + "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size); + if (unlikely (!buf.alloc (buf_size))) + { + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to allocate %u bytes.", HB_UNTAG (tag), buf_size); + hb_blob_destroy (source_blob); + return false; + } + + bool needed = false; + hb_serialize_context_t serializer (buf.arrayZ, buf_size); + { + hb_subset_context_t c (source_blob, plan, &serializer, tag); + needed = _try_subset (table, &buf, buf_size, &c); + } hb_blob_destroy (source_blob); - DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset %s", HB_UNTAG (tag), result ? "success" : "FAILED!"); + + if (serializer.in_error () && !serializer.only_offset_overflow ()) + { + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset FAILED!", HB_UNTAG (tag)); + return false; + } + + if (!needed) + { + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset table subsetted to empty.", HB_UNTAG (tag)); + return true; + } + + bool result = false; + hb_blob_t *dest_blob = _repack (tag, serializer); + if (dest_blob) + { + DEBUG_MSG (SUBSET, nullptr, + "OT::%c%c%c%c final subset table size: %u bytes.", + HB_UNTAG (tag), dest_blob->length); + result = plan->add_table (tag, dest_blob); + hb_blob_destroy (dest_blob); + } + + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset %s", + HB_UNTAG (tag), result ? 
"success" : "FAILED!"); return result; } diff --git a/src/meson.build b/src/meson.build index ec3b7c718..dddafe9c2 100644 --- a/src/meson.build +++ b/src/meson.build @@ -477,6 +477,8 @@ if get_option('tests').enabled() compiled_tests = { 'test-algs': ['test-algs.cc', 'hb-static.cc'], 'test-array': ['test-array.cc'], + 'test-repacker': ['test-repacker.cc', 'hb-static.cc'], + 'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'], 'test-iter': ['test-iter.cc', 'hb-static.cc'], 'test-meta': ['test-meta.cc', 'hb-static.cc'], 'test-number': ['test-number.cc', 'hb-number.cc'], diff --git a/src/test-priority-queue.cc b/src/test-priority-queue.cc new file mode 100644 index 000000000..fab63acb6 --- /dev/null +++ b/src/test-priority-queue.cc @@ -0,0 +1,89 @@ +/* + * Copyright © 2020 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ * + * Google Author(s): Garret Rieger + */ + +#include "hb.hh" +#include "hb-priority-queue.hh" + +static void +test_insert () +{ + hb_priority_queue_t queue; + assert (queue.is_empty ()); + + queue.insert (10, 0); + assert (!queue.is_empty ()); + assert (queue.minimum () == hb_pair (10, 0)); + + queue.insert (20, 1); + assert (queue.minimum () == hb_pair (10, 0)); + + queue.insert (5, 2); + assert (queue.minimum () == hb_pair (5, 2)); + + queue.insert (15, 3); + assert (queue.minimum () == hb_pair (5, 2)); + + queue.insert (1, 4); + assert (queue.minimum () == hb_pair (1, 4)); +} + +static void +test_extract () +{ + hb_priority_queue_t queue; + queue.insert (0, 0); + queue.insert (60, 6); + queue.insert (30, 3); + queue.insert (40 ,4); + queue.insert (20, 2); + queue.insert (50, 5); + queue.insert (70, 7); + queue.insert (10, 1); + + for (int i = 0; i < 8; i++) + { + assert (!queue.is_empty ()); + assert (queue.minimum () == hb_pair (i * 10, i)); + assert (queue.pop_minimum () == hb_pair (i * 10, i)); + } + + assert (queue.is_empty ()); +} + +static void +test_extract_empty () +{ + hb_priority_queue_t queue; + assert (queue.pop_minimum () == hb_pair (0, 0)); +} + +int +main (int argc, char **argv) +{ + test_insert (); + test_extract (); + test_extract_empty (); +} diff --git a/src/test-repacker.cc b/src/test-repacker.cc new file mode 100644 index 000000000..a8cc6395f --- /dev/null +++ b/src/test-repacker.cc @@ -0,0 +1,485 @@ +/* + * Copyright © 2020 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. 
+ * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger + */ + +#include + +#include "hb-repacker.hh" +#include "hb-open-type.hh" + +static void start_object(const char* tag, + unsigned len, + hb_serialize_context_t* c) +{ + c->push (); + char* obj = c->allocate_size (len); + strncpy (obj, tag, len); +} + + +static unsigned add_object(const char* tag, + unsigned len, + hb_serialize_context_t* c) +{ + start_object (tag, len, c); + return c->pop_pack (false); +} + + +static void add_offset (unsigned id, + hb_serialize_context_t* c) +{ + OT::Offset16* offset = c->start_embed (); + c->extend_min (offset); + c->add_link (*offset, id); +} + +static void +populate_serializer_simple (hb_serialize_context_t* c) +{ + c->start_serialize (); + + unsigned obj_1 = add_object ("ghi", 3, c); + unsigned obj_2 = add_object ("def", 3, c); + + start_object ("abc", 3, c); + add_offset (obj_2, c); + add_offset (obj_1, c); + c->pop_pack (); + + c->end_serialize(); +} + +static void +populate_serializer_with_overflow (hb_serialize_context_t* c) +{ + std::string large_string(50000, 'a'); + c->start_serialize (); + + unsigned obj_1 = add_object (large_string.c_str(), 10000, c); + unsigned obj_2 = add_object (large_string.c_str(), 20000, c); + unsigned obj_3 = add_object (large_string.c_str(), 50000, c); + + start_object ("abc", 3, c); + add_offset 
(obj_3, c); + add_offset (obj_2, c); + add_offset (obj_1, c); + c->pop_pack (); + + c->end_serialize(); +} + +static void +populate_serializer_with_dedup_overflow (hb_serialize_context_t* c) +{ + std::string large_string(70000, 'a'); + c->start_serialize (); + + unsigned obj_1 = add_object ("def", 3, c); + + start_object (large_string.c_str(), 60000, c); + add_offset (obj_1, c); + unsigned obj_2 = c->pop_pack (false); + + start_object (large_string.c_str(), 10000, c); + add_offset (obj_2, c); + add_offset (obj_1, c); + c->pop_pack (false); + + c->end_serialize(); +} + +static void +populate_serializer_complex_1 (hb_serialize_context_t* c) +{ + c->start_serialize (); + + unsigned obj_4 = add_object ("jkl", 3, c); + unsigned obj_3 = add_object ("ghi", 3, c); + + start_object ("def", 3, c); + add_offset (obj_3, c); + unsigned obj_2 = c->pop_pack (false); + + start_object ("abc", 3, c); + add_offset (obj_2, c); + add_offset (obj_4, c); + c->pop_pack (); + + c->end_serialize(); +} + +static void +populate_serializer_complex_2 (hb_serialize_context_t* c) +{ + c->start_serialize (); + + unsigned obj_5 = add_object ("mn", 2, c); + + unsigned obj_4 = add_object ("jkl", 3, c); + + start_object ("ghi", 3, c); + add_offset (obj_4, c); + unsigned obj_3 = c->pop_pack (false); + + start_object ("def", 3, c); + add_offset (obj_3, c); + unsigned obj_2 = c->pop_pack (false); + + start_object ("abc", 3, c); + add_offset (obj_2, c); + add_offset (obj_4, c); + add_offset (obj_5, c); + c->pop_pack (); + + c->end_serialize(); +} + +static void +populate_serializer_complex_3 (hb_serialize_context_t* c) +{ + c->start_serialize (); + + unsigned obj_6 = add_object ("opqrst", 6, c); + + unsigned obj_5 = add_object ("mn", 2, c); + + start_object ("jkl", 3, c); + add_offset (obj_6, c); + unsigned obj_4 = c->pop_pack (false); + + start_object ("ghi", 3, c); + add_offset (obj_4, c); + unsigned obj_3 = c->pop_pack (false); + + start_object ("def", 3, c); + add_offset (obj_3, c); + unsigned obj_2 = 
c->pop_pack (false); + + start_object ("abc", 3, c); + add_offset (obj_2, c); + add_offset (obj_4, c); + add_offset (obj_5, c); + c->pop_pack (); + + c->end_serialize(); +} + +static void test_sort_kahn_1 () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_1 (&c); + + graph_t graph (c.object_graph ()); + graph.sort_kahn (); + + assert(strncmp (graph.object (3).head, "abc", 3) == 0); + assert(graph.object (3).links.length == 2); + assert(graph.object (3).links[0].objidx == 2); + assert(graph.object (3).links[1].objidx == 1); + + assert(strncmp (graph.object (2).head, "def", 3) == 0); + assert(graph.object (2).links.length == 1); + assert(graph.object (2).links[0].objidx == 0); + + assert(strncmp (graph.object (1).head, "jkl", 3) == 0); + assert(graph.object (1).links.length == 0); + + assert(strncmp (graph.object (0).head, "ghi", 3) == 0); + assert(graph.object (0).links.length == 0); + + free (buffer); +} + +static void test_sort_kahn_2 () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_2 (&c); + + graph_t graph (c.object_graph ()); + graph.sort_kahn (); + + + assert(strncmp (graph.object (4).head, "abc", 3) == 0); + assert(graph.object (4).links.length == 3); + assert(graph.object (4).links[0].objidx == 3); + assert(graph.object (4).links[1].objidx == 0); + assert(graph.object (4).links[2].objidx == 2); + + assert(strncmp (graph.object (3).head, "def", 3) == 0); + assert(graph.object (3).links.length == 1); + assert(graph.object (3).links[0].objidx == 1); + + assert(strncmp (graph.object (2).head, "mn", 2) == 0); + assert(graph.object (2).links.length == 0); + + assert(strncmp (graph.object (1).head, "ghi", 3) == 0); + assert(graph.object (1).links.length == 1); + assert(graph.object (1).links[0].objidx == 0); + + assert(strncmp (graph.object (0).head, "jkl", 3) == 
0); + assert(graph.object (0).links.length == 0); + + free (buffer); +} + +static void test_sort_shortest () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_2 (&c); + + graph_t graph (c.object_graph ()); + graph.sort_shortest_distance (); + + assert(strncmp (graph.object (4).head, "abc", 3) == 0); + assert(graph.object (4).links.length == 3); + assert(graph.object (4).links[0].objidx == 2); + assert(graph.object (4).links[1].objidx == 0); + assert(graph.object (4).links[2].objidx == 3); + + assert(strncmp (graph.object (3).head, "mn", 2) == 0); + assert(graph.object (3).links.length == 0); + + assert(strncmp (graph.object (2).head, "def", 3) == 0); + assert(graph.object (2).links.length == 1); + assert(graph.object (2).links[0].objidx == 1); + + assert(strncmp (graph.object (1).head, "ghi", 3) == 0); + assert(graph.object (1).links.length == 1); + assert(graph.object (1).links[0].objidx == 0); + + assert(strncmp (graph.object (0).head, "jkl", 3) == 0); + assert(graph.object (0).links.length == 0); + + free (buffer); +} + +static void test_duplicate_leaf () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_2 (&c); + + graph_t graph (c.object_graph ()); + graph.duplicate (4, 1); + + assert(strncmp (graph.object (5).head, "abc", 3) == 0); + assert(graph.object (5).links.length == 3); + assert(graph.object (5).links[0].objidx == 3); + assert(graph.object (5).links[1].objidx == 4); + assert(graph.object (5).links[2].objidx == 0); + + assert(strncmp (graph.object (4).head, "jkl", 3) == 0); + assert(graph.object (4).links.length == 0); + + assert(strncmp (graph.object (3).head, "def", 3) == 0); + assert(graph.object (3).links.length == 1); + assert(graph.object (3).links[0].objidx == 2); + + assert(strncmp (graph.object (2).head, "ghi", 3) == 0); + assert(graph.object 
(2).links.length == 1); + assert(graph.object (2).links[0].objidx == 1); + + assert(strncmp (graph.object (1).head, "jkl", 3) == 0); + assert(graph.object (1).links.length == 0); + + assert(strncmp (graph.object (0).head, "mn", 2) == 0); + assert(graph.object (0).links.length == 0); + + free (buffer); +} + +static void test_duplicate_interior () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_3 (&c); + + graph_t graph (c.object_graph ()); + graph.duplicate (3, 2); + + assert(strncmp (graph.object (6).head, "abc", 3) == 0); + assert(graph.object (6).links.length == 3); + assert(graph.object (6).links[0].objidx == 4); + assert(graph.object (6).links[1].objidx == 2); + assert(graph.object (6).links[2].objidx == 1); + + assert(strncmp (graph.object (5).head, "jkl", 3) == 0); + assert(graph.object (5).links.length == 1); + assert(graph.object (5).links[0].objidx == 0); + + assert(strncmp (graph.object (4).head, "def", 3) == 0); + assert(graph.object (4).links.length == 1); + assert(graph.object (4).links[0].objidx == 3); + + assert(strncmp (graph.object (3).head, "ghi", 3) == 0); + assert(graph.object (3).links.length == 1); + assert(graph.object (3).links[0].objidx == 5); + + assert(strncmp (graph.object (2).head, "jkl", 3) == 0); + assert(graph.object (2).links.length == 1); + assert(graph.object (2).links[0].objidx == 0); + + assert(strncmp (graph.object (1).head, "mn", 2) == 0); + assert(graph.object (1).links.length == 0); + + assert(strncmp (graph.object (0).head, "opqrst", 6) == 0); + assert(graph.object (0).links.length == 0); + + free (buffer); +} + +static void +test_serialize () +{ + size_t buffer_size = 100; + void* buffer_1 = malloc (buffer_size); + hb_serialize_context_t c1 (buffer_1, buffer_size); + populate_serializer_simple (&c1); + hb_bytes_t expected = c1.copy_bytes (); + + void* buffer_2 = malloc (buffer_size); + hb_serialize_context_t c2 (buffer_2, 
buffer_size); + + graph_t graph (c1.object_graph ()); + graph.serialize (&c2); + hb_bytes_t actual = c2.copy_bytes (); + + assert (actual == expected); + + actual.free (); + expected.free (); + free (buffer_1); + free (buffer_2); +} + +static void test_will_overflow_1 () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_2 (&c); + graph_t graph (c.object_graph ()); + + assert (!graph.will_overflow (nullptr)); + + free (buffer); +} + +static void test_will_overflow_2 () +{ + size_t buffer_size = 160000; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_with_overflow (&c); + graph_t graph (c.object_graph ()); + + assert (graph.will_overflow (nullptr)); + + free (buffer); +} + +static void test_will_overflow_3 () +{ + size_t buffer_size = 160000; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_with_dedup_overflow (&c); + graph_t graph (c.object_graph ()); + + assert (graph.will_overflow (nullptr)); + + free (buffer); +} + +static void test_resolve_overflows_via_sort () +{ + size_t buffer_size = 160000; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_with_overflow (&c); + graph_t graph (c.object_graph ()); + + void* out_buffer = malloc (buffer_size); + hb_serialize_context_t out (out_buffer, buffer_size); + + hb_resolve_overflows (c.object_graph (), &out); + assert (!out.offset_overflow ()); + hb_bytes_t result = out.copy_bytes (); + assert (result.length == (80000 + 3 + 3 * 2)); + + result.free (); + free (buffer); + free (out_buffer); +} + +static void test_resolve_overflows_via_duplication () +{ + size_t buffer_size = 160000; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_with_dedup_overflow (&c); + graph_t graph 
(c.object_graph ()); + + void* out_buffer = malloc (buffer_size); + hb_serialize_context_t out (out_buffer, buffer_size); + + hb_resolve_overflows (c.object_graph (), &out); + assert (!out.offset_overflow ()); + hb_bytes_t result = out.copy_bytes (); + assert (result.length == (10000 + 2 * 2 + 60000 + 2 + 3 * 2)); + + result.free (); + free (buffer); + free (out_buffer); +} + +// TODO(garretrieger): update will_overflow tests to check the overflows array. +// TODO(garretrieger): add a test(s) using a real font. +// TODO(garretrieger): add tests for priority raising. + +int +main (int argc, char **argv) +{ + test_serialize (); + test_sort_kahn_1 (); + test_sort_kahn_2 (); + test_sort_shortest (); + test_will_overflow_1 (); + test_will_overflow_2 (); + test_will_overflow_3 (); + test_resolve_overflows_via_sort (); + test_resolve_overflows_via_duplication (); + test_duplicate_leaf (); + test_duplicate_interior (); +} diff --git a/test/subset/Makefile.am b/test/subset/Makefile.am index 47b003932..cfd739b31 100644 --- a/test/subset/Makefile.am +++ b/test/subset/Makefile.am @@ -13,7 +13,9 @@ libs: EXTRA_DIST += \ meson.build \ run-tests.py \ + run-repack-tests.py \ subset_test_suite.py \ + repack_test.py \ $(NULL) CLEANFILES += \ diff --git a/test/subset/data/Makefile.am b/test/subset/data/Makefile.am index daebed90b..73585f85c 100644 --- a/test/subset/data/Makefile.am +++ b/test/subset/data/Makefile.am @@ -3,7 +3,7 @@ NULL = EXTRA_DIST = CLEANFILES = -SUBDIRS = +SUBDIRS = repack_tests EXTRA_DIST += \ $(TESTS) \ diff --git a/test/subset/data/fonts/NotoNastaliqUrdu-Bold.ttf b/test/subset/data/fonts/NotoNastaliqUrdu-Bold.ttf new file mode 100644 index 000000000..d05dabe0b Binary files /dev/null and b/test/subset/data/fonts/NotoNastaliqUrdu-Bold.ttf differ diff --git a/test/subset/data/repack_tests/Makefile.am b/test/subset/data/repack_tests/Makefile.am new file mode 100644 index 000000000..f85af6a1b --- /dev/null +++ b/test/subset/data/repack_tests/Makefile.am @@ -0,0 
+1,21 @@ +# Process this file with automake to produce Makefile.in + +NULL = +EXTRA_DIST = +CLEANFILES = +SUBDIRS = + +# Convenience targets: +lib: libs # Always build subsetter lib in this subdir +libs: + @$(MAKE) $(AM_MAKEFLAGS) -C $(top_builddir)/src libs + +TEST_EXTENSIONS = .tests +TESTS_LOG_COMPILER = $(srcdir)/../../run-repack-tests.py $(top_builddir)/util/hb-subset$(EXEEXT) +include Makefile.sources + +EXTRA_DIST += \ + $(TESTS) \ + $(NULL) + +-include $(top_srcdir)/git.mk diff --git a/test/subset/data/repack_tests/Makefile.sources b/test/subset/data/repack_tests/Makefile.sources new file mode 100644 index 000000000..e778e52a7 --- /dev/null +++ b/test/subset/data/repack_tests/Makefile.sources @@ -0,0 +1,12 @@ +TESTS = \ + basic.tests \ + prioritization.tests \ + table_duplication.tests \ + $(NULL) + +XFAIL_TESTS = \ + advanced_prioritization.tests \ + $(NULL) + +DISABLED_TESTS = \ + $(NULL) diff --git a/test/subset/data/repack_tests/advanced_prioritization.tests b/test/subset/data/repack_tests/advanced_prioritization.tests new file mode 100644 index 000000000..adcbb001b --- /dev/null +++ b/test/subset/data/repack_tests/advanced_prioritization.tests @@ -0,0 +1,72 @@ +NotoNastaliqUrdu-Bold.ttf +0x0020 +0x0028 +0x0029 +0x002C +0x002D +0x002E +0x0030 +0x0031 +0x0032 +0x0033 +0x0034 +0x0035 +0x0036 +0x0037 +0x0038 +0x0039 +0x003A +0x060C +0x061F +0x0621 +0x0622 +0x0623 +0x0624 +0x0625 +0x0626 +0x0627 +0x0628 +0x0629 +0x062A +0x062B +0x062C +0x062D +0x062E +0x062F +0x0630 +0x0631 +0x0632 +0x0633 +0x0634 +0x0635 +0x0636 +0x0637 +0x0638 +0x0639 +0x063A +0x0640 +0x0641 +0x0642 +0x0643 +0x0644 +0x0645 +0x0646 +0x0647 +0x0648 +0x0649 +0x064A +0x064B +0x064C +0x064F +0x0651 +0x067E +0x0686 +0x0698 +0x06A9 +0x06AF +0x06BE +0x06CC +0x200C +0x200D +0x200E + diff --git a/test/subset/data/repack_tests/basic.tests b/test/subset/data/repack_tests/basic.tests new file mode 100644 index 000000000..896cc9b48 --- /dev/null +++ b/test/subset/data/repack_tests/basic.tests @@ -0,0 
+1,52 @@ +NotoNastaliqUrdu-Bold.ttf +0x060C +0x061F +0x0621 +0x0622 +0x0623 +0x0624 +0x0625 +0x0626 +0x0627 +0x0628 +0x0629 +0x062A +0x062B +0x062C +0x062D +0x062E +0x062F +0x0630 +0x0631 +0x0632 +0x0633 +0x0634 +0x0635 +0x0636 +0x0637 +0x0638 +0x0639 +0x063A +0x0640 +0x0641 +0x0642 +0x0643 +0x0644 +0x0645 +0x0646 +0x0647 +0x0648 +0x0649 +0x064A +0x064B +0x064F +0x0651 +0x067E +0x0686 +0x0698 +0x06A9 +0x06AF +0x06CC +0x200C +0x200D +0x200E diff --git a/test/subset/data/repack_tests/prioritization.tests b/test/subset/data/repack_tests/prioritization.tests new file mode 100644 index 000000000..63b437c92 --- /dev/null +++ b/test/subset/data/repack_tests/prioritization.tests @@ -0,0 +1,77 @@ +NotoNastaliqUrdu-Bold.ttf +0x0020 +0x0028 +0x0029 +0x002C +0x002D +0x002E +0x0030 +0x0031 +0x0032 +0x0033 +0x0034 +0x0035 +0x0036 +0x0037 +0x0038 +0x0039 +0x003A +0x060C +0x061F +0x0621 +0x0622 +0x0623 +0x0624 +0x0625 +0x0626 +0x0627 +0x0628 +0x0629 +0x062A +0x062B +0x062C +0x062D +0x062E +0x062F +0x0630 +0x0631 +0x0632 +0x0633 +0x0634 +0x0635 +0x0636 +0x0637 +0x0638 +0x0639 +0x063A +0x0640 +0x0641 +0x0642 +0x0643 +0x0644 +0x0645 +0x0646 +0x0647 +0x0648 +0x0649 +0x064A +0x064B +0x064F +0x0651 +0x0653 +0x0679 +0x067E +0x0686 +0x0688 +0x0691 +0x0698 +0x06A9 +0x06AF +0x06BA +0x06BE +0x06C1 +0x06CC +0x06D2 +0x200C +0x200D +0x200E diff --git a/test/subset/data/repack_tests/table_duplication.tests b/test/subset/data/repack_tests/table_duplication.tests new file mode 100644 index 000000000..3cc90d6bc --- /dev/null +++ b/test/subset/data/repack_tests/table_duplication.tests @@ -0,0 +1,97 @@ +NotoNastaliqUrdu-Bold.ttf +0x0028 +0x0029 +0x002C +0x002D +0x002E +0x0030 +0x0031 +0x0032 +0x0033 +0x0034 +0x0035 +0x0036 +0x0037 +0x0038 +0x0039 +0x003A +0x0041 +0x0042 +0x0043 +0x0044 +0x0045 +0x0046 +0x0047 +0x0048 +0x0049 +0x004C +0x004D +0x004E +0x004F +0x0050 +0x0052 +0x0053 +0x0054 +0x0055 +0x0056 +0x0057 +0x0061 +0x0062 +0x0063 +0x0064 +0x0065 +0x0066 +0x0067 +0x0068 +0x0069 +0x006B +0x006C 
+0x006D +0x006E +0x006F +0x0070 +0x0072 +0x0073 +0x0074 +0x0075 +0x0076 +0x0077 +0x0078 +0x0079 +0x060C +0x0626 +0x0627 +0x0628 +0x062A +0x062C +0x062D +0x062E +0x062F +0x0631 +0x0632 +0x0633 +0x0634 +0x0635 +0x0636 +0x0637 +0x0638 +0x0639 +0x0641 +0x0642 +0x0644 +0x0645 +0x0646 +0x0648 +0x0653 +0x0679 +0x067E +0x0686 +0x0688 +0x0691 +0x06A9 +0x06AF +0x06BA +0x06BE +0x06C1 +0x06CC +0x06D2 diff --git a/test/subset/meson.build b/test/subset/meson.build index 458bf9646..5da61a9e0 100644 --- a/test/subset/meson.build +++ b/test/subset/meson.build @@ -28,6 +28,13 @@ tests = [ 'cbdt', ] +repack_tests = [ + 'basic', + 'prioritization', + 'table_duplication', +] + + run_test = find_program('run-tests.py') foreach t : tests @@ -45,3 +52,18 @@ foreach t : tests suite: ['subset', 'slow'], ) endforeach + +run_repack_test = find_program('run-repack-tests.py') + +foreach t : repack_tests + fname = '@0@.tests'.format(t) + + test(t, run_repack_test, + args: [ + hb_subset, + join_paths(meson.current_source_dir(), 'data', 'repack_tests', fname), + ], + workdir: join_paths(meson.current_build_dir(), '..', '..'), + suite: ['subset', 'repack'], + ) +endforeach diff --git a/test/subset/repack_test.py b/test/subset/repack_test.py new file mode 100644 index 000000000..2b53dd333 --- /dev/null +++ b/test/subset/repack_test.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import os + +# Parses a single repacking test file. The first line of the file is +# the name of the font to use and the remaining lines define the set of +# codepoints in the subset. 
+class RepackTest:
+    """A repack test case: a font name plus the codepoints to subset to."""
+
+    def __init__(self, test_path, definition):
+        # test_path: path of the .tests file; definition: that file's text.
+        self.test_path = test_path
+        self.font_name = None
+        self.codepoints = set ()
+        self._parse(definition)
+
+    def font_path(self):
+        return os.path.join (self._base_path (), "fonts", self.font_name)
+
+    def codepoints_string (self):
+        # Sorted so the generated command line is identical on every run.
+        return ",".join (sorted (self.codepoints))
+
+    def _base_path(self):
+        # Parent of the directory that holds the .tests file.
+        return os.path.join(os.path.dirname(self.test_path), "../")
+
+    def _parse(self, definition):
+        # First non-blank line is the font name, the rest are codepoints.
+        # Every line is stripped so stray whitespace cannot reach the path.
+        lines = [l.strip () for l in definition.splitlines () if l.strip ()]
+        if not lines:
+            raise ValueError ("empty test definition: %s" % self.test_path)
+        self.font_name = lines[0]
+        self.codepoints.update (lines[1:])
diff --git a/test/subset/run-repack-tests.py b/test/subset/run-repack-tests.py
new file mode 100755
index 000000000..22154ba9f
--- /dev/null
+++ b/test/subset/run-repack-tests.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+
+# Runs a single repacking test: subsets the font named in a .tests file with
+# hb-subset and checks that the result can be opened with fonttools.
+
+from difflib import unified_diff
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import shutil
+import io
+
+from repack_test import RepackTest
+
+try:
+    from fontTools.ttLib import TTFont
+except ImportError:
+    print ("fonttools is not present, skipping test.")
+    # 77 is the exit status automake's test harness reports as SKIP.
+    sys.exit (77)
+
+ots_sanitize = shutil.which ("ots-sanitize")
+
+# Runs command, forwards its stderr to ours and returns (stdout, exit code).
+def cmd (command):
+    p = subprocess.Popen (
+        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+        universal_newlines=True)
+    (stdoutdata, stderrdata) = p.communicate ()
+    print (stderrdata, end="", file=sys.stderr)
+    return stdoutdata, p.returncode
+
+# Reports a failed test and returns 1 so callers can add it to a fail count.
+def fail_test (test, cli_args, message):
+    print ('ERROR: %s' % message)
+    print ('Test State:')
+    print (' test.font_name %s' % test.font_name)
+    print (' test.test_path %s' % os.path.abspath (test.test_path))
+    print (' command %s' % ' '.join (cli_args))
+    return 1
+
+# Subsets the test's font and validates the output. Returns 0 on success and
+# 1 on failure.
+def run_test (test, should_check_ots):
+    # The output is only needed for the checks below, so it goes into a
+    # temporary directory that is removed again before returning.
+    with tempfile.TemporaryDirectory () as out_dir:
+        out_file = os.path.join (out_dir, test.font_name + '-subset.ttf')
+        cli_args = [hb_subset,
+                    "--font-file=" + test.font_path (),
+                    "--output-file=" + out_file,
+                    "--unicodes=%s" % test.codepoints_string (),
+                    "--drop-tables-=GPOS,GSUB,GDEF",]
+        print (' '.join (cli_args))
+        _, return_code = cmd (cli_args)
+
+        if return_code:
+            return fail_test (test, cli_args, "%s returned %d" % (' '.join (cli_args), return_code))
+
+        try:
+            with TTFont (out_file) as font:
+                pass
+        except Exception as e:
+            print (e)
+            return fail_test (test, cli_args, "ttx failed to parse the result")
+
+        if should_check_ots:
+            print ("Checking output with ots-sanitize.")
+            if not check_ots (out_file):
+                return fail_test (test, cli_args, 'ots for subsetted file fails.')
+
+    return 0
+
+# Returns True when an ots-sanitize executable was found by shutil.which.
+def has_ots ():
+    if not ots_sanitize:
+        print ("OTS is not present, skipping all ots checks.")
+        return False
+    return True
+
+# Runs ots-sanitize on path and returns True when it exits with status 0.
+def check_ots (path):
+    ots_report, returncode = cmd ([ots_sanitize, path])
+    if returncode:
+        print ("OTS Failure: %s" % ots_report)
+        return False
+    return True
+
+args = sys.argv[1:]
+if not args or sys.argv[1].find ('hb-subset') == -1 or not os.path.exists (sys.argv[1]):
+    sys.exit ("First argument does not seem to point to usable hb-subset.")
+hb_subset, args = args[0], args[1:]
+
+if len (args) != 1:
+    sys.exit ("Expected exactly one .tests file, got %d." % len (args))
+
+# Kept under its own name so the has_ots function is not overwritten.
+ots_available = has_ots ()
+
+fails = 0
+
+path = args[0]
+if not path.endswith(".tests"):
+    sys.exit ("Not a valid test case path.")
+
+with open (path, mode="r", encoding="utf-8") as f:
+    # TODO(garretrieger): re-enable OTS checking.
+    fails += run_test (RepackTest (path, f.read ()), False)
+
+
+if fails != 0:
+    sys.exit ("%d test(s) failed." % fails)
+else:
+    print ("All tests passed.")