diff --git a/src/Makefile.sources b/src/Makefile.sources
index 0a6b845ee..37c83dc9f 100644
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@@ -351,10 +351,10 @@ HB_SUBSET_sources = \
 	graph/gsubgpos-graph.hh \
 	graph/gsubgpos-context.hh \
 	graph/gsubgpos-context.cc \
-	graph/pairpos-graph.hh \
 	graph/coverage-graph.hh \
 	graph/classdef-graph.hh \
 	graph/pairpos-graph.hh \
+	graph/markbasepos-graph.hh \
 	graph/split-helpers.hh \
 	graph/serialize.hh \
 	$(NULL)
diff --git a/src/OT/Layout/GPOS/MarkRecord.hh b/src/OT/Layout/GPOS/MarkRecord.hh
index 7a514453a..a7d489d2a 100644
--- a/src/OT/Layout/GPOS/MarkRecord.hh
+++ b/src/OT/Layout/GPOS/MarkRecord.hh
@@ -9,7 +9,7 @@ struct MarkRecord
 {
   friend struct MarkArray;
 
-  protected:
+  public:
   HBUINT16	klass;			/* Class defined for this mark */
   Offset16To<Anchor>
 		markAnchor;		/* Offset to Anchor table--from
diff --git a/src/graph/coverage-graph.hh b/src/graph/coverage-graph.hh
index da71ea6fb..3c1022f09 100644
--- a/src/graph/coverage-graph.hh
+++ b/src/graph/coverage-graph.hh
@@ -109,7 +109,7 @@ struct Coverage : public OT::Layout::Common::Coverage
 {
     char* buffer = (char*) hb_calloc (1, max_size);
     hb_serialize_context_t serializer (buffer, max_size);
-    Coverage_serialize (&serializer, glyphs);
+    OT::Layout::Common::Coverage_serialize (&serializer, glyphs);
     serializer.end_serialize ();
     if (serializer.in_error ())
     {
diff --git a/src/graph/graph.hh b/src/graph/graph.hh
index 0d6adcb64..64878a84a 100644
--- a/src/graph/graph.hh
+++ b/src/graph/graph.hh
@@ -49,6 +49,51 @@ struct graph_t
     unsigned end = 0;
     unsigned priority = 0;
 
+    void normalize ()
+    {
+      obj.real_links.qsort ();
+      for (auto& l : obj.real_links)
+      {
+        for (unsigned i = 0; i < l.width; i++)
+        {
+          obj.head[l.position + i] = 0;
+        }
+      }
+    }
+
+    bool equals (const vertex_t& other,
+                 const graph_t& graph,
+                 const graph_t& other_graph,
+                 unsigned depth) const
+    {
+      if (!(as_bytes () == other.as_bytes ()))
+      {
+        DEBUG_MSG (SUBSET_REPACK, nullptr,
+                   "vertex [%lu] bytes != [%lu] bytes, depth = %u",
+                   table_size (),
+                   other.table_size (),
+                   depth);
+
+        auto a = as_bytes ();
+        auto b = other.as_bytes ();
+        while (a || b)
+        {
+          DEBUG_MSG (SUBSET_REPACK, nullptr,
+                     "  0x%x %s 0x%x", *a, (*a == *b) ? "==" : "!=", *b);
+          a++;
+          b++;
+        }
+        return false;
+      }
+
+      return links_equal (obj.real_links, other.obj.real_links, graph, other_graph, depth);
+    }
+
+    hb_bytes_t as_bytes () const
+    {
+      return hb_bytes_t (obj.head, table_size ());
+    }
+
     friend void swap (vertex_t& a, vertex_t& b)
     {
       hb_swap (a.obj, b.obj);
@@ -60,6 +105,18 @@ struct graph_t
       hb_swap (a.priority, b.priority);
     }
 
+    hb_hashmap_t<unsigned, unsigned>
+    position_to_index_map () const
+    {
+      hb_hashmap_t<unsigned, unsigned> result;
+
+      for (const auto& l : obj.real_links) {
+        result.set (l.position, l.objidx);
+      }
+
+      return result;
+    }
+
     bool is_shared () const
     {
       return parents.length > 1;
@@ -84,7 +141,7 @@ struct graph_t
     {
       for (unsigned i = 0; i < obj.real_links.length; i++)
       {
-        auto& link = obj.real_links[i];
+        auto& link = obj.real_links.arrayZ[i];
         if (link.objidx != child_index)
           continue;
@@ -155,6 +212,57 @@ struct graph_t
       return -table_size;
     }
 
+    private:
+    bool links_equal (const hb_vector_t<hb_serialize_context_t::object_t::link_t>& this_links,
+                      const hb_vector_t<hb_serialize_context_t::object_t::link_t>& other_links,
+                      const graph_t& graph,
+                      const graph_t& other_graph,
+                      unsigned depth) const
+    {
+      auto a = this_links.iter ();
+      auto b = other_links.iter ();
+
+      while (a && b)
+      {
+        const auto& link_a = *a;
+        const auto& link_b = *b;
+
+        if (link_a.width != link_b.width ||
+            link_a.is_signed != link_b.is_signed ||
+            link_a.whence != link_b.whence ||
+            link_a.position != link_b.position ||
+            link_a.bias != link_b.bias)
+          return false;
+
+        if (!graph.vertices_[link_a.objidx].equals (
+                other_graph.vertices_[link_b.objidx], graph, other_graph, depth + 1))
+          return false;
+
+        a++;
+        b++;
+      }
+
+      if (bool (a) != bool (b))
+        return false;
+
+      return true;
+    }
+  };
+
+  template <typename T>
+  struct vertex_and_table_t
+  {
+    vertex_and_table_t () : index (0), vertex (nullptr), table (nullptr)
+    {}
+
+    unsigned index;
+    vertex_t* vertex;
+    T* table;
+
+    operator bool () {
+      return table && vertex;
+    }
   };
 
 /*
@@ -169,7 +277,8 @@ struct graph_t
       : parents_invalid (true),
         distance_invalid (true),
         positions_invalid (true),
-        successful (true)
+        successful (true),
+        buffers ()
   {
     num_roots_for_space_.push (1);
     bool removed_nil = false;
@@ -201,6 +310,20 @@ struct graph_t
   ~graph_t ()
   {
     vertices_.fini ();
+    for (char* b : buffers)
+      hb_free (b);
+  }
+
+  bool operator== (const graph_t& other) const
+  {
+    return root ().equals (other.root (), *this, other, 0);
+  }
+
+  // Sorts links of all objects in a consistent manner and zeroes all offsets.
+  void normalize ()
+  {
+    for (auto& v : vertices_.writer ())
+      v.normalize ();
   }
 
   bool in_error () const
@@ -228,6 +351,27 @@ struct graph_t
     return vertices_[i].obj;
   }
 
+  void add_buffer (char* buffer)
+  {
+    buffers.push (buffer);
+  }
+
+  /*
+   * Adds a 16 bit link from parent_id to child_id
+   */
+  template<typename T>
+  void add_link (T* offset,
+                 unsigned parent_id,
+                 unsigned child_id)
+  {
+    auto& v = vertices_[parent_id];
+    auto* link = v.obj.real_links.push ();
+    link->width = 2;
+    link->objidx = child_id;
+    link->position = (char*) offset - (char*) v.obj.head;
+    vertices_[child_id].parents.push (parent_id);
+  }
+
   /*
    * Generates a new topological sorting of graph ordered by the shortest
    * distance to each node if positions are marked as invalid.
@@ -345,6 +489,31 @@ struct graph_t
     }
   }
 
+  template <typename T, typename ...Ts>
+  vertex_and_table_t<T> as_table (unsigned parent, const void* offset, Ts... ds)
+  {
+    return as_table_from_index<T> (index_for_offset (parent, offset), std::forward<Ts>(ds)...);
+  }
+
+  template <typename T, typename ...Ts>
+  vertex_and_table_t<T> as_table_from_index (unsigned index, Ts... ds)
+  {
+    if (index >= vertices_.length)
+      return vertex_and_table_t<T> ();
+
+    vertex_and_table_t<T> r;
+    r.vertex = &vertices_[index];
+    r.table = (T*) r.vertex->obj.head;
+    r.index = index;
+    if (!r.table)
+      return vertex_and_table_t<T> ();
+
+    if (!r.table->sanitize (*(r.vertex), std::forward<Ts>(ds)...))
+      return vertex_and_table_t<T> ();
+
+    return r;
+  }
+
   // Finds the object id of the object pointed to by the offset at 'offset'
   // within object[node_idx].
   unsigned index_for_offset (unsigned node_idx, const void* offset) const
@@ -352,8 +521,11 @@ struct graph_t
     const auto& node = object (node_idx);
     if (offset < node.head || offset >= node.tail) return -1;
 
-    for (const auto& link : node.real_links)
+    unsigned length = node.real_links.length;
+    for (unsigned i = 0; i < length; i++)
     {
+      // Use direct access for increased performance, this is a hot method.
+      const auto& link = node.real_links.arrayZ[i];
       if (offset != node.head + link.position)
         continue;
       return link.objidx;
@@ -1059,6 +1231,7 @@ struct graph_t
   bool positions_invalid;
   bool successful;
   hb_vector_t<unsigned> num_roots_for_space_;
+  hb_vector_t<char*> buffers;
 };
 
 }
diff --git a/src/graph/gsubgpos-context.cc b/src/graph/gsubgpos-context.cc
index e0ff6ff85..b2044426d 100644
--- a/src/graph/gsubgpos-context.cc
+++ b/src/graph/gsubgpos-context.cc
@@ -33,8 +33,7 @@ gsubgpos_graph_context_t::gsubgpos_graph_context_t (hb_tag_t table_tag_,
     : table_tag (table_tag_),
       graph (graph_),
       lookup_list_index (0),
-      lookups (),
-      buffers ()
+      lookups ()
 {
   if (table_tag_ != HB_OT_TAG_GPOS
       &&  table_tag_ != HB_OT_TAG_GSUB)
@@ -53,7 +52,7 @@ unsigned gsubgpos_graph_context_t::create_node (unsigned size)
   if (!buffer)
     return -1;
 
-  buffers.push (buffer);
+  add_buffer (buffer);
 
   return graph.new_node (buffer, buffer + size);
 }
diff --git a/src/graph/gsubgpos-context.hh b/src/graph/gsubgpos-context.hh
index 49b24198f..9fe9662e6 100644
--- a/src/graph/gsubgpos-context.hh
+++ b/src/graph/gsubgpos-context.hh
@@ -40,22 +40,16 @@ struct gsubgpos_graph_context_t
   graph_t& graph;
   unsigned lookup_list_index;
   hb_hashmap_t<unsigned, graph::Lookup*> lookups;
-  hb_vector_t<char*> buffers;
+
 
   HB_INTERNAL gsubgpos_graph_context_t (hb_tag_t table_tag_,
                                         graph_t& graph_);
 
-  ~gsubgpos_graph_context_t ()
-  {
-    for (char* b : buffers)
-      hb_free (b);
-  }
-
   HB_INTERNAL unsigned create_node (unsigned size);
 
   void add_buffer (char* buffer)
   {
-    buffers.push (buffer);
+    graph.add_buffer (buffer);
   }
 
   private:
diff --git a/src/graph/gsubgpos-graph.hh b/src/graph/gsubgpos-graph.hh
index f963a49ad..a93e7d1c7 100644
--- a/src/graph/gsubgpos-graph.hh
+++ b/src/graph/gsubgpos-graph.hh
@@ -29,6 +29,7 @@
 #include "../OT/Layout/GSUB/ExtensionSubst.hh"
 #include "gsubgpos-context.hh"
 #include "pairpos-graph.hh"
+#include "markbasepos-graph.hh"
 
 #ifndef GRAPH_GSUBGPOS_GRAPH_HH
 #define GRAPH_GSUBGPOS_GRAPH_HH
@@ -121,7 +122,9 @@ struct Lookup : public OT::Lookup
     if (c.table_tag != HB_OT_TAG_GPOS)
       return true;
 
-    if (!is_ext && type != OT::Layout::GPOS_impl::PosLookupSubTable::Type::Pair)
+    if (!is_ext &&
+        type != OT::Layout::GPOS_impl::PosLookupSubTable::Type::Pair &&
+        type != OT::Layout::GPOS_impl::PosLookupSubTable::Type::MarkBase)
       return true;
 
     hb_vector_t<hb_pair_t<unsigned, hb_vector_t<unsigned>>> all_new_subtables;
@@ -138,15 +141,23 @@ struct Lookup : public OT::Lookup
         subtable_index = extension->get_subtable_index (c.graph, ext_subtable_index);
         type = extension->get_lookup_type ();
-        if (type != OT::Layout::GPOS_impl::PosLookupSubTable::Type::Pair)
+        if (type != OT::Layout::GPOS_impl::PosLookupSubTable::Type::Pair
+            && type != OT::Layout::GPOS_impl::PosLookupSubTable::Type::MarkBase)
           continue;
       }
 
-      PairPos* pairPos = (PairPos*) c.graph.object (subtable_index).head;
-      if (!pairPos || !pairPos->sanitize (c.graph.vertices_[subtable_index])) continue;
-
-      hb_vector_t<unsigned> new_sub_tables = pairPos->split_subtables (c, subtable_index);
+      hb_vector_t<unsigned> new_sub_tables;
+      switch (type)
+      {
+      case 2:
+        new_sub_tables = split_subtable<PairPos> (c, subtable_index); break;
+      case 4:
+        new_sub_tables = split_subtable<MarkBasePos> (c, subtable_index); break;
+      default:
+        break;
+      }
       if (new_sub_tables.in_error ()) return false;
+      if (!new_sub_tables) continue;
       hb_pair_t<unsigned, hb_vector_t<unsigned>>* entry = all_new_subtables.push ();
       entry->first = i;
       entry->second = std::move (new_sub_tables);
@@ -159,6 +170,17 @@ struct Lookup : public OT::Lookup
     return true;
   }
 
+  template<typename T>
+  hb_vector_t<unsigned> split_subtable (gsubgpos_graph_context_t& c,
+                                        unsigned objidx)
+  {
+    T* sub_table = (T*) c.graph.object (objidx).head;
+    if (!sub_table || !sub_table->sanitize (c.graph.vertices_[objidx]))
+      return hb_vector_t<unsigned> ();
+
+    return sub_table->split_subtables (c, objidx);
+  }
+
   void add_sub_tables (gsubgpos_graph_context_t& c,
                        unsigned this_index,
                        unsigned type,
diff --git a/src/graph/markbasepos-graph.hh b/src/graph/markbasepos-graph.hh
new file mode 100644
index 000000000..56fa81240
--- /dev/null
+++ b/src/graph/markbasepos-graph.hh
@@ -0,0 +1,507 @@
+/*
+ * Copyright © 2022  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#ifndef GRAPH_MARKBASEPOS_GRAPH_HH
+#define GRAPH_MARKBASEPOS_GRAPH_HH
+
+#include "split-helpers.hh"
+#include "coverage-graph.hh"
+#include "../OT/Layout/GPOS/MarkBasePos.hh"
+#include "../OT/Layout/GPOS/PosLookupSubTable.hh"
+
+namespace graph {
+
+struct AnchorMatrix : public OT::Layout::GPOS_impl::AnchorMatrix
+{
+  bool sanitize (graph_t::vertex_t& vertex, unsigned class_count) const
+  {
+    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
+    if (vertex_len < AnchorMatrix::min_size) return false;
+
+    return vertex_len >= AnchorMatrix::min_size +
+        OT::Offset16::static_size * class_count * this->rows;
+  }
+
+  bool shrink (gsubgpos_graph_context_t& c,
+               unsigned this_index,
+               unsigned old_class_count,
+               unsigned new_class_count)
+  {
+    if (new_class_count >= old_class_count) return false;
+    auto& o = c.graph.vertices_[this_index].obj;
+    unsigned base_count = rows;
+    o.tail = o.head +
+             AnchorMatrix::min_size +
+             OT::Offset16::static_size * base_count * new_class_count;
+
+    // Reposition links into the new indexing scheme.
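+    // For example, with old_class_count = 4 and new_class_count = 2, the
+    // offset stored at matrix index 9 (base 2, class 1) is re-addressed to
+    // index 2 * 2 + 1 = 5 in the shrunken matrix.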
+    for (auto& link : o.real_links.writer ())
+    {
+      unsigned index = (link.position - 2) / 2;
+      unsigned base = index / old_class_count;
+      unsigned klass = index % old_class_count;
+      if (klass >= new_class_count)
+        // should have already been removed
+        return false;
+
+      unsigned new_index = base * new_class_count + klass;
+
+      link.position = (char*) &(this->matrixZ[new_index]) - (char*) this;
+    }
+
+    return true;
+  }
+
+  unsigned clone (gsubgpos_graph_context_t& c,
+                  unsigned this_index,
+                  unsigned start,
+                  unsigned end,
+                  unsigned class_count)
+  {
+    unsigned base_count = rows;
+    unsigned new_class_count = end - start;
+    unsigned size = AnchorMatrix::min_size +
+                    OT::Offset16::static_size * new_class_count * rows;
+    unsigned prime_id = c.create_node (size);
+    if (prime_id == (unsigned) -1) return -1;
+    AnchorMatrix* prime = (AnchorMatrix*) c.graph.object (prime_id).head;
+    prime->rows = base_count;
+
+    auto& o = c.graph.vertices_[this_index].obj;
+    int num_links = o.real_links.length;
+    for (int i = 0; i < num_links; i++)
+    {
+      const auto& link = o.real_links[i];
+      unsigned old_index = (link.position - 2) / OT::Offset16::static_size;
+      unsigned klass = old_index % class_count;
+      if (klass < start || klass >= end) continue;
+
+      unsigned base = old_index / class_count;
+      unsigned new_klass = klass - start;
+      unsigned new_index = base * new_class_count + new_klass;
+
+      unsigned child_idx = link.objidx;
+      c.graph.add_link (&(prime->matrixZ[new_index]),
+                        prime_id,
+                        child_idx);
+
+      auto& child = c.graph.vertices_[child_idx];
+      child.remove_parent (this_index);
+
+      o.real_links.remove (i);
+      num_links--;
+      i--;
+    }
+
+    return prime_id;
+  }
+};
+
+struct MarkArray : public OT::Layout::GPOS_impl::MarkArray
+{
+  bool sanitize (graph_t::vertex_t& vertex) const
+  {
+    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
+    unsigned min_size = MarkArray::min_size;
+    if (vertex_len < min_size) return false;
+
+    return vertex_len >= get_size ();
+  }
+
+  bool shrink (gsubgpos_graph_context_t& c,
+               const hb_hashmap_t<unsigned, unsigned>& mark_array_links,
+               unsigned this_index,
+               unsigned new_class_count)
+  {
+    auto& o = c.graph.vertices_[this_index].obj;
+    for (const auto& link : o.real_links)
+      c.graph.vertices_[link.objidx].remove_parent (this_index);
+    o.real_links.reset ();
+
+    unsigned new_index = 0;
+    for (const auto& record : this->iter ())
+    {
+      unsigned klass = record.klass;
+      if (klass >= new_class_count) continue;
+
+      (*this)[new_index].klass = klass;
+      unsigned position = (char*) &record.markAnchor - (char*) this;
+      unsigned* objidx;
+      if (!mark_array_links.has (position, &objidx))
+      {
+        new_index++;
+        continue;
+      }
+
+      c.graph.add_link (&(*this)[new_index].markAnchor, this_index, *objidx);
+      new_index++;
+    }
+
+    this->len = new_index;
+    o.tail = o.head + MarkArray::min_size +
+             OT::Layout::GPOS_impl::MarkRecord::static_size * new_index;
+    return true;
+  }
+
+  unsigned clone (gsubgpos_graph_context_t& c,
+                  unsigned this_index,
+                  const hb_hashmap_t<unsigned, unsigned>& pos_to_index,
+                  hb_set_t& marks,
+                  unsigned start_class)
+  {
+    unsigned size = MarkArray::min_size +
+                    OT::Layout::GPOS_impl::MarkRecord::static_size *
+                    marks.get_population ();
+    unsigned prime_id = c.create_node (size);
+    if (prime_id == (unsigned) -1) return -1;
+    MarkArray* prime = (MarkArray*) c.graph.object (prime_id).head;
+    prime->len = marks.get_population ();
+
+    unsigned i = 0;
+    for (hb_codepoint_t mark : marks)
+    {
+      (*prime)[i].klass = (*this)[mark].klass - start_class;
+      unsigned offset_pos = (char*) &((*this)[mark].markAnchor) - (char*) this;
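+      // If this record's markAnchor offset links to an anchor subgraph,
+      // move that child (and its subgraph) from the original MarkArray over
+      // to the corresponding record of the clone.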
+      unsigned* anchor_index;
+      if (pos_to_index.has (offset_pos, &anchor_index))
+        c.graph.move_child (this_index,
+                            &((*this)[mark].markAnchor),
+                            prime_id,
+                            &((*prime)[i].markAnchor));
+
+      i++;
+    }
+
+    return prime_id;
+  }
+};
+
+struct MarkBasePosFormat1 : public OT::Layout::GPOS_impl::MarkBasePosFormat1_2<SmallTypes>
+{
+  bool sanitize (graph_t::vertex_t& vertex) const
+  {
+    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
+    return vertex_len >= MarkBasePosFormat1::static_size;
+  }
+
+  hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c, unsigned this_index)
+  {
+    hb_set_t visited;
+
+    const unsigned base_coverage_id = c.graph.index_for_offset (this_index, &baseCoverage);
+    const unsigned base_size =
+        OT::Layout::GPOS_impl::PairPosFormat1_3<SmallTypes>::min_size +
+        MarkArray::min_size +
+        AnchorMatrix::min_size +
+        c.graph.vertices_[base_coverage_id].table_size ();
+
+    hb_vector_t<class_info_t> class_to_info = get_class_info (c, this_index);
+
+    unsigned class_count = classCount;
+    auto base_array = c.graph.as_table<AnchorMatrix> (this_index,
+                                                      &baseArray,
+                                                      class_count);
+    if (!base_array) return hb_vector_t<unsigned> ();
+    unsigned base_count = base_array.table->rows;
+
+    unsigned partial_coverage_size = 4;
+    unsigned accumulated = base_size;
+    hb_vector_t<unsigned> split_points;
+
+    for (unsigned klass = 0; klass < class_count; klass++)
+    {
+      class_info_t& info = class_to_info[klass];
+      partial_coverage_size += OT::HBUINT16::static_size * info.marks.get_population ();
+      unsigned accumulated_delta =
+          OT::Layout::GPOS_impl::MarkRecord::static_size * info.marks.get_population () +
+          OT::Offset16::static_size * base_count;
+
+      for (unsigned objidx : info.child_indices)
+        accumulated_delta += c.graph.find_subgraph_size (objidx, visited);
+
+      accumulated += accumulated_delta;
+      unsigned total = accumulated + partial_coverage_size;
+
+      if (total >= (1 << 16))
+      {
+        split_points.push (klass);
+        accumulated = base_size + accumulated_delta;
+        partial_coverage_size = 4 + OT::HBUINT16::static_size * info.marks.get_population ();
+        visited.clear ();  // node sharing isn't allowed between splits.
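+        // The next subtable will start out holding only this class, so the
+        // running size above restarts at the fixed subtable overhead plus
+        // this class's own contribution.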
+      }
+    }
+
+
+    const unsigned mark_array_id = c.graph.index_for_offset (this_index, &markArray);
+    split_context_t split_context {
+      c,
+      this,
+      this_index,
+      std::move (class_to_info),
+      c.graph.vertices_[mark_array_id].position_to_index_map (),
+    };
+
+    return actuate_subtable_split (split_context, split_points);
+  }
+
+  private:
+
+  struct class_info_t {
+    hb_set_t marks;
+    hb_vector_t<unsigned> child_indices;
+  };
+
+  struct split_context_t {
+    gsubgpos_graph_context_t& c;
+    MarkBasePosFormat1* thiz;
+    unsigned this_index;
+    hb_vector_t<class_info_t> class_to_info;
+    hb_hashmap_t<unsigned, unsigned> mark_array_links;
+
+    hb_set_t marks_for (unsigned start, unsigned end)
+    {
+      hb_set_t marks;
+      for (unsigned klass = start; klass < end; klass++)
+      {
+        + class_to_info[klass].marks.iter ()
+        | hb_sink (marks)
+        ;
+      }
+      return marks;
+    }
+
+    unsigned original_count ()
+    {
+      return thiz->classCount;
+    }
+
+    unsigned clone_range (unsigned start, unsigned end)
+    {
+      return thiz->clone_range (*this, this->this_index, start, end);
+    }
+
+    bool shrink (unsigned count)
+    {
+      return thiz->shrink (*this, this->this_index, count);
+    }
+  };
+
+  hb_vector_t<class_info_t> get_class_info (gsubgpos_graph_context_t& c,
+                                            unsigned this_index)
+  {
+    hb_vector_t<class_info_t> class_to_info;
+
+    unsigned class_count = classCount;
+    class_to_info.resize (class_count);
+
+    auto mark_array = c.graph.as_table<MarkArray> (this_index, &markArray);
+    if (!mark_array) return hb_vector_t<class_info_t> ();
+    unsigned mark_count = mark_array.table->len;
+    for (unsigned mark = 0; mark < mark_count; mark++)
+    {
+      unsigned klass = (*mark_array.table)[mark].get_class ();
+      class_to_info[klass].marks.add (mark);
+    }
+
+    for (const auto& link : mark_array.vertex->obj.real_links)
+    {
+      unsigned mark = (link.position - 2) /
+                      OT::Layout::GPOS_impl::MarkRecord::static_size;
+      unsigned klass = (*mark_array.table)[mark].get_class ();
+      class_to_info[klass].child_indices.push (link.objidx);
+    }
+
+    unsigned base_array_id =
+        c.graph.index_for_offset (this_index, &baseArray);
+    auto& base_array_v = c.graph.vertices_[base_array_id];
+
+    for (const auto& link : base_array_v.obj.real_links)
+    {
+      unsigned index = (link.position - 2) / OT::Offset16::static_size;
+      unsigned klass = index % class_count;
+      class_to_info[klass].child_indices.push (link.objidx);
+    }
+
+    return class_to_info;
+  }
+
+  bool shrink (split_context_t& sc,
+               unsigned this_index,
+               unsigned count)
+  {
+    DEBUG_MSG (SUBSET_REPACK, nullptr,
+               "  Shrinking MarkBasePosFormat1 (%u) to [0, %u).",
+               this_index,
+               count);
+
+    unsigned old_count = classCount;
+    if (count >= old_count)
+      return true;
+
+    classCount = count;
+
+    auto mark_coverage = sc.c.graph.as_table<Coverage> (this_index,
+                                                        &markCoverage);
+    if (!mark_coverage) return false;
+    hb_set_t marks = sc.marks_for (0, count);
+    auto new_coverage =
+        + hb_zip (hb_range (), mark_coverage.table->iter ())
+        | hb_filter (marks, hb_first)
+        | hb_map_retains_sorting (hb_second)
+        ;
+    if (!Coverage::make_coverage (sc.c,
+                                  new_coverage,
+                                  mark_coverage.index,
+                                  4 + 2 * marks.get_population ()))
+      return false;
+
+    auto base_array = sc.c.graph.as_table<AnchorMatrix> (this_index,
+                                                         &baseArray,
+                                                         old_count);
+    if (!base_array || !base_array.table->shrink (sc.c,
+                                                  base_array.index,
+                                                  old_count,
+                                                  count))
+      return false;
+
+    auto mark_array = sc.c.graph.as_table<MarkArray> (this_index,
+                                                      &markArray);
+    if (!mark_array || !mark_array.table->shrink (sc.c,
+                                                  sc.mark_array_links,
+                                                  mark_array.index,
+                                                  count))
+      return false;
+
+    return true;
+  }
+
+  // Create a new MarkBasePos that has all of the data for classes from [start, end).
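+  // The clone shares the original baseCoverage (duplicated in the graph),
+  // gets a markCoverage filtered down to the marks of the retained classes,
+  // and receives MarkArray/AnchorMatrix copies covering only [start, end).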
+  unsigned clone_range (split_context_t& sc,
+                        unsigned this_index,
+                        unsigned start, unsigned end) const
+  {
+    DEBUG_MSG (SUBSET_REPACK, nullptr,
+               "  Cloning MarkBasePosFormat1 (%u) range [%u, %u).", this_index, start, end);
+
+    graph_t& graph = sc.c.graph;
+    unsigned prime_size = OT::Layout::GPOS_impl::MarkBasePosFormat1_2<SmallTypes>::static_size;
+
+    unsigned prime_id = sc.c.create_node (prime_size);
+    if (prime_id == (unsigned) -1) return -1;
+
+    MarkBasePosFormat1* prime = (MarkBasePosFormat1*) graph.object (prime_id).head;
+    prime->format = this->format;
+    unsigned new_class_count = end - start;
+    prime->classCount = new_class_count;
+
+    unsigned base_coverage_id =
+        graph.index_for_offset (sc.this_index, &baseCoverage);
+    graph.add_link (&(prime->baseCoverage), prime_id, base_coverage_id);
+    graph.duplicate (prime_id, base_coverage_id);
+
+    auto mark_coverage = sc.c.graph.as_table<Coverage> (this_index,
+                                                        &markCoverage);
+    if (!mark_coverage) return false;
+    hb_set_t marks = sc.marks_for (start, end);
+    auto new_coverage =
+        + hb_zip (hb_range (), mark_coverage.table->iter ())
+        | hb_filter (marks, hb_first)
+        | hb_map_retains_sorting (hb_second)
+        ;
+    if (!Coverage::add_coverage (sc.c,
+                                 prime_id,
+                                 2,
+                                 + new_coverage,
+                                 marks.get_population () * 2 + 4))
+      return -1;
+
+    auto mark_array =
+        graph.as_table<MarkArray> (sc.this_index, &markArray);
+    if (!mark_array) return -1;
+    unsigned new_mark_array =
+        mark_array.table->clone (sc.c,
+                                 mark_array.index,
+                                 sc.mark_array_links,
+                                 marks,
+                                 start);
+    graph.add_link (&(prime->markArray), prime_id, new_mark_array);
+
+    unsigned class_count = classCount;
+    auto base_array =
+        graph.as_table<AnchorMatrix> (sc.this_index, &baseArray, class_count);
+    if (!base_array) return -1;
+    unsigned new_base_array =
+        base_array.table->clone (sc.c,
+                                 base_array.index,
+                                 start, end, this->classCount);
+    graph.add_link (&(prime->baseArray), prime_id, new_base_array);
+
+    return prime_id;
+  }
+};
+
+
+struct MarkBasePos : public OT::Layout::GPOS_impl::MarkBasePos
+{
+  hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c,
+                                         unsigned this_index)
+  {
+    switch (u.format) {
+    case 1:
+      return ((MarkBasePosFormat1*)(&u.format1))->split_subtables (c, this_index);
+#ifndef HB_NO_BORING_EXPANSION
+    case 2: HB_FALLTHROUGH;
+      // Don't split 24bit MarkBasePos's.
+#endif
+    default:
+      return hb_vector_t<unsigned> ();
+    }
+  }
+
+  bool sanitize (graph_t::vertex_t& vertex) const
+  {
+    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
+    if (vertex_len < u.format.get_size ()) return false;
+
+    switch (u.format) {
+    case 1:
+      return ((MarkBasePosFormat1*)(&u.format1))->sanitize (vertex);
+#ifndef HB_NO_BORING_EXPANSION
+    case 2: HB_FALLTHROUGH;
+#endif
+    default:
+      // We don't handle any other formats here.
+      return false;
+    }
+  }
+};
+
+
+}
+
+#endif  // GRAPH_MARKBASEPOS_GRAPH_HH
diff --git a/src/graph/pairpos-graph.hh b/src/graph/pairpos-graph.hh
index 769f29064..976b87232 100644
--- a/src/graph/pairpos-graph.hh
+++ b/src/graph/pairpos-graph.hh
@@ -548,14 +548,8 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallTypes>
   {
-    hb_hashmap_t<unsigned, unsigned> result;
-
-    const auto& o = c.graph.object (this_index);
-    for (const auto& l : o.real_links) {
-      result.set (l.position, l.objidx);
-    }
-
-    return result;
+    const auto& v = c.graph.vertices_[this_index];
+    return v.position_to_index_map ();
   }
 
   const Coverage* get_coverage (gsubgpos_graph_context_t& c,
diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh
index 61b142238..40a532611 100644
--- a/src/hb-repacker.hh
+++ b/src/hb-repacker.hh
@@ -276,33 +276,17 @@ bool _process_overflows (const hb_vector_t<graph::overflow_record_t>& overflows,
   return resolution_attempted;
 }
 
-/*
- * Attempts to modify the topological sorting of the provided object graph to
- * eliminate offset overflows in the links between objects of the graph. If a
- * non-overflowing ordering is found the updated graph is serialized it into the
- * provided serialization context.
- *
- * If necessary the structure of the graph may be modified in ways that do not
- * affect the functionality of the graph. For example shared objects may be
- * duplicated.
- *
- * For a detailed writeup describing how the algorithm operates see:
- * docs/repacker.md
- */
-template<typename T>
-inline hb_blob_t*
-hb_resolve_overflows (const T& packed,
-                      hb_tag_t table_tag,
-                      unsigned max_rounds = 20,
-                      bool recalculate_extensions = false) {
-  graph_t sorted_graph (packed);
+inline bool
+hb_resolve_graph_overflows (hb_tag_t table_tag,
+                            unsigned max_rounds,
+                            bool recalculate_extensions,
+                            graph_t& sorted_graph /* IN/OUT */)
+{
   sorted_graph.sort_shortest_distance ();
 
   bool will_overflow = graph::will_overflow (sorted_graph);
   if (!will_overflow)
-  {
-    return graph::serialize (sorted_graph);
-  }
+    return true;
 
   graph::gsubgpos_graph_context_t ext_context (table_tag, sorted_graph);
   if ((table_tag == HB_OT_TAG_GPOS
@@ -314,13 +298,13 @@ hb_resolve_overflows (const T& packed,
     DEBUG_MSG (SUBSET_REPACK, nullptr, "Splitting subtables if needed.");
     if (!_presplit_subtables_if_needed (ext_context)) {
       DEBUG_MSG (SUBSET_REPACK, nullptr, "Subtable splitting failed.");
-      return nullptr;
+      return false;
     }
 
     DEBUG_MSG (SUBSET_REPACK, nullptr, "Promoting lookups to extensions if needed.");
     if (!_promote_extensions_if_needed (ext_context)) {
       DEBUG_MSG (SUBSET_REPACK, nullptr, "Extensions promotion failed.");
-      return nullptr;
+      return false;
     }
   }
@@ -360,15 +344,41 @@ hb_resolve_overflows (const T& packed,
   if (sorted_graph.in_error ())
   {
     DEBUG_MSG (SUBSET_REPACK, nullptr, "Sorted graph in error state.");
-    return nullptr;
+    return false;
   }
 
   if (graph::will_overflow (sorted_graph))
   {
     DEBUG_MSG (SUBSET_REPACK, nullptr, "Offset overflow resolution failed.");
-    return nullptr;
+    return false;
   }
 
+  return true;
+}
+
+/*
+ * Attempts to modify the topological sorting of the provided object graph to
+ * eliminate offset overflows in the links between objects of the graph. If a
+ * non-overflowing ordering is found the updated graph is serialized it into the
+ * provided serialization context.
+ *
+ * If necessary the structure of the graph may be modified in ways that do not
+ * affect the functionality of the graph. For example shared objects may be
+ * duplicated.
+ *
+ * For a detailed writeup describing how the algorithm operates see:
+ * docs/repacker.md
+ */
+template<typename T>
+inline hb_blob_t*
+hb_resolve_overflows (const T& packed,
+                      hb_tag_t table_tag,
+                      unsigned max_rounds = 20,
+                      bool recalculate_extensions = false) {
+  graph_t sorted_graph (packed);
+  if (!hb_resolve_graph_overflows (table_tag, max_rounds, recalculate_extensions, sorted_graph))
+    return nullptr;
+
+  return graph::serialize (sorted_graph);
 }
diff --git a/src/hb-serialize.hh b/src/hb-serialize.hh
index 4b22e46a5..f47cde5eb 100644
--- a/src/hb-serialize.hh
+++ b/src/hb-serialize.hh
@@ -142,7 +142,10 @@ struct hb_serialize_context_t
 
     HB_INTERNAL static int cmp (const void* a, const void* b)
     {
-      return ((const link_t*)a)->position - ((const link_t*)b)->position;
+      int cmp = ((const link_t*)a)->position - ((const link_t*)b)->position;
+      if (cmp) return cmp;
+
+      return ((const link_t*)a)->objidx - ((const link_t*)b)->objidx;
     }
   };
 
diff --git a/src/meson.build b/src/meson.build
index 3cbebd3ea..4cf3451e9 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -349,6 +349,7 @@ hb_subset_sources = files(
   'graph/gsubgpos-context.hh',
   'graph/gsubgpos-graph.hh',
   'graph/pairpos-graph.hh',
+  'graph/markbasepos-graph.hh',
   'graph/coverage-graph.hh',
   'graph/classdef-graph.hh',
   'graph/split-helpers.hh',
diff --git a/src/test-repacker.cc b/src/test-repacker.cc
index e6ac36e7a..cd8789f1b 100644
--- a/src/test-repacker.cc
+++ b/src/test-repacker.cc
@@ -177,6 +177,16 @@ static unsigned add_coverage (unsigned start, unsigned end,
   return add_object ((char*) coverage, 10, c);
 }
 
+
+template<typename It>
+static unsigned add_coverage (It it,
+                              hb_serialize_context_t* c)
+{
+  c->push ();
+  OT::Layout::Common::Coverage_serialize (c, it);
+  return c->pop_pack (false);
+}
+
 // Adds a class that maps glyphs from [start_glyph, end_glyph)
 // to classes 1...n
 static unsigned add_class_def (uint16_t start_glyph,
@@ -297,6 +307,135 @@ static unsigned add_pair_pos_2 (unsigned starting_class,
   return c->pop_pack (false);
 }
 
+static unsigned add_mark_base_pos_1 (unsigned mark_coverage,
+                                     unsigned base_coverage,
+                                     unsigned mark_array,
+                                     unsigned base_array,
+                                     unsigned class_count,
+                                     hb_serialize_context_t* c)
+{
+  uint8_t format[] = {
+    0, 1
+  };
+
+  start_object ((char*) format, 2, c);
+  add_offset (mark_coverage, c);
+  add_offset (base_coverage, c);
+
+  uint8_t count[] = {
+    (uint8_t) ((class_count >> 8) & 0xFF),
+    (uint8_t) (class_count & 0xFF),
+  };
+  extend ((char*) count, 2, c);
+
+  add_offset (mark_array, c);
+  add_offset (base_array, c);
+
+  return c->pop_pack (false);
+}
+
+template<uint16_t class_count,
+         uint16_t base_count,
+         uint16_t mark_count,
+         uint16_t table_count>
+struct MarkBasePosBuffers
+{
+  unsigned base_anchors[class_count * base_count];
+  unsigned mark_anchors[mark_count];
+  uint8_t anchor_buffers[class_count * base_count + 100];
+  uint8_t class_buffer[class_count * 2];
+
+  MarkBasePosBuffers (hb_serialize_context_t* c)
+  {
+    for (unsigned i = 0; i < sizeof (anchor_buffers) / 2; i++)
+    {
+      OT::HBUINT16* value = (OT::HBUINT16*) (&anchor_buffers[2 * i]);
+      *value = i;
+    }
+
+    for (unsigned i = 0; i < class_count * base_count; i++)
+    {
+      base_anchors[i] = add_object ((char*) &anchor_buffers[i], 100, c);
+      if (i < class_count) {
+        class_buffer[i * 2] = (uint8_t) ((i >> 8) & 0xFF);
+        class_buffer[i * 2 + 1] = (uint8_t) (i & 0xFF);
+      }
+    }
+
+    for (unsigned i = 0; i < mark_count; i++)
+    {
+      mark_anchors[i] = add_object ((char*) &anchor_buffers[i], 4, c);
+    }
+  }
+
+  unsigned create_mark_base_pos_1 (unsigned table_index, hb_serialize_context_t* c)
+  {
+    unsigned class_per_table = class_count / table_count;
+    unsigned mark_per_class = mark_count / class_count;
+    unsigned start_class = class_per_table * table_index;
+    unsigned end_class = class_per_table * (table_index + 1) - 1;
+
+    // baseArray
+    uint8_t base_count_buffer[] = {
+      (uint8_t) ((base_count >> 8) & 0xFF),
+      (uint8_t) (base_count & 0xFF),
+    };
+    start_object ((char*) base_count_buffer, 2, c);
+    for (unsigned base = 0; base < base_count; base++)
+    {
+      for (unsigned klass = start_class; klass <= end_class; klass++)
+      {
+        unsigned i = base * class_count + klass;
+        add_offset (base_anchors[i], c);
+      }
+    }
+    unsigned base_array = c->pop_pack (false);
+
+    // markArray
+    unsigned num_marks = class_per_table * mark_per_class;
+    uint8_t mark_count_buffer[] = {
+      (uint8_t) ((num_marks >> 8) & 0xFF),
+      (uint8_t) (num_marks & 0xFF),
+    };
+    start_object ((char*) mark_count_buffer, 2, c);
+    for (unsigned mark = 0; mark < mark_count; mark++)
+    {
+      unsigned klass = mark % class_count;
+      if (klass < start_class || klass > end_class) continue;
+      klass -= start_class;
+
+      extend ((char*) &class_buffer[2 * klass], 2, c);
+      add_offset (mark_anchors[mark], c);
+    }
+    unsigned mark_array = c->pop_pack (false);
+
+    // markCoverage
+    auto it =
+        + hb_range ((hb_codepoint_t) mark_count)
+        | hb_filter ([&] (hb_codepoint_t mark) {
+            unsigned klass = mark % class_count;
+            return klass >= class_per_table * table_index &&
+                   klass < class_per_table * (table_index + 1);
+          })
+        ;
+    unsigned mark_coverage = add_coverage (it, c);
+
+    // baseCoverage
+    unsigned base_coverage = add_coverage (10, 10 + base_count - 1, c);
+
+    return add_mark_base_pos_1 (mark_coverage,
+                                base_coverage,
+                                mark_array,
+                                base_array,
+                                class_per_table,
+                                c);
+  }
+};
+
+
+
+
 static void
 run_resolve_overflow_test (const char* name,
                            hb_serialize_context_t& overflowing,
@@ -309,44 +448,32 @@ static void run_resolve_overflow_test (const char* name,
           name);
 
   graph_t graph (overflowing.object_graph ());
+  graph_t expected_graph (expected.object_graph ());
+  if (graph::will_overflow (expected_graph))
+  {
+    expected_graph.assign_spaces ();
+    expected_graph.sort_shortest_distance ();
+  }
 
-
+  // Check that overflow resolution succeeds
   assert (overflowing.offset_overflow ());
-  hb_blob_t* out = hb_resolve_overflows (overflowing.object_graph (),
-                                         tag,
-                                         num_iterations,
-                                         recalculate_extensions);
+  assert (hb_resolve_graph_overflows (tag,
+                                      num_iterations,
+                                      recalculate_extensions,
+                                      graph));
+
+  // Check the graphs can be serialized.
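+  // Both the resolved and the expected graph must pack without error;
+  // serialization failures are caught here, before the graphs are compared.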
+  hb_blob_t* out = graph::serialize (graph);
   assert (out);
-
-  hb_bytes_t result = out->as_bytes ();
-
-  assert (!expected.offset_overflow ());
-  hb_bytes_t expected_result = expected.copy_bytes ();
-
-  if (result.length != expected_result.length)
-  {
-    printf("result.length (%u) != expected.length (%u).\n",
-           result.length,
-           expected_result.length);
-  }
-  assert (result.length == expected_result.length);
-
-  bool equal = true;
-  for (unsigned i = 0; i < expected_result.length; i++)
-  {
-    if (result[i] != expected_result[i])
-    {
-      equal = false;
-      uint8_t a = result[i];
-      uint8_t b = expected_result[i];
-      printf("%08u: %x != %x\n", i, a, b);
-    }
-  }
-
-  assert (equal);
-
-  expected_result.fini ();
   hb_blob_destroy (out);
+  out = graph::serialize (expected_graph);
+  assert (out);
+  hb_blob_destroy (out);
+
+  // Check the graphs are equivalent
+  graph.normalize ();
+  expected_graph.normalize ();
+  assert (graph == expected_graph);
 }
 
 static void add_virtual_offset (unsigned id,
@@ -1308,6 +1435,38 @@ populate_serializer_with_large_pair_pos_2 (hb_serialize_context_t* c,
   free (device_tables);
 }
 
+template<uint16_t class_count,
+         uint16_t base_count,
+         uint16_t mark_count,
+         uint16_t table_count>
+static void
+populate_serializer_with_large_mark_base_pos_1 (hb_serialize_context_t* c)
+{
+  c->start_serialize<char> ();
+
+  MarkBasePosBuffers<class_count, base_count, mark_count, table_count> buffers (c);
+
+  unsigned mark_base_pos[table_count];
+  for (unsigned i = 0; i < table_count; i++)
+    mark_base_pos[i] = buffers.create_mark_base_pos_1 (i, c);
+
+  for (int i = 0; i < table_count; i++)
+    mark_base_pos[i] = add_extension (mark_base_pos[i], 4, c);
+
+  start_lookup (9, table_count, c);
+
+  for (int i = 0; i < table_count; i++)
+    add_offset (mark_base_pos[i], c);
+
+  unsigned lookup = finish_lookup (c);
+
+  unsigned lookup_list = add_lookup_list (&lookup, 1, c);
+
+  add_gsubgpos_header (lookup_list, c);
+
+  c->end_serialize ();
+}
+
 static void test_sort_shortest ()
 {
   size_t buffer_size = 100;
@@ -1792,6 +1951,29 @@ static void test_resolve_with_pair_pos_2_split_with_device_tables ()
   free (expected_buffer);
 }
 
+static void test_resolve_with_basic_mark_base_pos_1_split ()
+{
+  size_t buffer_size = 200000;
+  void* buffer = malloc (buffer_size);
+  assert (buffer);
+  hb_serialize_context_t c (buffer, buffer_size);
+  populate_serializer_with_large_mark_base_pos_1 <40, 10, 110, 1>(&c);
+
+  void* expected_buffer = malloc (buffer_size);
+  assert (expected_buffer);
+  hb_serialize_context_t e (expected_buffer, buffer_size);
+  populate_serializer_with_large_mark_base_pos_1 <40, 10, 110, 2>(&e);
+
+  run_resolve_overflow_test ("test_resolve_with_basic_mark_base_pos_1_split",
+                             c,
+                             e,
+                             20,
+                             true,
+                             HB_TAG('G', 'P', 'O', 'S'));
+  free (buffer);
+  free (expected_buffer);
+}
+
 static void test_resolve_overflows_via_splitting_spaces ()
 {
   size_t buffer_size = 160000;
@@ -1944,6 +2126,7 @@ main (int argc, char **argv)
   test_resolve_with_basic_pair_pos_2_split ();
   test_resolve_with_pair_pos_2_split_with_device_tables ();
   test_resolve_with_close_to_limit_pair_pos_2_split ();
+  test_resolve_with_basic_mark_base_pos_1_split ();
 
   // TODO(grieger): have run overflow tests compare graph equality not final packed binary.
   // TODO(grieger): split test where multiple subtables in one lookup are split to test link ordering.
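
Note (not part of the patch): the core of MarkBasePosFormat1::split_subtables above is a greedy linear scan — per-class byte contributions are accumulated, and a split point is recorded whenever the running subtable size would reach the 16-bit offset limit. The sketch below illustrates just that scan with HarfBuzz types replaced by the standard library; it is simplified (it ignores the coverage-table bytes and the shared-subgraph tracking the real code also accounts for), and all names in it are illustrative.

#include <vector>

// Returns the class indices at which a new subtable must begin so that no
// subtable's serialized size reaches the 16-bit offset limit (65536 bytes).
// class_sizes[k] approximates the bytes class k contributes (mark records,
// base anchor offsets, and anchor subgraphs); base_size is the fixed
// per-subtable overhead.
std::vector<unsigned> find_split_points (const std::vector<unsigned>& class_sizes,
                                         unsigned base_size)
{
  std::vector<unsigned> split_points;
  unsigned accumulated = base_size;
  for (unsigned klass = 0; klass < class_sizes.size (); klass++)
  {
    accumulated += class_sizes[klass];
    if (accumulated >= (1u << 16))
    {
      split_points.push_back (klass);  // klass starts the next subtable.
      accumulated = base_size + class_sizes[klass];
    }
  }
  return split_points;
}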