From 10c8fc55535e679a75f6f3012273f256e0416d90 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 2 Dec 2022 15:34:34 -0700 Subject: [PATCH 1/7] [multimap] Add a multimap datastructure --- src/Makefile.am | 5 +++ src/Makefile.sources | 1 + src/hb-multimap.hh | 92 ++++++++++++++++++++++++++++++++++++++++++++ src/meson.build | 2 + src/test-multimap.cc | 50 ++++++++++++++++++++++++ 5 files changed, 150 insertions(+) create mode 100644 src/hb-multimap.hh create mode 100644 src/test-multimap.cc diff --git a/src/Makefile.am b/src/Makefile.am index 225444e32..a4758e792 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -368,6 +368,7 @@ COMPILED_TESTS = \ test-iter \ test-machinery \ test-map \ + test-multimap \ test-number \ test-ot-tag \ test-priority-queue \ @@ -407,6 +408,10 @@ test_map_SOURCES = test-map.cc hb-static.cc test_map_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS) test_map_LDADD = $(COMPILED_TESTS_LDADD) +test_multimap_SOURCES = test-multimap.cc hb-static.cc +test_multimap_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS) +test_multimap_LDADD = $(COMPILED_TESTS_LDADD) + test_number_SOURCES = test-number.cc hb-number.cc test_number_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS) test_number_LDADD = $(COMPILED_TESTS_LDADD) diff --git a/src/Makefile.sources b/src/Makefile.sources index 6c891eac5..10db3c151 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -52,6 +52,7 @@ HB_BASE_sources = \ hb-map.hh \ hb-meta.hh \ hb-ms-feature-ranges.hh \ + hb-multimap.hh \ hb-mutex.hh \ hb-null.hh \ hb-number.cc \ diff --git a/src/hb-multimap.hh b/src/hb-multimap.hh new file mode 100644 index 000000000..4fde00a75 --- /dev/null +++ b/src/hb-multimap.hh @@ -0,0 +1,92 @@ +/* + * Copyright © 2022 Behdad Esfahbod + * + * This is part of HarfBuzz, a text shaping library. 
+ * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_MULTIMAP_HH +#define HB_MULTIMAP_HH + +#include "hb.hh" +#include "hb-map.hh" +#include "hb-vector.hh" + + +/* + * hb_multimap_t + */ + +struct hb_multimap_t +{ + void add (hb_codepoint_t k, hb_codepoint_t v) + { + hb_codepoint_t *i; + if (multiples_indices.has (k, &i)) + { + multiples_values[*i].push (v); + return; + } + + hb_codepoint_t *old_v; + if (singulars.has (k, &old_v)) + { + hb_codepoint_t old = *old_v; + singulars.del (k); + + multiples_indices.set (k, multiples_values.length); + auto *vec = multiples_values.push (); + + vec->push (old); + vec->push (v); + + return; + } + + singulars.set (k, v); + } + + hb_array_t get (hb_codepoint_t k) + { + hb_codepoint_t *v; + if (singulars.has (k, &v)) + return hb_array (v, 1); + + hb_codepoint_t *i; + if (multiples_indices.has (k, &i)) + return multiples_values[*i].as_array (); + + return hb_array_t (); + } + + bool in_error () const + { + return singulars.in_error () || multiples_indices.in_error () || multiples_values.in_error 
(); + } + + protected: + hb_map_t singulars; + hb_map_t multiples_indices; + hb_vector_t> multiples_values; +}; + + + +#endif /* HB_MULTIMAP_HH */ diff --git a/src/meson.build b/src/meson.build index d53e77b5f..07c0eb3fe 100644 --- a/src/meson.build +++ b/src/meson.build @@ -56,6 +56,7 @@ hb_base_sources = files( 'hb-map.hh', 'hb-meta.hh', 'hb-ms-feature-ranges.hh', + 'hb-multimap.hh', 'hb-mutex.hh', 'hb-null.hh', 'hb-number.cc', @@ -580,6 +581,7 @@ if get_option('tests').enabled() 'test-iter': ['test-iter.cc', 'hb-static.cc'], 'test-machinery': ['test-machinery.cc', 'hb-static.cc'], 'test-map': ['test-map.cc', 'hb-static.cc'], + 'test-multimap': ['test-multimap.cc', 'hb-static.cc'], 'test-number': ['test-number.cc', 'hb-number.cc'], 'test-ot-tag': ['hb-ot-tag.cc'], 'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'], diff --git a/src/test-multimap.cc b/src/test-multimap.cc new file mode 100644 index 000000000..7240c5a50 --- /dev/null +++ b/src/test-multimap.cc @@ -0,0 +1,50 @@ +/* + * Copyright © 2022 Behdad Esfahbod + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. 
THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "hb.hh" +#include "hb-multimap.hh" + +int +main (int argc, char **argv) +{ + hb_multimap_t m; + + assert (m.get (10).length == 0); + + m.add (10, 11); + assert (m.get (10).length == 1); + + m.add (10, 12); + assert (m.get (10).length == 2); + + m.add (10, 13); + assert (m.get (10).length == 3); + assert (m.get (10)[0] == 11); + assert (m.get (10)[1] == 12); + assert (m.get (10)[2] == 13); + + assert (m.get (11).length == 0); + + return 0; +} From 7d6893a8034230458ba22f677d54e67c68b1508a Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 2 Dec 2022 15:50:52 -0700 Subject: [PATCH 2/7] [subset-accelerator] Cache gid-to-unicodes --- src/hb-subset-accelerator.hh | 15 +++++++++++---- src/hb-subset-plan.cc | 12 ++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/hb-subset-accelerator.hh b/src/hb-subset-accelerator.hh index 90f431caf..63ae6d77e 100644 --- a/src/hb-subset-accelerator.hh +++ b/src/hb-subset-accelerator.hh @@ -31,6 +31,7 @@ #include "hb.hh" #include "hb-map.hh" +#include "hb-multimap.hh" #include "hb-set.hh" extern HB_INTERNAL hb_user_data_key_t _hb_subset_accelerator_user_data_key; @@ -51,11 +52,12 @@ struct hb_subset_accelerator_t } static hb_subset_accelerator_t* create(const hb_map_t& unicode_to_gid_, - const hb_set_t& unicodes_, + const hb_multimap_t& gid_to_unicodes_, /* by const reference, like the constructor: a by-value parameter would copy the whole multimap per call */ + const hb_set_t& unicodes_, bool has_seac_) { hb_subset_accelerator_t* accel = (hb_subset_accelerator_t*) hb_malloc (sizeof(hb_subset_accelerator_t)); - new (accel) hb_subset_accelerator_t (unicode_to_gid_, unicodes_); + new (accel) hb_subset_accelerator_t (unicode_to_gid_, gid_to_unicodes_, unicodes_); accel->has_seac = has_seac_; return accel; } @@ -76,8 +78,9 @@ struct hb_subset_accelerator_t } hb_subset_accelerator_t (const hb_map_t& unicode_to_gid_, +
const hb_multimap_t& gid_to_unicodes_, const hb_set_t& unicodes_) - : unicode_to_gid(unicode_to_gid_), unicodes(unicodes_), + : unicode_to_gid(unicode_to_gid_), gid_to_unicodes (gid_to_unicodes_), unicodes(unicodes_), cmap_cache(nullptr), destroy_cmap_cache(nullptr), has_seac(false), cff_accelerator(nullptr), destroy_cff_accelerator(nullptr) { sanitized_table_cache_lock.init (); } @@ -91,6 +94,7 @@ struct hb_subset_accelerator_t mutable hb_hashmap_t> sanitized_table_cache; const hb_map_t unicode_to_gid; + const hb_multimap_t gid_to_unicodes; const hb_set_t unicodes; // cmap @@ -106,7 +110,10 @@ struct hb_subset_accelerator_t bool in_error () const { - return unicode_to_gid.in_error() || unicodes.in_error () || sanitized_table_cache.in_error (); + return unicode_to_gid.in_error () || + gid_to_unicodes.in_error () || + unicodes.in_error () || + sanitized_table_cache.in_error (); } }; diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index ae169fb8e..bde77d557 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -27,6 +27,7 @@ #include "hb-subset-plan.hh" #include "hb-subset-accelerator.hh" #include "hb-map.hh" +#include "hb-multimap.hh" #include "hb-set.hh" #include "hb-ot-cmap-table.hh" @@ -930,8 +931,19 @@ hb_subset_plan_create_or_fail (hb_face_t *face, if (plan->attach_accelerator_data) { + hb_multimap_t gid_to_unicodes; + + hb_map_t &unicode_to_gid = *plan->codepoint_to_glyph; + + for (auto unicode : *plan->unicodes) + { + auto gid = unicode_to_gid[unicode]; + gid_to_unicodes.add (gid, unicode); + } + plan->inprogress_accelerator = hb_subset_accelerator_t::create (*plan->codepoint_to_glyph, + gid_to_unicodes, *plan->unicodes, plan->has_seac); } From da7961b2e879aab88fedda7cd0c9e2de4c3240a1 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 2 Dec 2022 16:08:40 -0700 Subject: [PATCH 3/7] . 
--- src/hb-multimap.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-multimap.hh b/src/hb-multimap.hh index 4fde00a75..f0f95917a 100644 --- a/src/hb-multimap.hh +++ b/src/hb-multimap.hh @@ -63,7 +63,7 @@ struct hb_multimap_t singulars.set (k, v); } - hb_array_t get (hb_codepoint_t k) + hb_array_t get (hb_codepoint_t k) const { hb_codepoint_t *v; if (singulars.has (k, &v)) From 32e049a315a1f1d6e2f751f1f93472134fec8f00 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 2 Dec 2022 16:09:10 -0700 Subject: [PATCH 4/7] [subset-plan] Use gid-to-unicodes multimap One test fails. Need investigation. --- src/hb-subset-plan.cc | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index bde77d557..eb88657e1 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -524,14 +524,39 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, cmap_unicodes = &plan->accelerator->unicodes; } - for (hb_codepoint_t cp : *cmap_unicodes) + if (plan->accelerator && + unicodes->get_population () < cmap_unicodes->get_population () && + glyphs->get_population () < cmap_unicodes->get_population ()) { - hb_codepoint_t gid = (*unicode_glyphid_map)[cp]; - if (!unicodes->has (cp) && !glyphs->has (gid)) - continue; + auto &gid_to_unicodes = plan->accelerator->gid_to_unicodes; + for (hb_codepoint_t gid : *glyphs) + { + auto unicodes = gid_to_unicodes.get (gid); - plan->codepoint_to_glyph->set (cp, gid); - plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); + for (hb_codepoint_t cp : unicodes) + { + plan->codepoint_to_glyph->set (cp, gid); + plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); + } + } + for (hb_codepoint_t cp : *unicodes) + { + hb_codepoint_t gid = (*unicode_glyphid_map)[cp]; + plan->codepoint_to_glyph->set (cp, gid); + plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); + } + } + else + { + for (hb_codepoint_t cp : *cmap_unicodes) + { 
+ hb_codepoint_t gid = (*unicode_glyphid_map)[cp]; + if (!unicodes->has (cp) && !glyphs->has (gid)) + continue; + + plan->codepoint_to_glyph->set (cp, gid); + plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); + } } /* Add gids which where requested, but not mapped in cmap */ From 1a40da4ad1a8896f65a99838d5251613ecc8e350 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 2 Dec 2022 16:13:37 -0700 Subject: [PATCH 5/7] [subset-plan] Use add_array instead of add_sorted_array That vector is not declared as sorted. --- src/hb-subset-plan.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index eb88657e1..76666ad17 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -571,7 +571,7 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, auto &arr = plan->unicode_to_new_gid_list; if (arr.length) { - plan->unicodes->add_sorted_array (&arr.arrayZ->first, arr.length, sizeof (*arr.arrayZ)); + plan->unicodes->add_array (&arr.arrayZ->first, arr.length, sizeof (*arr.arrayZ)); plan->_glyphset_gsub->add_array (&arr.arrayZ->second, arr.length, sizeof (*arr.arrayZ)); } } From ff419789efb2a7b8f997fbd8d87bea738f2a6c59 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 2 Dec 2022 16:25:26 -0700 Subject: [PATCH 6/7] [subset-plan] Sort unicode_to_new_gid_list when needed --- src/hb-algs.hh | 12 ++++++++++++ src/hb-subset-plan.cc | 7 ++++++- src/hb-subset-plan.hh | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index d63d4f30d..d85a4afe1 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -524,6 +524,18 @@ struct hb_pair_t bool operator > (const pair_t& o) const { return first > o.first || (first == o.first && second > o.second); } bool operator <= (const pair_t& o) const { return !(*this > o); } + static int cmp (const void *pa, const void *pb) + { + pair_t *a = (pair_t *) pa; + pair_t *b = (pair_t *) pb; + + if (a->first < b->first) 
return -1; + if (a->first > b->first) return +1; + if (a->second < b->second) return -1; + if (a->second > b->second) return +1; + return 0; + } + friend void swap (hb_pair_t& a, hb_pair_t& b) { hb_swap (a.first, b.first); diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index 76666ad17..943cf082d 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -541,10 +541,15 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, } for (hb_codepoint_t cp : *unicodes) { + /* Don't double-add entry. */ + if (plan->codepoint_to_glyph->has (cp)) + continue; + hb_codepoint_t gid = (*unicode_glyphid_map)[cp]; plan->codepoint_to_glyph->set (cp, gid); plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); } + plan->unicode_to_new_gid_list.qsort (); } else { @@ -571,7 +576,7 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, auto &arr = plan->unicode_to_new_gid_list; if (arr.length) { - plan->unicodes->add_array (&arr.arrayZ->first, arr.length, sizeof (*arr.arrayZ)); + plan->unicodes->add_sorted_array (&arr.arrayZ->first, arr.length, sizeof (*arr.arrayZ)); plan->_glyphset_gsub->add_array (&arr.arrayZ->second, arr.length, sizeof (*arr.arrayZ)); } } diff --git a/src/hb-subset-plan.hh b/src/hb-subset-plan.hh index 1dea7aad9..0b162ee7b 100644 --- a/src/hb-subset-plan.hh +++ b/src/hb-subset-plan.hh @@ -115,7 +115,7 @@ struct hb_subset_plan_t // For each cp that we'd like to retain maps to the corresponding gid. 
hb_set_t *unicodes; - hb_vector_t> unicode_to_new_gid_list; + hb_sorted_vector_t> unicode_to_new_gid_list; // name_ids we would like to retain hb_set_t *name_ids; From ddeac3658b46a6536a67b06b8bc8f3efd9ce5f6f Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 2 Dec 2022 16:51:07 -0700 Subject: [PATCH 7/7] [test-multimap] More tests --- src/test-multimap.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/test-multimap.cc b/src/test-multimap.cc index 7240c5a50..8cd8f5285 100644 --- a/src/test-multimap.cc +++ b/src/test-multimap.cc @@ -45,6 +45,15 @@ main (int argc, char **argv) assert (m.get (10)[2] == 13); assert (m.get (11).length == 0); + m.add (11, 14); + assert (m.get (10).length == 3); + assert (m.get (11).length == 1); + assert (m.get (12).length == 0); + assert (m.get (10)[0] == 11); + assert (m.get (10)[1] == 12); + assert (m.get (10)[2] == 13); + assert (m.get (11)[0] == 14); + assert (m.get (12)[0] == 0); // Array fallback value return 0; }