Merge pull request #3914 from harfbuzz/multimap

[multimap] Add a multimap data structure & use it for the gid-to-unicodes subset accelerator
This commit is contained in:
Behdad Esfahbod 2022-12-02 17:08:31 -07:00 committed by GitHub
commit 3fb4ea29cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 231 additions and 11 deletions

View File

@ -368,6 +368,7 @@ COMPILED_TESTS = \
test-iter \ test-iter \
test-machinery \ test-machinery \
test-map \ test-map \
test-multimap \
test-number \ test-number \
test-ot-tag \ test-ot-tag \
test-priority-queue \ test-priority-queue \
@ -407,6 +408,10 @@ test_map_SOURCES = test-map.cc hb-static.cc
test_map_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS) test_map_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
test_map_LDADD = $(COMPILED_TESTS_LDADD) test_map_LDADD = $(COMPILED_TESTS_LDADD)
test_multimap_SOURCES = test-multimap.cc hb-static.cc
test_multimap_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
test_multimap_LDADD = $(COMPILED_TESTS_LDADD)
test_number_SOURCES = test-number.cc hb-number.cc test_number_SOURCES = test-number.cc hb-number.cc
test_number_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS) test_number_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
test_number_LDADD = $(COMPILED_TESTS_LDADD) test_number_LDADD = $(COMPILED_TESTS_LDADD)

View File

@ -52,6 +52,7 @@ HB_BASE_sources = \
hb-map.hh \ hb-map.hh \
hb-meta.hh \ hb-meta.hh \
hb-ms-feature-ranges.hh \ hb-ms-feature-ranges.hh \
hb-multimap.hh \
hb-mutex.hh \ hb-mutex.hh \
hb-null.hh \ hb-null.hh \
hb-number.cc \ hb-number.cc \

View File

@ -524,6 +524,18 @@ struct hb_pair_t
bool operator > (const pair_t& o) const { return first > o.first || (first == o.first && second > o.second); } bool operator > (const pair_t& o) const { return first > o.first || (first == o.first && second > o.second); }
bool operator <= (const pair_t& o) const { return !(*this > o); } bool operator <= (const pair_t& o) const { return !(*this > o); }
static int cmp (const void *pa, const void *pb)
{
pair_t *a = (pair_t *) pa;
pair_t *b = (pair_t *) pb;
if (a->first < b->first) return -1;
if (a->first > b->first) return +1;
if (a->second < b->second) return -1;
if (a->second > b->second) return +1;
return 0;
}
friend void swap (hb_pair_t& a, hb_pair_t& b) friend void swap (hb_pair_t& a, hb_pair_t& b)
{ {
hb_swap (a.first, b.first); hb_swap (a.first, b.first);

92
src/hb-multimap.hh Normal file
View File

@ -0,0 +1,92 @@
/*
* Copyright © 2022 Behdad Esfahbod
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*/
#ifndef HB_MULTIMAP_HH
#define HB_MULTIMAP_HH
#include "hb.hh"
#include "hb-map.hh"
#include "hb-vector.hh"
/*
* hb_multimap_t
*/
/* Associates each key with one or more values (both hb_codepoint_t).
 *
 * Storage is split in two tiers:
 *   - `singulars` holds keys that currently have exactly one value,
 *     mapping the key straight to that value;
 *   - once a second value is added for a key, the key is moved to
 *     `multiples_indices`, which maps it to the index of its value list
 *     inside `multiples_values`.
 * A key lives in at most one of the two maps at a time. */
struct hb_multimap_t
{
  /* Appends value `v` to the values recorded for key `k`.
   *
   * Failures of the underlying containers are not reported here; callers
   * are expected to consult in_error () afterwards. */
  void add (hb_codepoint_t k, hb_codepoint_t v)
  {
    /* Key already has a value list: append to it. */
    hb_codepoint_t *i;
    if (multiples_indices.has (k, &i))
    {
      multiples_values[*i].push (v);
      return;
    }

    /* Key has exactly one value so far: promote it to a two-element list. */
    hb_codepoint_t *old_v;
    if (singulars.has (k, &old_v))
    {
      /* Copy the old value out before deleting the entry it points into. */
      hb_codepoint_t old = *old_v;
      singulars.del (k);

      /* The new list is appended next, so its index is the current length. */
      multiples_indices.set (k, multiples_values.length);
      auto *vec = multiples_values.push ();

      vec->push (old);
      vec->push (v);

      return;
    }

    /* First value for this key. */
    singulars.set (k, v);
  }

  /* Returns the values recorded for key `k`, or an empty array if the key
   * is unknown.
   *
   * The returned array refers to storage owned by this object (either the
   * single entry in `singulars` or the key's value list); presumably it is
   * invalidated by a later add () -- do not hold on to it across
   * modifications. */
  hb_array_t<const hb_codepoint_t> get (hb_codepoint_t k) const
  {
    hb_codepoint_t *v;
    if (singulars.has (k, &v))
      return hb_array (v, 1);

    hb_codepoint_t *i;
    if (multiples_indices.has (k, &i))
      return multiples_values[*i].as_array ();

    return hb_array_t<const hb_codepoint_t> ();
  }

  /* Returns true if any of the underlying containers is in error.
   *
   * add () does not check the result of pushing onto a per-key value list,
   * so those inner vectors are inspected here as well; otherwise a failed
   * append to an existing list would go unreported. */
  bool in_error () const
  {
    if (singulars.in_error () ||
	multiples_indices.in_error () ||
	multiples_values.in_error ())
      return true;

    for (unsigned idx = 0; idx < multiples_values.length; idx++)
      if (multiples_values[idx].in_error ())
	return true;

    return false;
  }

  protected:
  hb_map_t singulars;			/* key -> its only value */
  hb_map_t multiples_indices;		/* key -> index into multiples_values */
  hb_vector_t<hb_vector_t<hb_codepoint_t>> multiples_values; /* value lists */
};
#endif /* HB_MULTIMAP_HH */

View File

@ -31,6 +31,7 @@
#include "hb.hh" #include "hb.hh"
#include "hb-map.hh" #include "hb-map.hh"
#include "hb-multimap.hh"
#include "hb-set.hh" #include "hb-set.hh"
extern HB_INTERNAL hb_user_data_key_t _hb_subset_accelerator_user_data_key; extern HB_INTERNAL hb_user_data_key_t _hb_subset_accelerator_user_data_key;
@ -51,11 +52,12 @@ struct hb_subset_accelerator_t
} }
static hb_subset_accelerator_t* create(const hb_map_t& unicode_to_gid_, static hb_subset_accelerator_t* create(const hb_map_t& unicode_to_gid_,
const hb_set_t& unicodes_, const hb_multimap_t gid_to_unicodes_,
const hb_set_t& unicodes_,
bool has_seac_) { bool has_seac_) {
hb_subset_accelerator_t* accel = hb_subset_accelerator_t* accel =
(hb_subset_accelerator_t*) hb_malloc (sizeof(hb_subset_accelerator_t)); (hb_subset_accelerator_t*) hb_malloc (sizeof(hb_subset_accelerator_t));
new (accel) hb_subset_accelerator_t (unicode_to_gid_, unicodes_); new (accel) hb_subset_accelerator_t (unicode_to_gid_, gid_to_unicodes_, unicodes_);
accel->has_seac = has_seac_; accel->has_seac = has_seac_;
return accel; return accel;
} }
@ -76,8 +78,9 @@ struct hb_subset_accelerator_t
} }
hb_subset_accelerator_t (const hb_map_t& unicode_to_gid_, hb_subset_accelerator_t (const hb_map_t& unicode_to_gid_,
const hb_multimap_t& gid_to_unicodes_,
const hb_set_t& unicodes_) const hb_set_t& unicodes_)
: unicode_to_gid(unicode_to_gid_), unicodes(unicodes_), : unicode_to_gid(unicode_to_gid_), gid_to_unicodes (gid_to_unicodes_), unicodes(unicodes_),
cmap_cache(nullptr), destroy_cmap_cache(nullptr), cmap_cache(nullptr), destroy_cmap_cache(nullptr),
has_seac(false), cff_accelerator(nullptr), destroy_cff_accelerator(nullptr) has_seac(false), cff_accelerator(nullptr), destroy_cff_accelerator(nullptr)
{ sanitized_table_cache_lock.init (); } { sanitized_table_cache_lock.init (); }
@ -91,6 +94,7 @@ struct hb_subset_accelerator_t
mutable hb_hashmap_t<hb_tag_t, hb::unique_ptr<hb_blob_t>> sanitized_table_cache; mutable hb_hashmap_t<hb_tag_t, hb::unique_ptr<hb_blob_t>> sanitized_table_cache;
const hb_map_t unicode_to_gid; const hb_map_t unicode_to_gid;
const hb_multimap_t gid_to_unicodes;
const hb_set_t unicodes; const hb_set_t unicodes;
// cmap // cmap
@ -106,7 +110,10 @@ struct hb_subset_accelerator_t
bool in_error () const bool in_error () const
{ {
return unicode_to_gid.in_error() || unicodes.in_error () || sanitized_table_cache.in_error (); return unicode_to_gid.in_error () ||
gid_to_unicodes.in_error () ||
unicodes.in_error () ||
sanitized_table_cache.in_error ();
} }
}; };

View File

@ -27,6 +27,7 @@
#include "hb-subset-plan.hh" #include "hb-subset-plan.hh"
#include "hb-subset-accelerator.hh" #include "hb-subset-accelerator.hh"
#include "hb-map.hh" #include "hb-map.hh"
#include "hb-multimap.hh"
#include "hb-set.hh" #include "hb-set.hh"
#include "hb-ot-cmap-table.hh" #include "hb-ot-cmap-table.hh"
@ -523,14 +524,44 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes,
cmap_unicodes = &plan->accelerator->unicodes; cmap_unicodes = &plan->accelerator->unicodes;
} }
for (hb_codepoint_t cp : *cmap_unicodes) if (plan->accelerator &&
unicodes->get_population () < cmap_unicodes->get_population () &&
glyphs->get_population () < cmap_unicodes->get_population ())
{ {
hb_codepoint_t gid = (*unicode_glyphid_map)[cp]; auto &gid_to_unicodes = plan->accelerator->gid_to_unicodes;
if (!unicodes->has (cp) && !glyphs->has (gid)) for (hb_codepoint_t gid : *glyphs)
continue; {
auto unicodes = gid_to_unicodes.get (gid);
plan->codepoint_to_glyph->set (cp, gid); for (hb_codepoint_t cp : unicodes)
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid)); {
plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
}
}
for (hb_codepoint_t cp : *unicodes)
{
/* Don't double-add entry. */
if (plan->codepoint_to_glyph->has (cp))
continue;
hb_codepoint_t gid = (*unicode_glyphid_map)[cp];
plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
}
plan->unicode_to_new_gid_list.qsort ();
}
else
{
for (hb_codepoint_t cp : *cmap_unicodes)
{
hb_codepoint_t gid = (*unicode_glyphid_map)[cp];
if (!unicodes->has (cp) && !glyphs->has (gid))
continue;
plan->codepoint_to_glyph->set (cp, gid);
plan->unicode_to_new_gid_list.push (hb_pair (cp, gid));
}
} }
/* Add gids that were requested, but not mapped in cmap */ /* Add gids that were requested, but not mapped in cmap */
@ -930,8 +961,19 @@ hb_subset_plan_create_or_fail (hb_face_t *face,
if (plan->attach_accelerator_data) if (plan->attach_accelerator_data)
{ {
hb_multimap_t gid_to_unicodes;
hb_map_t &unicode_to_gid = *plan->codepoint_to_glyph;
for (auto unicode : *plan->unicodes)
{
auto gid = unicode_to_gid[unicode];
gid_to_unicodes.add (gid, unicode);
}
plan->inprogress_accelerator = plan->inprogress_accelerator =
hb_subset_accelerator_t::create (*plan->codepoint_to_glyph, hb_subset_accelerator_t::create (*plan->codepoint_to_glyph,
gid_to_unicodes,
*plan->unicodes, *plan->unicodes,
plan->has_seac); plan->has_seac);
} }

View File

@ -115,7 +115,7 @@ struct hb_subset_plan_t
// For each cp that we'd like to retain maps to the corresponding gid. // For each cp that we'd like to retain maps to the corresponding gid.
hb_set_t *unicodes; hb_set_t *unicodes;
hb_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> unicode_to_new_gid_list; hb_sorted_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> unicode_to_new_gid_list;
// name_ids we would like to retain // name_ids we would like to retain
hb_set_t *name_ids; hb_set_t *name_ids;

View File

@ -56,6 +56,7 @@ hb_base_sources = files(
'hb-map.hh', 'hb-map.hh',
'hb-meta.hh', 'hb-meta.hh',
'hb-ms-feature-ranges.hh', 'hb-ms-feature-ranges.hh',
'hb-multimap.hh',
'hb-mutex.hh', 'hb-mutex.hh',
'hb-null.hh', 'hb-null.hh',
'hb-number.cc', 'hb-number.cc',
@ -580,6 +581,7 @@ if get_option('tests').enabled()
'test-iter': ['test-iter.cc', 'hb-static.cc'], 'test-iter': ['test-iter.cc', 'hb-static.cc'],
'test-machinery': ['test-machinery.cc', 'hb-static.cc'], 'test-machinery': ['test-machinery.cc', 'hb-static.cc'],
'test-map': ['test-map.cc', 'hb-static.cc'], 'test-map': ['test-map.cc', 'hb-static.cc'],
'test-multimap': ['test-multimap.cc', 'hb-static.cc'],
'test-number': ['test-number.cc', 'hb-number.cc'], 'test-number': ['test-number.cc', 'hb-number.cc'],
'test-ot-tag': ['hb-ot-tag.cc'], 'test-ot-tag': ['hb-ot-tag.cc'],
'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'], 'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'],

59
src/test-multimap.cc Normal file
View File

@ -0,0 +1,59 @@
/*
* Copyright © 2022 Behdad Esfahbod
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*/
#include "hb.hh"
#include "hb-multimap.hh"
int
main (int argc, char **argv)
{
hb_multimap_t m;
assert (m.get (10).length == 0);
m.add (10, 11);
assert (m.get (10).length == 1);
m.add (10, 12);
assert (m.get (10).length == 2);
m.add (10, 13);
assert (m.get (10).length == 3);
assert (m.get (10)[0] == 11);
assert (m.get (10)[1] == 12);
assert (m.get (10)[2] == 13);
assert (m.get (11).length == 0);
m.add (11, 14);
assert (m.get (10).length == 3);
assert (m.get (11).length == 1);
assert (m.get (12).length == 0);
assert (m.get (10)[0] == 11);
assert (m.get (10)[1] == 12);
assert (m.get (10)[2] == 13);
assert (m.get (11)[0] == 14);
assert (m.get (12)[0] == 0); // Array fallback value
return 0;
}