Merge pull request #3558 from harfbuzz/set-optimize

[perf] hb_set_t optimizations and perf suite improvements
This commit is contained in:
Behdad Esfahbod 2022-04-29 18:34:00 -06:00 committed by GitHub
commit a4522df378
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 250 additions and 121 deletions

View File

@ -4,7 +4,7 @@ NULL =
ACLOCAL_AMFLAGS = -I m4
SUBDIRS = src util test docs
SUBDIRS = src util test perf docs
EXTRA_DIST = \
autogen.sh \
@ -26,19 +26,6 @@ EXTRA_DIST = \
subprojects/ragel.wrap \
subprojects/packagefiles/ragel/meson.build \
subprojects/ttf-parser.wrap \
perf/meson.build \
perf/perf-draw.hh \
perf/perf-extents.hh \
perf/perf-shaping.hh \
perf/perf.cc \
perf/fonts/Amiri-Regular.ttf \
perf/fonts/NotoNastaliqUrdu-Regular.ttf \
perf/fonts/NotoSansDevanagari-Regular.ttf \
perf/fonts/Roboto-Regular.ttf \
perf/texts/en-thelittleprince.txt \
perf/texts/en-words.txt \
perf/texts/fa-monologue.txt \
perf/texts/fa-thelittleprince.txt \
mingw-configure.sh \
$(NULL)

View File

@ -437,6 +437,7 @@ test/shape/data/text-rendering-tests/Makefile
test/subset/Makefile
test/subset/data/Makefile
test/subset/data/repack_tests/Makefile
perf/Makefile
docs/Makefile
docs/version.xml
])

24
perf/Makefile.am Normal file
View File

@ -0,0 +1,24 @@
# Process this file with automake to produce Makefile.in
NULL =
EXTRA_DIST =
SUBDIRS =
# Extra files and directories to include in the distribution.
# Every line of the list, including the "+=" line itself, must end in a
# backslash so that make reads the whole list as one assignment.
EXTRA_DIST += \
	meson.build \
	perf-draw.hh \
	perf-extents.hh \
	perf.cc \
	benchmark-map.cc \
	benchmark-set.cc \
	benchmark-shape.cc \
	benchmark-subset.cc \
	fonts \
	texts \
	$(NULL)
# Convenience targets:
lib:
@$(MAKE) $(AM_MAKEFLAGS) -C $(top_builddir)/src lib
-include $(top_srcdir)/git.mk

View File

@ -29,11 +29,13 @@ static void BM_MapInsert(benchmark::State& state) {
RandomMap(map_size, original);
assert(hb_map_get_population(original) == map_size);
auto needle = map_size / 2;
auto v = 0;
for (auto _ : state) {
// TODO(garretrieger): create a copy of the original map.
// Needs a hb_map_copy(..) in public api.
hb_map_set (original, rand (), rand ());
hb_map_set (original, needle++, v++);
}
hb_map_destroy(original);
@ -49,9 +51,11 @@ static void BM_MapLookup(benchmark::State& state) {
RandomMap(map_size, original);
assert(hb_map_get_population(original) == map_size);
auto needle = map_size / 2;
for (auto _ : state) {
benchmark::DoNotOptimize(
hb_map_get (original, rand()));
hb_map_get (original, needle++));
}
hb_map_destroy(original);

View File

@ -74,7 +74,7 @@ BENCHMARK(BM_SetOrderedInsert_1000)
{2, 512}}); // Density
/* Single value lookup on sets of various sizes. */
static void BM_SetLookup(benchmark::State& state) {
static void BM_SetLookup(benchmark::State& state, unsigned interval) {
unsigned set_size = state.range(0);
unsigned max_value = state.range(0) * state.range(1);
@ -82,14 +82,19 @@ static void BM_SetLookup(benchmark::State& state) {
RandomSet(set_size, max_value, original);
assert(hb_set_get_population(original) == set_size);
auto needle = max_value / 2;
for (auto _ : state) {
benchmark::DoNotOptimize(
hb_set_has (original, rand() % max_value));
hb_set_has (original, (needle += interval) % max_value));
}
hb_set_destroy(original);
}
BENCHMARK(BM_SetLookup)
BENCHMARK_CAPTURE(BM_SetLookup, ordered, 3)
->Ranges(
{{1 << 10, 1 << 16}, // Set Size
{2, 512}}); // Density
BENCHMARK_CAPTURE(BM_SetLookup, random, 12345)
->Ranges(
{{1 << 10, 1 << 16}, // Set Size
{2, 512}}); // Density

88
perf/benchmark-shape.cc Normal file
View File

@ -0,0 +1,88 @@
#include "benchmark/benchmark.h"
#include <cstring>
#include "hb.h"
/* One shaping benchmark case: the text file to shape and the font file
 * to shape it with.  Both paths are relative, so they are resolved
 * against the working directory the benchmark binary is run from. */
struct test_input_t
{
const char *text_path;
const char *font_path;
} tests[] =
{
/* Persian texts, each paired with two different fonts. */
{"perf/texts/fa-thelittleprince.txt",
"perf/fonts/Amiri-Regular.ttf"},
{"perf/texts/fa-thelittleprince.txt",
"perf/fonts/NotoNastaliqUrdu-Regular.ttf"},
{"perf/texts/fa-monologue.txt",
"perf/fonts/Amiri-Regular.ttf"},
{"perf/texts/fa-monologue.txt",
"perf/fonts/NotoNastaliqUrdu-Regular.ttf"},
/* English texts with Roboto. */
{"perf/texts/en-thelittleprince.txt",
"perf/fonts/Roboto-Regular.ttf"},
{"perf/texts/en-words.txt",
"perf/fonts/Roboto-Regular.ttf"},
};
/* Benchmark body: once per benchmark iteration, shapes every
 * newline-terminated line of the file at input.text_path with the font
 * loaded from input.font_path.
 *
 * The font, the text and the buffer are created once, outside the timed
 * loop.  A file that cannot be loaded is reported through
 * state.SkipWithError() rather than assert(), so the failure is still
 * caught when assertions are compiled out (NDEBUG) instead of carrying
 * on with a null blob.
 *
 * Note: any text after the last '\n' (an unterminated final line) is
 * not shaped. */
static void BM_Shape (benchmark::State &state, const test_input_t &input)
{
  hb_blob_t *font_blob = hb_blob_create_from_file_or_fail (input.font_path);
  if (!font_blob)
  {
    state.SkipWithError ("Failed to load font file");
    return;
  }
  /* The blob and face references are released as soon as the object
   * built from them has been created. */
  hb_face_t *face = hb_face_create (font_blob, 0);
  hb_blob_destroy (font_blob);
  hb_font_t *font = hb_font_create (face);
  hb_face_destroy (face);

  hb_blob_t *text_blob = hb_blob_create_from_file_or_fail (input.text_path);
  if (!text_blob)
  {
    hb_font_destroy (font);
    state.SkipWithError ("Failed to load text file");
    return;
  }

  unsigned orig_text_length;
  const char *orig_text = hb_blob_get_data (text_blob, &orig_text_length);

  hb_buffer_t *buf = hb_buffer_create ();
  for (auto _ : state)
  {
    /* Restart from the beginning of the text on every iteration. */
    unsigned text_length = orig_text_length;
    const char *text = orig_text;

    const char *end;
    while ((end = (const char *) memchr (text, '\n', text_length)))
    {
      /* Shape [text, end) as one item; the remainder of the text is
       * passed along as well via text_length. */
      hb_buffer_clear_contents (buf);
      hb_buffer_add_utf8 (buf, text, text_length, 0, end - text);
      hb_buffer_guess_segment_properties (buf);
      hb_shape (font, buf, nullptr, 0);

      /* Step past the line just shaped and its '\n'. */
      unsigned skip = end - text + 1;
      text_length -= skip;
      text += skip;
    }
  }
  hb_buffer_destroy (buf);

  hb_blob_destroy (text_blob);
  hb_font_destroy (font);
}
/* Registers one BM_Shape benchmark per entry of tests[], named
 * "BM_Shape/<text file>/<font file>", then hands control to the
 * benchmark library. */
int main(int argc, char** argv)
{
  /* Appends the last '/'-prefixed component of path to name, never
   * writing past size bytes.  A path with no '/' is appended whole,
   * behind a "/" separator, instead of passing a null pointer on. */
  auto append_leaf = [] (char *name, unsigned size, const char *path)
  {
    const char *leaf = strrchr (path, '/');
    if (!leaf)
    {
      strncat (name, "/", size - strlen (name) - 1);
      leaf = path;
    }
    strncat (name, leaf, size - strlen (name) - 1);
  };

  for (auto& test_input : tests)
  {
    char name[1024] = "BM_Shape";
    append_leaf (name, sizeof (name), test_input.text_path);
    append_leaf (name, sizeof (name), test_input.font_path);

    benchmark::RegisterBenchmark (name, BM_Shape, test_input)
     ->Unit(benchmark::kMillisecond);
  }

  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}

View File

@ -21,6 +21,16 @@ benchmark('perf', executable('perf', 'perf.cc',
), workdir: meson.current_source_dir() / '..', timeout: 100)
benchmark('benchmark-shape', executable('benchmark-shape', 'benchmark-shape.cc',
dependencies: [
google_benchmark_dep,
],
cpp_args: [],
include_directories: [incconfig, incsrc],
link_with: [libharfbuzz],
install: false,
), workdir: meson.current_source_dir() / '..', timeout: 100)
benchmark('benchmark-set', executable('benchmark-set', 'benchmark-set.cc',
dependencies: [
google_benchmark_dep,

View File

@ -1,65 +0,0 @@
#include "benchmark/benchmark.h"
#include "hb.h"
/* Benchmarks hb_shape() on the entire contents of text_path, added to
 * the buffer as a single run, using the font at font_path and the given
 * direction and script.  Loading happens once, outside the timed loop. */
static void shape (benchmark::State &state, const char *text_path,
		   hb_direction_t direction, hb_script_t script,
		   const char *font_path)
{
  /* Font: blob -> face -> font; each intermediate reference is dropped
   * once the next object has been created from it. */
  hb_blob_t *blob = hb_blob_create_from_file_or_fail (font_path);
  assert (blob);
  hb_face_t *font_face = hb_face_create (blob, 0);
  hb_blob_destroy (blob);
  hb_font_t *font = hb_font_create (font_face);
  hb_face_destroy (font_face);

  /* Text: kept alive through text_blob until the benchmark finishes. */
  hb_blob_t *text_blob = hb_blob_create_from_file_or_fail (text_path);
  assert (text_blob);
  unsigned utf8_length;
  const char *utf8 = hb_blob_get_data (text_blob, &utf8_length);

  hb_buffer_t *buffer = hb_buffer_create ();
  for (auto _ : state)
  {
    hb_buffer_add_utf8 (buffer, utf8, utf8_length, 0, -1);
    hb_buffer_set_direction (buffer, direction);
    hb_buffer_set_script (buffer, script);
    hb_shape (font, buffer, nullptr, 0);
    /* Empty the buffer so the next iteration starts from scratch. */
    hb_buffer_clear_contents (buffer);
  }

  hb_buffer_destroy (buffer);
  hb_blob_destroy (text_blob);
  hb_font_destroy (font);
}
/* Registered cases.  After the function name and the case label, the
 * remaining arguments are forwarded to shape() in order: text path,
 * direction, script, font path. */

/* Persian texts, right-to-left Arabic script, two fonts each. */
BENCHMARK_CAPTURE (shape, fa-thelittleprince.txt - Amiri,
"perf/texts/fa-thelittleprince.txt",
HB_DIRECTION_RTL, HB_SCRIPT_ARABIC,
"perf/fonts/Amiri-Regular.ttf");
BENCHMARK_CAPTURE (shape, fa-thelittleprince.txt - NotoNastaliqUrdu,
"perf/texts/fa-thelittleprince.txt",
HB_DIRECTION_RTL, HB_SCRIPT_ARABIC,
"perf/fonts/NotoNastaliqUrdu-Regular.ttf");
BENCHMARK_CAPTURE (shape, fa-monologue.txt - Amiri,
"perf/texts/fa-monologue.txt",
HB_DIRECTION_RTL, HB_SCRIPT_ARABIC,
"perf/fonts/Amiri-Regular.ttf");
BENCHMARK_CAPTURE (shape, fa-monologue.txt - NotoNastaliqUrdu,
"perf/texts/fa-monologue.txt",
HB_DIRECTION_RTL, HB_SCRIPT_ARABIC,
"perf/fonts/NotoNastaliqUrdu-Regular.ttf");
/* English texts, left-to-right Latin script, Roboto. */
BENCHMARK_CAPTURE (shape, en-thelittleprince.txt - Roboto,
"perf/texts/en-thelittleprince.txt",
HB_DIRECTION_LTR, HB_SCRIPT_LATIN,
"perf/fonts/Roboto-Regular.ttf");
BENCHMARK_CAPTURE (shape, en-words.txt - Roboto,
"perf/texts/en-words.txt",
HB_DIRECTION_LTR, HB_SCRIPT_LATIN,
"perf/fonts/Roboto-Regular.ttf");

View File

@ -4,7 +4,6 @@
#include "config.h"
#endif
#include "perf-shaping.hh"
#ifdef HAVE_FREETYPE
enum backend_t { HARFBUZZ, FREETYPE, TTF_PARSER };
#include "perf-extents.hh"

View File

@ -874,7 +874,19 @@ struct hb_bit_set_t
page_t *page_for (hb_codepoint_t g, bool insert = false)
{
page_map_t map = {get_major (g), pages.length};
unsigned major = get_major (g);
/* The extra page_map length is necessary; can't just rely on vector here,
* since the next check would be tricked because a null page also has
* major==0, which we can't distinguish from an actual major==0 page... */
if (likely (last_page_lookup < page_map.length))
{
auto &cached_page = page_map.arrayZ[last_page_lookup];
if (cached_page.major == major)
return &pages[cached_page.index];
}
page_map_t map = {major, pages.length};
unsigned int i;
if (!page_map.bfind (map, &i, HB_NOT_FOUND_STORE_CLOSEST))
{
@ -890,15 +902,31 @@ struct hb_bit_set_t
(page_map.length - 1 - i) * page_map.item_size);
page_map[i] = map;
}
last_page_lookup = i;
return &pages[page_map[i].index];
}
const page_t *page_for (hb_codepoint_t g) const
{
page_map_t key = {get_major (g)};
const page_map_t *found = page_map.bsearch (key);
if (found)
return &pages[found->index];
return nullptr;
unsigned major = get_major (g);
/* The extra page_map length is necessary; can't just rely on vector here,
* since the next check would be tricked because a null page also has
* major==0, which we can't distinguish from an actual major==0 page... */
if (likely (last_page_lookup < page_map.length))
{
auto &cached_page = page_map.arrayZ[last_page_lookup];
if (cached_page.major == major)
return &pages[cached_page.index];
}
page_map_t key = {major};
unsigned int i;
if (!page_map.bfind (key, &i))
return nullptr;
last_page_lookup = i;
return &pages[page_map[i].index];
}
page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }

View File

@ -109,22 +109,26 @@ struct CmapSubtableFormat4
while (it) {
// Start a new range
start_cp = (*it).first;
prev_run_start_cp = (*it).first;
run_start_cp = (*it).first;
end_cp = (*it).first;
last_gid = (*it).second;
run_length = 1;
prev_delta = 0;
{
const auto& pair = *it;
start_cp = pair.first;
prev_run_start_cp = start_cp;
run_start_cp = start_cp;
end_cp = start_cp;
last_gid = pair.second;
run_length = 1;
prev_delta = 0;
}
delta = (*it).second - (*it).first;
delta = last_gid - start_cp;
mode = FIRST_SUB_RANGE;
it++;
while (it) {
// Process range
hb_codepoint_t next_cp = (*it).first;
hb_codepoint_t next_gid = (*it).second;
const auto& pair = *it;
hb_codepoint_t next_cp = pair.first;
hb_codepoint_t next_gid = pair.second;
if (next_cp != end_cp + 1) {
// Current range is over, stop processing.
break;
@ -282,16 +286,15 @@ struct CmapSubtableFormat4
}
template<typename Iterator,
hb_requires (hb_is_iterator (Iterator))>
hb_requires (hb_is_iterator (Iterator))>
HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c,
Iterator it,
Iterator it,
HBUINT16 *endCode,
HBUINT16 *startCode,
HBINT16 *idDelta,
unsigned segcount)
{
hb_hashmap_t<hb_codepoint_t, hb_codepoint_t> cp_to_gid;
+ it | hb_sink (cp_to_gid);
hb_hashmap_t<hb_codepoint_t, hb_codepoint_t> cp_to_gid { it };
HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
if (unlikely (!c->check_success (idRangeOffset))) return nullptr;
@ -323,22 +326,32 @@ struct CmapSubtableFormat4
{ return _.first <= 0xFFFF; })
;
if (format4_iter.len () == 0) return;
if (!format4_iter) return;
unsigned table_initpos = c->length ();
if (unlikely (!c->extend_min (this))) return;
this->format = 4;
// TODO(grieger): does pre-alloc make this faster?
hb_vector_t<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> cp_to_gid {
format4_iter
};
//serialize endCode[], startCode[], idDelta[]
HBUINT16* endCode = c->start_embed<HBUINT16> ();
unsigned segcount = serialize_find_segcount (format4_iter);
if (unlikely (!serialize_start_end_delta_arrays (c, format4_iter, segcount)))
unsigned segcount = serialize_find_segcount (cp_to_gid.iter());
if (unlikely (!serialize_start_end_delta_arrays (c, cp_to_gid.iter(), segcount)))
return;
HBUINT16 *startCode = endCode + segcount + 1;
HBINT16 *idDelta = ((HBINT16*)startCode) + segcount;
HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c,
cp_to_gid.iter (),
endCode,
startCode,
idDelta,
segcount);
if (unlikely (!c->check_success (idRangeOffset))) return;
this->length = c->length () - table_initpos;
@ -440,14 +453,14 @@ struct CmapSubtableFormat4
hb_codepoint_t start = this->startCount[i];
hb_codepoint_t end = this->endCount[i];
unsigned int rangeOffset = this->idRangeOffset[i];
out->add_range(start, end);
if (rangeOffset == 0)
{
for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
{
hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
if (unlikely (!gid))
continue;
out->add (codepoint);
out->del(codepoint);
}
}
else
@ -456,11 +469,13 @@ struct CmapSubtableFormat4
{
unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
if (unlikely (index >= this->glyphIdArrayLength))
{
out->del_range (codepoint, end);
break;
}
hb_codepoint_t gid = this->glyphIdArray[index];
if (unlikely (!gid))
continue;
out->add (codepoint);
out->del(codepoint);
}
}
}
@ -469,6 +484,8 @@ struct CmapSubtableFormat4
void collect_mapping (hb_set_t *unicodes, /* OUT */
hb_map_t *mapping /* OUT */) const
{
// TODO(grieger): optimize similar to collect_unicodes
// (ie. use add_range())
unsigned count = this->segCount;
if (count && this->startCount[count - 1] == 0xFFFFu)
count--; /* Skip sentinel segment. */
@ -1448,6 +1465,37 @@ struct EncodingRecord
DEFINE_SIZE_STATIC (8);
};
/* Caches the set of unicodes collected from each cmap subtable, keyed by
 * the address of the EncodingRecord that points to it, so that
 * collect_unicodes() runs at most once per record.
 *
 * The cache holds one reference to every set it creates and releases
 * them all in its destructor; pointers returned by set_for() must not
 * be used after the cache is destroyed. */
struct SubtableUnicodesCache {

 private:
  const void* base;  /* Base address that record->subtable is offset from. */
  hb_hashmap_t<intptr_t, hb_set_t*> cached_unicodes;

 public:
  SubtableUnicodesCache(const void* cmap_base)
      : base(cmap_base), cached_unicodes() {}

  /* Not copyable: a copy would hold the same set pointers and destroy
   * them a second time in its own destructor. */
  SubtableUnicodesCache(const SubtableUnicodesCache&) = delete;
  SubtableUnicodesCache& operator=(const SubtableUnicodesCache&) = delete;

  ~SubtableUnicodesCache()
  {
    for (hb_set_t* s : cached_unicodes.values()) {
      hb_set_destroy (s);
    }
  }

  /* Returns the unicodes of record's subtable, collecting them on the
   * first request for that record.  If the new set cannot be stored in
   * the cache, the shared empty set is returned instead. */
  hb_set_t* set_for(const EncodingRecord* record)
  {
    intptr_t key = (intptr_t) record;
    if (cached_unicodes.has (key))
      return cached_unicodes.get (key);

    hb_set_t* new_set = hb_set_create ();
    if (!cached_unicodes.set (key, new_set)) {
      hb_set_destroy (new_set);
      return hb_set_get_empty ();
    }

    /* The map now holds new_set; fill it directly rather than looking
     * it up again. */
    (base+record->subtable).collect_unicodes (new_set);
    return new_set;
  }

};
struct cmap
{
static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
@ -1467,6 +1515,7 @@ struct cmap
unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0;
auto snap = c->snapshot ();
SubtableUnicodesCache unicodes_cache (base);
for (const EncodingRecord& _ : encodingrec_iter)
{
if (c->in_error ())
@ -1475,12 +1524,11 @@ struct cmap
unsigned format = (base+_.subtable).u.format;
if (format != 4 && format != 12 && format != 14) continue;
hb_set_t unicodes_set;
(base+_.subtable).collect_unicodes (&unicodes_set);
hb_set_t* unicodes_set = unicodes_cache.set_for (&_);
if (!drop_format_4 && format == 4)
{
c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
c->copy (_, + it | hb_filter (*unicodes_set, hb_first), 4u, base, plan, &format4objidx);
if (c->in_error () && c->only_overflow ())
{
// cmap4 overflowed, reset and retry serialization without format 4 subtables.
@ -1495,8 +1543,8 @@ struct cmap
else if (format == 12)
{
if (_can_drop (_, unicodes_set, base, + it | hb_map (hb_first), encodingrec_iter)) continue;
c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
if (_can_drop (_, *unicodes_set, base, unicodes_cache, + it | hb_map (hb_first), encodingrec_iter)) continue;
c->copy (_, + it | hb_filter (*unicodes_set, hb_first), 12u, base, plan, &format12objidx);
}
else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
}
@ -1514,6 +1562,7 @@ struct cmap
bool _can_drop (const EncodingRecord& cmap12,
const hb_set_t& cmap12_unicodes,
const void* base,
SubtableUnicodesCache& unicodes_cache,
Iterator subset_unicodes,
EncodingRecordIterator encoding_records)
{
@ -1544,11 +1593,10 @@ struct cmap
|| (base+_.subtable).get_language() != target_language)
continue;
hb_set_t sibling_unicodes;
(base+_.subtable).collect_unicodes (&sibling_unicodes);
hb_set_t* sibling_unicodes = unicodes_cache.set_for (&_);
auto cmap12 = + subset_unicodes | hb_filter (cmap12_unicodes);
auto sibling = + subset_unicodes | hb_filter (sibling_unicodes);
auto sibling = + subset_unicodes | hb_filter (*sibling_unicodes);
for (; cmap12 && sibling; cmap12++, sibling++)
{
unsigned a = *cmap12;