[OTLayout] Accelerate lookups by batching

If we need to apply many many lookups, we can fasten that up by applying
them in batches.  For each batch we keep the union of the coverage of
the lookups participating.  We can then skip glyph ranges that do NOT
participate in any lookup in the batch.  The batch partition is
determined optimally by a mathematical probability model on the glyphs
and a dynamic-program to optimize the partition.

The net effect is 30% speedup on Amiri.  the downside is more memory
consuption as each batch will keep an hb_set_t of its coverage.

I'm not yet convinced that the tradeoff is worth pursuing.  I'm trying
to find out ways to optimized this more, with less memory overhead.

This work also ignores the number of subtables per lookup.  That may
prove to be very important for the performance numbers from here on.
This commit is contained in:
Behdad Esfahbod 2013-04-29 21:16:15 -04:00
parent 68db8c49d8
commit cdbf24e87f
4 changed files with 294 additions and 28 deletions

View File

@ -1252,6 +1252,7 @@ struct SubstLookup : Lookup
unsigned int end, unsigned int end,
const hb_set_digest_t *digest) const const hb_set_digest_t *digest) const
{ {
bool inplace = end != (unsigned int) -1;
bool ret = false; bool ret = false;
end = MIN (end, c->buffer->len); end = MIN (end, c->buffer->len);
@ -1264,9 +1265,9 @@ struct SubstLookup : Lookup
if (likely (!is_reverse ())) if (likely (!is_reverse ()))
{ {
/* in/out forward substitution */ /* in/out forward substitution */
c->buffer->clear_output ();
c->buffer->idx = start; c->buffer->idx = start;
if (inplace)
c->buffer->out_len = start;
while (c->buffer->idx < end) while (c->buffer->idx < end)
{ {
@ -1277,9 +1278,6 @@ struct SubstLookup : Lookup
else else
c->buffer->next_glyph (); c->buffer->next_glyph ();
} }
if (ret)
c->buffer->swap_buffers ();
} }
else else
{ {

View File

@ -39,6 +39,7 @@ static const hb_tag_t table_tags[2] = {HB_OT_TAG_GSUB, HB_OT_TAG_GPOS};
struct hb_ot_map_t struct hb_ot_map_t
{ {
friend struct hb_ot_map_builder_t; friend struct hb_ot_map_builder_t;
friend struct optimize_lookups_context_t;
public: public:
@ -68,10 +69,15 @@ struct hb_ot_map_t
typedef void (*pause_func_t) (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer); typedef void (*pause_func_t) (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
struct stage_map_t { struct stage_map_t {
unsigned int last_lookup; /* Cumulative */ unsigned int last_lookup; /* Actually, last_lookup+1 */
pause_func_t pause_func; pause_func_t pause_func;
}; };
struct batch_map_t {
unsigned int last_lookup; /* Actually, last_lookup+1 */
hb_set_t *coverage;
};
hb_ot_map_t (void) { memset (this, 0, sizeof (*this)); } hb_ot_map_t (void) { memset (this, 0, sizeof (*this)); }
@ -117,6 +123,8 @@ struct hb_ot_map_t
*lookup_count = end - start; *lookup_count = end - start;
} }
HB_INTERNAL void optimize (hb_face_t *face);
HB_INTERNAL void collect_lookups (unsigned int table_index, hb_set_t *lookups) const; HB_INTERNAL void collect_lookups (unsigned int table_index, hb_set_t *lookups) const;
HB_INTERNAL inline void apply (unsigned int table_index, HB_INTERNAL inline void apply (unsigned int table_index,
const struct hb_ot_shape_plan_t *plan, const struct hb_ot_shape_plan_t *plan,
@ -131,6 +139,9 @@ struct hb_ot_map_t
{ {
lookups[table_index].finish (); lookups[table_index].finish ();
stages[table_index].finish (); stages[table_index].finish ();
for (unsigned int batch_index = 0; batch_index < batches[table_index].len; batch_index++)
hb_set_destroy (batches[table_index][batch_index].coverage);
batches[table_index].finish ();
} }
} }
@ -151,6 +162,7 @@ struct hb_ot_map_t
hb_prealloced_array_t<feature_map_t, 8> features; hb_prealloced_array_t<feature_map_t, 8> features;
hb_prealloced_array_t<lookup_map_t, 32> lookups[2]; /* GSUB/GPOS */ hb_prealloced_array_t<lookup_map_t, 32> lookups[2]; /* GSUB/GPOS */
hb_prealloced_array_t<stage_map_t, 4> stages[2]; /* GSUB/GPOS */ hb_prealloced_array_t<stage_map_t, 4> stages[2]; /* GSUB/GPOS */
hb_prealloced_array_t<batch_map_t, 4> batches[2]; /* GSUB/GPOS */
}; };
enum hb_ot_map_feature_flags_t { enum hb_ot_map_feature_flags_t {

View File

@ -27,6 +27,14 @@
*/ */
#include "hb-ot-map-private.hh" #include "hb-ot-map-private.hh"
#include "hb-ot-layout-private.hh"
#ifndef HB_DEBUG_MAP
#define HB_DEBUG_MAP (HB_DEBUG+0)
#endif
#include "hb-ot-layout-private.hh" #include "hb-ot-layout-private.hh"
@ -102,38 +110,98 @@ void hb_ot_map_builder_t::add_feature (hb_tag_t tag, unsigned int value,
info->stage[1] = current_stage[1]; info->stage[1] = current_stage[1];
} }
static inline bool
may_skip (const hb_glyph_info_t &info, hb_set_t *coverage)
{
return !(info.glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_MARK) &&
!coverage->has (info.codepoint);
}
inline void hb_ot_map_t::apply (unsigned int table_index, inline void hb_ot_map_t::apply (unsigned int table_index,
const hb_ot_shape_plan_t *plan, const hb_ot_shape_plan_t *plan,
hb_font_t *font, hb_font_t *font,
hb_buffer_t *buffer) const hb_buffer_t *buffer) const
{ {
unsigned int i = 0; unsigned int i = 0;
unsigned int b = 0;
for (unsigned int stage_index = 0; stage_index < stages[table_index].len; stage_index++) { for (unsigned int stage_index = 0; stage_index < stages[table_index].len; stage_index++)
const stage_map_t *stage = &stages[table_index][stage_index]; {
for (; i < stage->last_lookup; i++) const stage_map_t stage = stages[table_index][stage_index];
for (; b < batches[table_index].len && batches[table_index][b].last_lookup <= stage.last_lookup; b++)
{
const batch_map_t batch = batches[table_index][b];
if (!batch.coverage)
{
switch (table_index) switch (table_index)
{ {
case 0: case 0:
for (; i < batch.last_lookup; i++)
{
buffer->clear_output ();
hb_ot_layout_substitute_lookup (font, buffer, lookups[table_index][i].index, hb_ot_layout_substitute_lookup (font, buffer, lookups[table_index][i].index,
0, (unsigned int) -1, 0, (unsigned int) -1,
lookups[table_index][i].mask, lookups[table_index][i].mask,
lookups[table_index][i].auto_zwj); lookups[table_index][i].auto_zwj);
if (buffer->have_output)
buffer->swap_buffers ();
}
break; break;
case 1: case 1:
for (; i < batch.last_lookup; i++)
hb_ot_layout_position_lookup (font, buffer, lookups[table_index][i].index, hb_ot_layout_position_lookup (font, buffer, lookups[table_index][i].index,
0, (unsigned int) -1, 0, (unsigned int) -1,
lookups[table_index][i].mask, lookups[table_index][i].mask,
lookups[table_index][i].auto_zwj); lookups[table_index][i].auto_zwj);
break; break;
} }
if (stage->pause_func)
{
buffer->clear_output ();
stage->pause_func (plan, font, buffer);
} }
else
{
unsigned int start = 0, end = 0;
if (table_index == 0)
buffer->clear_output ();
for (;;)
{
while (start < buffer->len && may_skip (buffer->info[start], batch.coverage))
start++;
if (start >= buffer->len)
break;
end = start + 1;
while (end < buffer->len && !may_skip (buffer->info[end], batch.coverage))
end++;
switch (table_index)
{
case 0:
for (unsigned int j = i; j < batch.last_lookup; j++)
hb_ot_layout_substitute_lookup (font, buffer, lookups[table_index][j].index,
start, end,
lookups[table_index][j].mask,
lookups[table_index][j].auto_zwj);
break;
case 1:
for (unsigned int j = i; j < batch.last_lookup; j++)
hb_ot_layout_position_lookup (font, buffer, lookups[table_index][j].index,
start, end,
lookups[table_index][j].mask,
lookups[table_index][j].auto_zwj);
break;
}
start = end + 1;
}
assert (!buffer->has_separate_output ());
i = batch.last_lookup;
}
}
if (stage.pause_func)
stage.pause_func (plan, font, buffer);
} }
} }
@ -317,4 +385,191 @@ hb_ot_map_builder_t::compile (hb_ot_map_t &m)
} }
} }
} }
m.optimize (face);
}
struct optimize_lookups_context_t
{
inline optimize_lookups_context_t (hb_ot_map_t *map_,
hb_face_t *face_,
unsigned int table_index_) :
map (map_),
face (face_),
table_index (table_index_),
start_lookup (0),
num_lookups (0),
num_glyphs (hb_face_get_glyph_count (face_)) {}
inline void add_lookups (unsigned int start_lookup_,
unsigned int num_lookups_)
{
set_lookups (start_lookup_, num_lookups_);
solve ();
}
private:
inline void set_lookups (unsigned int start_lookup_,
unsigned int num_lookups_)
{
start_lookup = start_lookup_;
num_lookups = num_lookups_;
}
inline void
solve (void)
{
if (!num_lookups)
return;
DEBUG_MSG_FUNC (MAP, map, "%d lookups", num_lookups);
hb_set_t *cov = hb_set_create ();
int best_move[num_lookups];
float best_cost[num_lookups];
best_move[0] = -1;
best_cost[0] = single_lookup_cost (0);
for (unsigned int i = 1; i < num_lookups; i++)
{
cov->clear ();
add_coverage (i, cov);
float this_cost = single_lookup_cost (i);
best_cost[i] = 1e20;
for (unsigned int j = i - 1; (int) j >= 0; j--)
{
if (best_cost[i] > best_cost[j] + this_cost)
{
best_cost[i] = best_cost[j] + this_cost;
best_move[i] = j;
}
add_coverage (j, cov);
this_cost = lookup_batch_cost (cov, i - j + 1);
if (this_cost > best_cost[i])
break; /* No chance */
}
if (best_cost[i] > this_cost)
{
best_cost[i] = this_cost;
best_move[i] = -1;
}
}
DEBUG_MSG_FUNC (MAP, map, "optimal solution costs %f", best_cost[num_lookups - 1]);
for (int i = num_lookups - 1; i >= 0; i = best_move[i])
{
unsigned int batch_num_lookups = i - best_move[i];
if (DEBUG_LEVEL_ENABLED (MAP, 1))
DEBUG_MSG_FUNC (MAP, map, "batch has %d lookups; costs %f",
batch_num_lookups,
best_cost[i] - (best_move[i] == -1 ? 0 : best_cost[best_move[i]]));
hb_ot_map_t::batch_map_t *batch = map->batches[table_index].push ();
if (batch)
{
batch->last_lookup = MAX (lookup_offset (i), lookup_offset (best_move[i] + 1)) + 1;
batch->coverage = batch_num_lookups == 1 ? NULL : hb_set_create ();
for (int j = i; j > best_move[i]; j--)
{
if (batch->coverage)
add_coverage (j, batch->coverage);
if (DEBUG_LEVEL_ENABLED (MAP, 2))
{
cov->clear ();
add_coverage (j, cov);
DEBUG_MSG_FUNC (MAP, map, "lookup %d (lookup-index %d) popcount %d",
lookup_offset (j),
lookup_index (j),
cov->get_population ());
}
}
}
}
hb_set_destroy (cov);
}
inline unsigned int lookup_offset (unsigned int i)
{
assert (i < num_lookups);
return start_lookup + num_lookups - 1 - i;
}
inline unsigned int lookup_index (unsigned int i)
{
return map->lookups[table_index][lookup_offset (i)].index;
}
inline void add_coverage (unsigned int i, hb_set_t *coverage)
{
hb_ot_layout_lookup_get_coverage (face,
table_tags[table_index],
lookup_index (i),
coverage);
}
/* Parameters */
inline float single_lookup_cost (unsigned int lookup_index HB_UNUSED)
{
return 1.0;
}
inline float
lookup_batch_cost (hb_set_t *cov, unsigned int n_lookups)
{
return .1 + probability (cov) * n_lookups;
}
inline float
probability (hb_set_t *cov)
{
/* Biggest hack: assume uniform glyph probability. */
return cov->get_population () / (float) num_glyphs;
}
private:
hb_ot_map_t *map;
hb_face_t *face;
unsigned int table_index;
unsigned int start_lookup;
unsigned int num_lookups;
unsigned int num_glyphs;
};
void
hb_ot_map_t::optimize (hb_face_t *face)
{
DEBUG_MSG_FUNC (MAP, this, "face %p", face);
for (unsigned int table_index = 0; table_index < 2; table_index++)
{
DEBUG_MSG_FUNC (MAP, this, "table %c%c%c%c", HB_UNTAG (table_tags[table_index]));
unsigned int i = 0;
for (unsigned int stage_index = 0; stage_index < stages[table_index].len; stage_index++)
{
stage_map_t *stage = &stages[table_index][stage_index];
unsigned int num_lookups = stage->last_lookup - i;
DEBUG_MSG_FUNC (MAP, this, "stage %d: %d lookups", stage_index, num_lookups);
optimize_lookups_context_t c (this, face, table_index);
unsigned int start = i;
for (; i < num_lookups; i++)
if (!hb_ot_layout_lookup_is_inplace (face, table_tags[table_index],
lookups[table_index][i].index))
{
DEBUG_MSG_FUNC (MAP, this, "lookup %d (lookup-index %d) NOT inplace",
i, lookups[table_index][i].index);
c.add_lookups (start, i - start);
c.add_lookups (i, 1);
start = i + 1;
}
c.add_lookups (start, i - start);
}
}
} }

View File

@ -248,6 +248,7 @@ arabic_fallback_plan_shape (arabic_fallback_plan_t *fallback_plan,
if (fallback_plan->lookup_array[i]) if (fallback_plan->lookup_array[i])
{ {
OT::hb_apply_context_t c (0, font, buffer, fallback_plan->mask_array[i], true/*auto_zwj*/); OT::hb_apply_context_t c (0, font, buffer, fallback_plan->mask_array[i], true/*auto_zwj*/);
/* XXX Currently broken because of clear_output / swap_buffers issues. */
fallback_plan->lookup_array[i]->apply_string (&c, 0, (unsigned int) -1, &fallback_plan->digest_array[i]); fallback_plan->lookup_array[i]->apply_string (&c, 0, (unsigned int) -1, &fallback_plan->digest_array[i]);
} }
} }