[GPOS] Avoid O(n^2) behavior in mark-attachment

Better implementation; avoids arbitrary limit on look-back.
This commit is contained in:
Behdad Esfahbod 2023-02-06 14:51:25 -07:00
parent 661050b465
commit 8708b9e081
3 changed files with 76 additions and 43 deletions

View File

@ -90,6 +90,25 @@ struct MarkBasePosFormat1_2
const Coverage &get_coverage () const { return this+markCoverage; }
static inline bool accept (hb_buffer_t *buffer, unsigned idx)
{
/* We only want to attach to the first of a MultipleSubst sequence.
* https://github.com/harfbuzz/harfbuzz/issues/740
* Reject others...
* ...but stop if we find a mark in the MultipleSubst sequence:
* https://github.com/harfbuzz/harfbuzz/issues/1020 */
return !_hb_glyph_info_multiplied (&buffer->info[idx]) ||
0 == _hb_glyph_info_get_lig_comp (&buffer->info[idx]) ||
(idx == 0 ||
_hb_glyph_info_is_mark (&buffer->info[idx - 1]) ||
!_hb_glyph_info_multiplied (&buffer->info[idx - 1]) ||
_hb_glyph_info_get_lig_id (&buffer->info[idx]) !=
_hb_glyph_info_get_lig_id (&buffer->info[idx - 1]) ||
_hb_glyph_info_get_lig_comp (&buffer->info[idx]) !=
_hb_glyph_info_get_lig_comp (&buffer->info[idx - 1]) + 1
);
}
bool apply (hb_ot_apply_context_t *c) const
{
TRACE_APPLY (this);
@ -97,48 +116,47 @@ struct MarkBasePosFormat1_2
unsigned int mark_index = (this+markCoverage).get_coverage (buffer->cur().codepoint);
if (likely (mark_index == NOT_COVERED)) return_trace (false);
/* Now we search backwards for a non-mark glyph */
/* Now we search backwards for a non-mark glyph.
* We don't use skippy_iter.prev() to avoid O(n^2) behavior. */
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
do {
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
return_trace (false);
}
/* We only want to attach to the first of a MultipleSubst sequence.
* https://github.com/harfbuzz/harfbuzz/issues/740
* Reject others...
* ...but stop if we find a mark in the MultipleSubst sequence:
* https://github.com/harfbuzz/harfbuzz/issues/1020 */
if (!_hb_glyph_info_multiplied (&buffer->info[skippy_iter.idx]) ||
0 == _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) ||
(skippy_iter.idx == 0 ||
_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx - 1]) ||
!_hb_glyph_info_multiplied (&buffer->info[skippy_iter.idx - 1]) ||
_hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]) !=
_hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx - 1]) ||
_hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) !=
_hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx - 1]) + 1
))
break;
skippy_iter.reject ();
} while (true);
/* Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled */
//if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); }
unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint);
if (base_index == NOT_COVERED)
unsigned j;
for (j = buffer->idx; j > c->last_base_until; j--)
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
auto match = skippy_iter.match (buffer->info[j - 1]);
if (match == skippy_iter.MATCH)
{
if (!accept (buffer, j - 1))
match = skippy_iter.SKIP;
}
if (match == skippy_iter.MATCH)
{
c->last_base = (signed) j - 1;
break;
}
}
c->last_base_until = buffer->idx;
if (c->last_base == -1)
{
buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
return_trace (false);
}
return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx));
unsigned idx = (unsigned) c->last_base;
/* Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled */
//if (!_hb_glyph_info_is_base_glyph (&buffer->info[idx])) { return_trace (false); }
unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[idx].codepoint);
if (base_index == NOT_COVERED)
{
buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
return_trace (false);
}
return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, idx));
}
bool subset (hb_subset_context_t *c) const

View File

@ -100,20 +100,32 @@ struct MarkLigPosFormat1_2
if (likely (mark_index == NOT_COVERED)) return_trace (false);
/* Now we search backwards for a non-mark glyph */
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))
unsigned j;
for (j = buffer->idx; j > c->last_base_until; j--)
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
auto match = skippy_iter.match (buffer->info[j - 1]);
if (match == skippy_iter.MATCH)
{
c->last_base = (signed) j - 1;
break;
}
}
c->last_base_until = buffer->idx;
if (c->last_base == -1)
{
buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
return_trace (false);
}
/* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */
//if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); }
j = (unsigned) c->last_base;
/* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */
//if (!_hb_glyph_info_is_ligature (&buffer->info[j])) { return_trace (false); }
unsigned int j = skippy_iter.idx;
unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint);
if (lig_index == NOT_COVERED)
{

View File

@ -712,6 +712,9 @@ struct hb_ot_apply_context_t :
uint32_t random_state = 1;
unsigned new_syllables = (unsigned) -1;
signed last_base = -1; // GPOS uses
unsigned last_base_until = 0; // GPOS uses
hb_ot_apply_context_t (unsigned int table_index_,
hb_font_t *font_,
hb_buffer_t *buffer_) :
@ -750,7 +753,7 @@ struct hb_ot_apply_context_t :
iter_context.init (this, true);
}
void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; init_iters (); }
void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; last_base = -1; last_base_until = 0; init_iters (); }
void set_auto_zwj (bool auto_zwj_) { auto_zwj = auto_zwj_; init_iters (); }
void set_auto_zwnj (bool auto_zwnj_) { auto_zwnj = auto_zwnj_; init_iters (); }
void set_per_syllable (bool per_syllable_) { per_syllable = per_syllable_; init_iters (); }