[OTLayout] Ignore default-ignorables when matching GSUB/GPOS

When matching lookups, be smart about default-ignorable characters.
In particular:

Do nothing specific about ZWNJ, but for the other default-ignorables:

If the lookup in question uses the ignorable character in a sequence,
then match it as we used to do.  However, if the sequence match will
fail because the default-ignorable blocked it, try skipping the
ignorable character and continue.

The most immediate thing it means is that if Lam-Alef forms a ligature,
then Lam-ZWJ-Alef will do to.  Finally!

One exception: when matching for GPOS, or for backtrack/lookahead of
GSUB, we ignore ZWNJ too.  That's the right thing to do.

It certainly is possible to build fonts that this feature will result
in undesirable glyphs, but it's hard to think of a real-world case
that that would happen.

This *does* break Indic shaping right now, since Indic Unicode has
specific rules for what ZWJ/ZWNJ mean, and skipping ZWJ is breaking
those rules.  That will be fixed in upcoming commits.
This commit is contained in:
Behdad Esfahbod 2013-02-14 07:43:13 -05:00
parent ec5448667b
commit 607feb7cff
5 changed files with 237 additions and 134 deletions

View File

@ -596,6 +596,7 @@ struct PairSet
unsigned int count = len;
for (unsigned int i = 0; i < count; i++)
{
/* TODO bsearch */
if (c->buffer->info[pos].codepoint == record->secondGlyph)
{
valueFormats[0].apply_value (c->font, c->direction, this,

View File

@ -638,9 +638,9 @@ struct Ligature
ligate_input (c,
count,
&component[1],
ligGlyph,
match_glyph,
NULL,
ligGlyph,
is_mark_ligature,
total_component_count);

View File

@ -288,53 +288,137 @@ struct hb_apply_context_t
inline void set_lookup_props (unsigned int lookup_props_) { lookup_props = lookup_props_; }
inline void set_lookup (const Lookup &l) { lookup_props = l.get_props (); }
struct matcher_t
{
inline matcher_t (void) :
lookup_props (0),
ignore_zwnj (true),
mask (-1),
#define arg1(arg) (arg) /* Remove the macro to see why it's needed! */
syllable arg1(0),
#undef arg1
match_func (NULL),
match_data (NULL) {};
typedef bool (*match_func_t) (hb_codepoint_t glyph_id, const USHORT &value, const void *data);
inline void set_ignore_zwnj (bool ignore_zwnj_) { ignore_zwnj = ignore_zwnj_; }
inline void set_lookup_props (unsigned int lookup_props_) { lookup_props = lookup_props_; }
inline void set_mask (hb_mask_t mask_) { mask = mask_; }
inline void set_syllable (uint8_t syllable_) { syllable = syllable_; }
inline void set_match_func (match_func_t match_func_,
const void *match_data_)
{ match_func = match_func_; match_data = match_data_; }
inline bool matches (const hb_glyph_info_t &info,
const USHORT *glyph_data) const
{
return (info.mask & mask) &&
(!syllable || syllable == info.syllable ()) &&
(!match_func || match_func (info.codepoint, *glyph_data, match_data));
}
enum may_skip_t {
SKIP_NO,
SKIP_YES,
SKIP_MAYBE
};
inline may_skip_t
may_skip (const hb_apply_context_t *c,
const hb_glyph_info_t &info) const
{
unsigned int property;
property = info.glyph_props();
if (!c->match_properties (info.codepoint, property, lookup_props))
return SKIP_YES;
if (unlikely ((_hb_glyph_info_is_default_ignorable (&info) &&
(ignore_zwnj || !_hb_glyph_info_is_zwnj (&info))) &&
!is_a_ligature (info)))
return SKIP_MAYBE;
return SKIP_NO;
}
protected:
unsigned int lookup_props;
bool ignore_zwnj;
hb_mask_t mask;
uint8_t syllable;
match_func_t match_func;
const void *match_data;
};
struct skipping_forward_iterator_t
{
inline skipping_forward_iterator_t (hb_apply_context_t *c_,
unsigned int start_index_,
unsigned int num_items_,
bool context_match = false)
bool context_match = false) :
idx (start_index_),
c (c_),
match_glyph_data (NULL),
num_items (num_items_),
end (c->buffer->len)
{
c = c_;
lookup_props = c->lookup_props;
idx = start_index_;
num_items = num_items_;
mask = context_match ? -1 : c->lookup_mask;
syllable = context_match ? 0 : c->buffer->cur().syllable ();
end = c->buffer->len;
matcher.set_lookup_props (c->lookup_props);
/* Ignore ZWNJ if we are matching GSUB context, or matching GPOS. */
matcher.set_ignore_zwnj (context_match || c->table_index == 1);
if (!context_match)
{
matcher.set_mask (c->lookup_mask);
matcher.set_syllable (start_index_ == c->buffer->idx ? c->buffer->cur().syllable () : 0);
}
}
inline void set_lookup_props (unsigned int lookup_props_)
inline void set_lookup_props (unsigned int lookup_props) { matcher.set_lookup_props (lookup_props); }
inline void set_syllable (unsigned int syllable) { matcher.set_syllable (syllable); }
inline void set_match_func (matcher_t::match_func_t match_func,
const void *match_data,
const USHORT glyph_data[])
{
lookup_props = lookup_props_;
}
inline bool has_no_chance (void) const
{
return unlikely (num_items && idx + num_items >= end);
}
inline void reject (void)
{
num_items++;
matcher.set_match_func (match_func, match_data);
match_glyph_data = glyph_data;
}
inline bool has_no_chance (void) const { return unlikely (num_items && idx + num_items >= end); }
inline void reject (void) { num_items++; match_glyph_data--; }
inline bool next (void)
{
assert (num_items > 0);
do
matcher_t::may_skip_t skip;
while (!has_no_chance ())
{
if (has_no_chance ())
return false;
idx++;
} while (c->should_skip (&c->buffer->info[idx], lookup_props));
num_items--;
return (c->buffer->info[idx].mask & mask) && (!syllable || syllable == c->buffer->info[idx].syllable ());
const hb_glyph_info_t &info = c->buffer->info[idx];
skip = matcher.may_skip (c, info);
if (unlikely (skip == matcher_t::SKIP_YES))
continue;
if (matcher.matches (info, match_glyph_data))
{
num_items--;
match_glyph_data++;
return true;
}
if (skip == matcher_t::SKIP_NO)
return false;
}
return false;
}
unsigned int idx;
protected:
hb_apply_context_t *c;
unsigned int lookup_props;
matcher_t matcher;
const USHORT *match_glyph_data;
unsigned int num_items;
hb_mask_t mask;
uint8_t syllable;
unsigned int end;
};
@ -343,47 +427,67 @@ struct hb_apply_context_t
inline skipping_backward_iterator_t (hb_apply_context_t *c_,
unsigned int start_index_,
unsigned int num_items_,
bool context_match = false)
bool context_match = false) :
idx (start_index_),
c (c_),
match_glyph_data (NULL),
num_items (num_items_)
{
c = c_;
lookup_props = c->lookup_props;
idx = start_index_;
num_items = num_items_;
mask = context_match ? -1 : c->lookup_mask;
syllable = context_match ? 0 : c->buffer->cur().syllable ();
matcher.set_lookup_props (c->lookup_props);
/* Ignore ZWNJ if we are matching GSUB context, or matching GPOS. */
matcher.set_ignore_zwnj (context_match || c->table_index == 1);
if (!context_match)
{
matcher.set_mask (c->lookup_mask);
matcher.set_syllable (start_index_ == c->buffer->idx ? c->buffer->cur().syllable () : 0);
}
}
inline void set_lookup_props (unsigned int lookup_props_)
inline void set_lookup_props (unsigned int lookup_props) { matcher.set_lookup_props (lookup_props); }
inline void set_syllable (unsigned int syllable) { matcher.set_syllable (syllable); }
inline void set_match_func (matcher_t::match_func_t match_func,
const void *match_data,
const USHORT glyph_data[])
{
lookup_props = lookup_props_;
}
inline bool has_no_chance (void) const
{
return unlikely (idx < num_items);
}
inline void reject (void)
{
num_items++;
matcher.set_match_func (match_func, match_data);
match_glyph_data = glyph_data;
}
inline bool has_no_chance (void) const { return unlikely (idx < num_items); }
inline void reject (void) { num_items++; }
inline bool prev (void)
{
assert (num_items > 0);
do
matcher_t::may_skip_t skip;
while (!has_no_chance ())
{
if (has_no_chance ())
return false;
idx--;
} while (c->should_skip (&c->buffer->out_info[idx], lookup_props));
num_items--;
return (c->buffer->out_info[idx].mask & mask) && (!syllable || syllable == c->buffer->out_info[idx].syllable ());
const hb_glyph_info_t &info = c->buffer->out_info[idx];
skip = matcher.may_skip (c, info);
if (unlikely (skip == matcher_t::SKIP_YES))
continue;
if (matcher.matches (info, match_glyph_data))
{
num_items--;
match_glyph_data++;
return true;
}
if (skip == matcher_t::SKIP_NO)
return false;
}
return false;
}
unsigned int idx;
protected:
hb_apply_context_t *c;
unsigned int lookup_props;
matcher_t matcher;
const USHORT *match_glyph_data;
unsigned int num_items;
hb_mask_t mask;
uint8_t syllable;
};
inline bool
@ -435,18 +539,6 @@ struct hb_apply_context_t
return match_properties (info->codepoint, property, lookup_props);
}
inline bool
should_skip (hb_glyph_info_t *info,
unsigned int lookup_props) const
{
unsigned int property;
property = info->glyph_props();
return !match_properties (info->codepoint, property, lookup_props);
}
inline void set_class (hb_codepoint_t glyph_index, unsigned int class_guess) const
{
if (likely (has_glyph_classes))
@ -591,6 +683,7 @@ static inline bool match_input (hb_apply_context_t *c,
TRACE_APPLY (NULL);
hb_apply_context_t::skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
skippy_iter.set_match_func (match_func, match_data, input);
if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
/*
@ -623,8 +716,6 @@ static inline bool match_input (hb_apply_context_t *c,
{
if (!skippy_iter.next ()) return TRACE_RETURN (false);
if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, input[i - 1], match_data))) return TRACE_RETURN (false);
unsigned int this_lig_id = get_lig_id (c->buffer->info[skippy_iter.idx]);
unsigned int this_lig_comp = get_lig_comp (c->buffer->info[skippy_iter.idx]);
@ -659,13 +750,17 @@ static inline bool match_input (hb_apply_context_t *c,
}
static inline void ligate_input (hb_apply_context_t *c,
unsigned int count, /* Including the first glyph (not matched) */
const USHORT input[] HB_UNUSED, /* Array of input values--start with second glyph */
const USHORT input[], /* Array of input values--start with second glyph */
match_func_t match_func,
const void *match_data,
hb_codepoint_t lig_glyph,
match_func_t match_func HB_UNUSED,
const void *match_data HB_UNUSED,
bool is_mark_ligature,
unsigned int total_component_count)
{
hb_apply_context_t::skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
skippy_iter.set_match_func (match_func, match_data, input);
if (skippy_iter.has_no_chance ()) return;
/*
* - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
* the ligature to keep its old ligature id. This will allow it to attach to
@ -700,8 +795,6 @@ static inline void ligate_input (hb_apply_context_t *c,
unsigned int last_num_components = get_lig_num_comps (c->buffer->cur());
unsigned int components_so_far = last_num_components;
hb_apply_context_t::skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
if (!is_mark_ligature)
set_lig_props_for_ligature (c->buffer->cur(), lig_id, total_component_count);
c->replace_glyph (lig_glyph, klass);
@ -709,6 +802,7 @@ static inline void ligate_input (hb_apply_context_t *c,
for (unsigned int i = 1; i < count; i++)
{
if (!skippy_iter.next ()) return;
while (c->buffer->idx < skippy_iter.idx)
{
if (!is_mark_ligature) {
@ -749,18 +843,13 @@ static inline bool match_backtrack (hb_apply_context_t *c,
TRACE_APPLY (NULL);
hb_apply_context_t::skipping_backward_iterator_t skippy_iter (c, c->buffer->backtrack_len (), count, true);
if (skippy_iter.has_no_chance ())
return TRACE_RETURN (false);
skippy_iter.set_match_func (match_func, match_data, backtrack);
if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
for (unsigned int i = 0; i < count; i++)
{
if (!skippy_iter.prev ())
return TRACE_RETURN (false);
if (likely (!match_func (c->buffer->out_info[skippy_iter.idx].codepoint, backtrack[i], match_data)))
return TRACE_RETURN (false);
}
return TRACE_RETURN (true);
}
@ -774,18 +863,13 @@ static inline bool match_lookahead (hb_apply_context_t *c,
TRACE_APPLY (NULL);
hb_apply_context_t::skipping_forward_iterator_t skippy_iter (c, c->buffer->idx + offset - 1, count, true);
if (skippy_iter.has_no_chance ())
return TRACE_RETURN (false);
skippy_iter.set_match_func (match_func, match_data, lookahead);
if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
for (unsigned int i = 0; i < count; i++)
{
if (!skippy_iter.next ())
return TRACE_RETURN (false);
if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, lookahead[i], match_data)))
return TRACE_RETURN (false);
}
return TRACE_RETURN (true);
}
@ -818,6 +902,9 @@ static inline void recurse_lookups (context_t *c,
static inline bool apply_lookup (hb_apply_context_t *c,
unsigned int count, /* Including the first glyph */
const USHORT input[], /* Array of input values--start with second glyph */
match_func_t match_func,
const void *match_data,
unsigned int lookupCount,
const LookupRecord lookupRecord[] /* Array of LookupRecords--in design order */)
{
@ -834,6 +921,11 @@ static inline bool apply_lookup (hb_apply_context_t *c,
* and we jump out of it. Not entirely disastrous. So we don't check
* for reverse lookup here.
*/
hb_apply_context_t::skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
skippy_iter.set_match_func (match_func, match_data, input);
uint8_t syllable = c->buffer->cur().syllable();
unsigned int i = 0;
if (lookupCount && 0 == lookupRecord->sequenceIndex)
{
@ -849,6 +941,13 @@ static inline bool apply_lookup (hb_apply_context_t *c,
if (!done)
goto not_applied;
else
{
/* Reinitialize iterator. */
hb_apply_context_t::skipping_forward_iterator_t tmp (c, c->buffer->idx - 1, count - i);
tmp.set_syllable (syllable);
skippy_iter = tmp;
}
}
else
{
@ -857,7 +956,6 @@ static inline bool apply_lookup (hb_apply_context_t *c,
c->buffer->next_glyph ();
i++;
}
hb_apply_context_t::skipping_forward_iterator_t skippy_iter (c, c->buffer->idx - 1, count - i);
while (i < count)
{
if (!skippy_iter.next ()) return TRACE_RETURN (true);
@ -882,6 +980,7 @@ static inline bool apply_lookup (hb_apply_context_t *c,
{
/* Reinitialize iterator. */
hb_apply_context_t::skipping_forward_iterator_t tmp (c, c->buffer->idx - 1, count - i);
tmp.set_syllable (syllable);
skippy_iter = tmp;
}
}
@ -969,7 +1068,8 @@ static inline bool context_apply_lookup (hb_apply_context_t *c,
inputCount, input,
lookup_context.funcs.match, lookup_context.match_data)
&& apply_lookup (c,
inputCount,
inputCount, input,
lookup_context.funcs.match, lookup_context.match_data,
lookupCount, lookupRecord);
}
@ -1505,7 +1605,8 @@ static inline bool chain_context_apply_lookup (hb_apply_context_t *c,
lookup_context.funcs.match, lookup_context.match_data[2],
lookahead_offset)
&& apply_lookup (c,
inputCount,
inputCount, input,
lookup_context.funcs.match, lookup_context.match_data[1],
lookupCount, lookupRecord);
}

View File

@ -43,6 +43,51 @@
#define syllable() var1.u8[2] /* GSUB/GPOS shaping boundaries */
#define lig_props() var1.u8[3] /* GSUB/GPOS ligature tracking */
/* buffer var allocations, used during the entire shaping process */
#define unicode_props0() var2.u8[0]
#define unicode_props1() var2.u8[1]
inline void
_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
{
info->unicode_props0() = ((unsigned int) unicode->general_category (info->codepoint)) |
(unicode->is_default_ignorable (info->codepoint) ? 0x80 : 0) |
(info->codepoint == 0x200C ? 0x40 : 0);
info->unicode_props1() = unicode->modified_combining_class (info->codepoint);
}
inline hb_unicode_general_category_t
_hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
{
return (hb_unicode_general_category_t) (info->unicode_props0() & 0x3F);
}
inline void
_hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info, unsigned int modified_class)
{
info->unicode_props1() = modified_class;
}
inline unsigned int
_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
{
return info->unicode_props1();
}
inline hb_bool_t
_hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info)
{
return !!(info->unicode_props0() & 0x80);
}
inline hb_bool_t
_hb_glyph_info_is_zwnj (const hb_glyph_info_t *info)
{
return !!(info->unicode_props0() & 0x40);
}
#define hb_ot_layout_from_face(face) ((hb_ot_layout_t *) face->shaper_data.ot)
/*

View File

@ -33,10 +33,6 @@
/* buffer var allocations, used during the entire shaping process */
#define unicode_props0() var2.u8[0]
#define unicode_props1() var2.u8[1]
struct hb_ot_shape_plan_t
@ -89,44 +85,4 @@ struct hb_ot_shape_planner_t
};
inline void
_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
{
info->unicode_props0() = ((unsigned int) unicode->general_category (info->codepoint)) |
(unicode->is_default_ignorable (info->codepoint) ? 0x80 : 0) |
(info->codepoint == 0x200C ? 0x40 : 0);
info->unicode_props1() = unicode->modified_combining_class (info->codepoint);
}
inline hb_unicode_general_category_t
_hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
{
return (hb_unicode_general_category_t) (info->unicode_props0() & 0x3F);
}
inline void
_hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info, unsigned int modified_class)
{
info->unicode_props1() = modified_class;
}
inline unsigned int
_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
{
return info->unicode_props1();
}
inline hb_bool_t
_hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info)
{
return !!(info->unicode_props0() & 0x80);
}
inline hb_bool_t
_hb_glyph_info_is_zwnj (const hb_glyph_info_t *info)
{
return !!(info->unicode_props0() & 0x40);
}
#endif /* HB_OT_SHAPE_PRIVATE_HH */