harfbuzz/src/hb-ot-map.hh

291 lines
9.1 KiB
C++
Raw Normal View History

2010-10-09 01:18:16 +02:00
/*
* Copyright © 2009,2010 Red Hat, Inc.
* Copyright © 2010,2011,2012,2013 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Red Hat Author(s): Behdad Esfahbod
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_MAP_HH
#define HB_OT_MAP_HH
2010-10-09 01:18:16 +02:00
#include "hb-buffer.hh"
2010-10-09 01:18:16 +02:00
/* A bit-count limit (8) and the largest unsigned value representable in that
 * many bits ((1u << 8) - 1u == 255).
 * NOTE(review): presumably these bound the per-feature value that gets packed
 * into a glyph mask -- confirm against the map builder implementation. */
#define HB_OT_MAP_MAX_BITS 8u
#define HB_OT_MAP_MAX_VALUE ((1u << HB_OT_MAP_MAX_BITS) - 1u)
2013-05-02 20:25:09 +02:00
struct hb_ot_shape_plan_t;
2010-10-09 01:18:16 +02:00
/* Layout table tags in table_index order: [0] = GSUB, [1] = GPOS. */
static const hb_tag_t table_tags[2] = {HB_OT_TAG_GSUB, HB_OT_TAG_GPOS};
struct hb_ot_map_t
{
friend struct hb_ot_map_builder_t;
2010-10-09 01:18:16 +02:00
2010-10-09 01:43:48 +02:00
public:
2010-10-09 01:18:16 +02:00
struct feature_map_t {
hb_tag_t tag; /* should be first for our bsearch to work */
unsigned int index[2]; /* GSUB/GPOS */
unsigned int stage[2]; /* GSUB/GPOS */
unsigned int shift;
hb_mask_t mask;
hb_mask_t _1_mask; /* mask for value=1, for quick access */
[Indic-like] Disable automatic joiner handling for basic shaping features Not for Arabic, but for Indic-like scripts. ZWJ/ZWNJ have special meanings in those scripts, so let font lookups take full control. This undoes the regression caused by automatic-joiners handling introduced two commits ago. We only disable automatic joiner handling for the "basic shaping features" of Indic, Myanmar, and SEAsian shapers. The "presentation forms" and other features are still applied with automatic-joiner handling. This change also changes the test suite failure statistics, such that a few scripts show more "failures". The most affected is Kannada. However, upon inspection, we believe that in most, if not all, of the new failures, we are producing results superior to Uniscribe. Hard to count those! Here's an example of what is fixed by the recent joiner-handling changes: https://bugs.freedesktop.org/show_bug.cgi?id=58714 New numbers, for future reference: BENGALI: 353892 out of 354188 tests passed. 296 failed (0.0835714%) DEVANAGARI: 707336 out of 707394 tests passed. 58 failed (0.00819911%) GUJARATI: 366262 out of 366457 tests passed. 195 failed (0.0532122%) GURMUKHI: 60706 out of 60747 tests passed. 41 failed (0.067493%) KANNADA: 950680 out of 951913 tests passed. 1233 failed (0.129529%) KHMER: 299074 out of 299124 tests passed. 50 failed (0.0167155%) LAO: 53611 out of 53644 tests passed. 33 failed (0.0615167%) MALAYALAM: 1047983 out of 1048334 tests passed. 351 failed (0.0334817%) ORIYA: 42320 out of 42329 tests passed. 9 failed (0.021262%) SINHALA: 271539 out of 271847 tests passed. 308 failed (0.113299%) TAMIL: 1091753 out of 1091754 tests passed. 1 failed (9.15957e-05%) TELUGU: 970555 out of 970573 tests passed. 18 failed (0.00185457%) TIBETAN: 208469 out of 208469 tests passed. 0 failed (0%)
2013-02-14 16:40:12 +01:00
unsigned int needs_fallback : 1;
unsigned int auto_zwnj : 1;
[Indic] Futher adjust ZWJ handling in Indic-like shapers After the Ngapi hackfest work, we were assuming that fonts won't use presentation features to choose specific forms (eg. conjuncts). As such, we were using auto-joiner behavior for such features. It proved to be troublesome as many fonts used presentation forms ('pres') for example to form conjuncts, which need to be disabled when a ZWJ is inserted. Two examples: U+0D2F,U+200D,U+0D4D,U+0D2F with kartika.ttf U+0995,U+09CD,U+200D,U+09B7 with vrinda.ttf What we do now is to never do magic to ZWJ during GSUB's main input match for Indic-style shapers. Note that backtrack/lookahead are still matched liberally, as is GPOS. This seems to be an acceptable compromise. As to the bug that initially started this work, that one needs to be fixed differently: Bug 58714 - Kannada u+0cb0 u+200d u+0ccd u+0c95 u+0cbe does not provide same results as Windows8 https://bugs.freedesktop.org/show_bug.cgi?id=58714 New numbers: BENGALI: 353689 out of 354188 tests passed. 499 failed (0.140886%) DEVANAGARI: 707305 out of 707394 tests passed. 89 failed (0.0125814%) GUJARATI: 366349 out of 366457 tests passed. 108 failed (0.0294714%) GURMUKHI: 60706 out of 60747 tests passed. 41 failed (0.067493%) KANNADA: 951030 out of 951913 tests passed. 883 failed (0.0927606%) KHMER: 299070 out of 299124 tests passed. 54 failed (0.0180527%) LAO: 53611 out of 53644 tests passed. 33 failed (0.0615167%) MALAYALAM: 1048102 out of 1048334 tests passed. 232 failed (0.0221304%) ORIYA: 42320 out of 42329 tests passed. 9 failed (0.021262%) SINHALA: 271666 out of 271847 tests passed. 181 failed (0.0665816%) TAMIL: 1091753 out of 1091754 tests passed. 1 failed (9.15957e-05%) TELUGU: 970555 out of 970573 tests passed. 18 failed (0.00185457%) TIBETAN: 208469 out of 208469 tests passed. 0 failed (0%)
2013-03-19 10:53:26 +01:00
unsigned int auto_zwj : 1;
unsigned int random : 1;
unsigned int per_syllable : 1;
int cmp (const hb_tag_t tag_) const
{ return tag_ < tag ? -1 : tag_ > tag ? 1 : 0; }
};
struct lookup_map_t {
[Indic-like] Disable automatic joiner handling for basic shaping features Not for Arabic, but for Indic-like scripts. ZWJ/ZWNJ have special meanings in those scripts, so let font lookups take full control. This undoes the regression caused by automatic-joiners handling introduced two commits ago. We only disable automatic joiner handling for the "basic shaping features" of Indic, Myanmar, and SEAsian shapers. The "presentation forms" and other features are still applied with automatic-joiner handling. This change also changes the test suite failure statistics, such that a few scripts show more "failures". The most affected is Kannada. However, upon inspection, we believe that in most, if not all, of the new failures, we are producing results superior to Uniscribe. Hard to count those! Here's an example of what is fixed by the recent joiner-handling changes: https://bugs.freedesktop.org/show_bug.cgi?id=58714 New numbers, for future reference: BENGALI: 353892 out of 354188 tests passed. 296 failed (0.0835714%) DEVANAGARI: 707336 out of 707394 tests passed. 58 failed (0.00819911%) GUJARATI: 366262 out of 366457 tests passed. 195 failed (0.0532122%) GURMUKHI: 60706 out of 60747 tests passed. 41 failed (0.067493%) KANNADA: 950680 out of 951913 tests passed. 1233 failed (0.129529%) KHMER: 299074 out of 299124 tests passed. 50 failed (0.0167155%) LAO: 53611 out of 53644 tests passed. 33 failed (0.0615167%) MALAYALAM: 1047983 out of 1048334 tests passed. 351 failed (0.0334817%) ORIYA: 42320 out of 42329 tests passed. 9 failed (0.021262%) SINHALA: 271539 out of 271847 tests passed. 308 failed (0.113299%) TAMIL: 1091753 out of 1091754 tests passed. 1 failed (9.15957e-05%) TELUGU: 970555 out of 970573 tests passed. 18 failed (0.00185457%) TIBETAN: 208469 out of 208469 tests passed. 0 failed (0%)
2013-02-14 16:40:12 +01:00
unsigned short index;
unsigned short auto_zwnj : 1;
[Indic] Futher adjust ZWJ handling in Indic-like shapers After the Ngapi hackfest work, we were assuming that fonts won't use presentation features to choose specific forms (eg. conjuncts). As such, we were using auto-joiner behavior for such features. It proved to be troublesome as many fonts used presentation forms ('pres') for example to form conjuncts, which need to be disabled when a ZWJ is inserted. Two examples: U+0D2F,U+200D,U+0D4D,U+0D2F with kartika.ttf U+0995,U+09CD,U+200D,U+09B7 with vrinda.ttf What we do now is to never do magic to ZWJ during GSUB's main input match for Indic-style shapers. Note that backtrack/lookahead are still matched liberally, as is GPOS. This seems to be an acceptable compromise. As to the bug that initially started this work, that one needs to be fixed differently: Bug 58714 - Kannada u+0cb0 u+200d u+0ccd u+0c95 u+0cbe does not provide same results as Windows8 https://bugs.freedesktop.org/show_bug.cgi?id=58714 New numbers: BENGALI: 353689 out of 354188 tests passed. 499 failed (0.140886%) DEVANAGARI: 707305 out of 707394 tests passed. 89 failed (0.0125814%) GUJARATI: 366349 out of 366457 tests passed. 108 failed (0.0294714%) GURMUKHI: 60706 out of 60747 tests passed. 41 failed (0.067493%) KANNADA: 951030 out of 951913 tests passed. 883 failed (0.0927606%) KHMER: 299070 out of 299124 tests passed. 54 failed (0.0180527%) LAO: 53611 out of 53644 tests passed. 33 failed (0.0615167%) MALAYALAM: 1048102 out of 1048334 tests passed. 232 failed (0.0221304%) ORIYA: 42320 out of 42329 tests passed. 9 failed (0.021262%) SINHALA: 271666 out of 271847 tests passed. 181 failed (0.0665816%) TAMIL: 1091753 out of 1091754 tests passed. 1 failed (9.15957e-05%) TELUGU: 970555 out of 970573 tests passed. 18 failed (0.00185457%) TIBETAN: 208469 out of 208469 tests passed. 0 failed (0%)
2013-03-19 10:53:26 +01:00
unsigned short auto_zwj : 1;
2018-01-25 20:22:03 +01:00
unsigned short random : 1;
unsigned short per_syllable : 1;
hb_mask_t mask;
hb_tag_t feature_tag;
HB_INTERNAL static int cmp (const void *pa, const void *pb)
2017-10-30 00:01:47 +01:00
{
const lookup_map_t *a = (const lookup_map_t *) pa;
const lookup_map_t *b = (const lookup_map_t *) pb;
return a->index < b->index ? -1 : a->index > b->index ? 1 : 0;
}
};
2022-11-17 01:51:22 +01:00
/* Pause functions return true if new glyph indices might have been
* added to the buffer. This is used to update buffer digest. */
typedef bool (*pause_func_t) (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
struct stage_map_t {
2013-04-21 21:21:49 +02:00
unsigned int last_lookup; /* Cumulative */
pause_func_t pause_func;
};
void init ()
{
hb_memset (this, 0, sizeof (*this));
2022-11-26 22:31:15 +01:00
features.init0 ();
for (unsigned int table_index = 0; table_index < 2; table_index++)
{
2022-11-26 22:31:15 +01:00
lookups[table_index].init0 ();
stages[table_index].init0 ();
}
}
void fini ()
{
features.fini ();
for (unsigned int table_index = 0; table_index < 2; table_index++)
{
lookups[table_index].fini ();
stages[table_index].fini ();
}
}
2011-08-05 01:49:05 +02:00
hb_mask_t get_global_mask () const { return global_mask; }
2010-10-09 01:18:16 +02:00
hb_mask_t get_mask (hb_tag_t feature_tag, unsigned int *shift = nullptr) const
{
2018-02-08 04:13:10 +01:00
const feature_map_t *map = features.bsearch (feature_tag);
2010-10-09 01:43:48 +02:00
if (shift) *shift = map ? map->shift : 0;
return map ? map->mask : 0;
2010-10-09 01:18:16 +02:00
}
bool needs_fallback (hb_tag_t feature_tag) const
{
2018-02-08 04:13:10 +01:00
const feature_map_t *map = features.bsearch (feature_tag);
return map ? map->needs_fallback : false;
}
hb_mask_t get_1_mask (hb_tag_t feature_tag) const
{
2018-02-08 04:13:10 +01:00
const feature_map_t *map = features.bsearch (feature_tag);
return map ? map->_1_mask : 0;
}
unsigned int get_feature_index (unsigned int table_index, hb_tag_t feature_tag) const
{
2018-02-08 04:13:10 +01:00
const feature_map_t *map = features.bsearch (feature_tag);
return map ? map->index[table_index] : HB_OT_LAYOUT_NO_FEATURE_INDEX;
}
unsigned int get_feature_stage (unsigned int table_index, hb_tag_t feature_tag) const
{
2018-02-08 04:13:10 +01:00
const feature_map_t *map = features.bsearch (feature_tag);
return map ? map->stage[table_index] : UINT_MAX;
}
hb_array_t<const hb_ot_map_t::lookup_map_t>
get_stage_lookups (unsigned int table_index, unsigned int stage) const
{
if (unlikely (stage > stages[table_index].length))
return hb_array<const hb_ot_map_t::lookup_map_t> (nullptr, 0);
2013-04-21 21:21:49 +02:00
unsigned int start = stage ? stages[table_index][stage - 1].last_lookup : 0;
2018-12-22 00:46:51 +01:00
unsigned int end = stage < stages[table_index].length ? stages[table_index][stage].last_lookup : lookups[table_index].length;
return lookups[table_index].as_array ().sub_array (start, end - start);
}
HB_INTERNAL void collect_lookups (unsigned int table_index, hb_set_t *lookups) const;
2013-05-03 00:52:24 +02:00
template <typename Proxy>
2019-07-03 02:11:09 +02:00
HB_INTERNAL void apply (const Proxy &proxy,
const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
HB_INTERNAL void substitute (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
HB_INTERNAL void position (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
2012-11-15 01:24:05 +01:00
public:
hb_tag_t chosen_script[2];
bool found_script[2];
private:
2022-07-21 17:46:25 +02:00
hb_mask_t global_mask = 0;
2019-01-08 00:33:04 +01:00
hb_sorted_vector_t<feature_map_t> features;
hb_vector_t<lookup_map_t> lookups[2]; /* GSUB/GPOS */
hb_vector_t<stage_map_t> stages[2]; /* GSUB/GPOS */
};
2010-10-09 01:18:16 +02:00
2018-09-24 23:55:03 +02:00
/* Bit flags describing how a feature is to be treated; passed to
 * hb_ot_map_builder_t::add_feature() and stored in hb_ot_map_feature_t.
 * The F_GLOBAL_* and F_MANUAL_JOINERS names are convenience combinations of
 * the single-bit flags. */
enum hb_ot_map_feature_flags_t
{
  F_NONE                 = 0x0000u,
  F_GLOBAL               = 0x0001u, /* Feature applies to all characters; results in no mask allocated for it. */
  F_HAS_FALLBACK         = 0x0002u, /* Has fallback implementation, so include mask bit even if feature not found. */
  F_MANUAL_ZWNJ          = 0x0004u, /* Don't skip over ZWNJ when matching **context**. */
  F_MANUAL_ZWJ           = 0x0008u, /* Don't skip over ZWJ when matching **input**. */
  F_MANUAL_JOINERS       = F_MANUAL_ZWNJ | F_MANUAL_ZWJ,
  F_GLOBAL_MANUAL_JOINERS= F_GLOBAL | F_MANUAL_JOINERS,
  F_GLOBAL_HAS_FALLBACK  = F_GLOBAL | F_HAS_FALLBACK,
  F_GLOBAL_SEARCH        = 0x0010u, /* If feature not found in LangSys, look for it in global feature list and pick one. */
  F_RANDOM               = 0x0020u, /* Randomly select a glyph from an AlternateSubstFormat1 subtable. */
  F_PER_SYLLABLE         = 0x0040u  /* Contain lookup application to within syllable. */
};
/* NOTE(review): presumably provides the bitwise operators that let
 * expressions such as `F_GLOBAL | flags` keep the enum type -- confirm in the
 * macro's definition. */
HB_MARK_AS_FLAG_T (hb_ot_map_feature_flags_t);
2018-09-24 23:55:03 +02:00
/* A feature tag bundled with the flags to add it with; accepted by the
 * single-argument hb_ot_map_builder_t::add_feature() overload. */
struct hb_ot_map_feature_t
{
/* Feature tag. */
hb_tag_t tag;
/* Flags forwarded to add_feature() alongside the tag. */
hb_ot_map_feature_flags_t flags;
};
struct hb_ot_shape_plan_key_t;
2010-10-09 01:18:16 +02:00
2011-07-07 22:20:35 +02:00
struct hb_ot_map_builder_t
{
public:
HB_INTERNAL hb_ot_map_builder_t (hb_face_t *face_,
const hb_segment_properties_t &props_);
2011-08-05 01:49:05 +02:00
HB_INTERNAL ~hb_ot_map_builder_t ();
2018-09-25 00:01:53 +02:00
HB_INTERNAL void add_feature (hb_tag_t tag,
hb_ot_map_feature_flags_t flags=F_NONE,
unsigned int value=1);
2011-07-07 22:20:35 +02:00
HB_INTERNAL bool has_feature (hb_tag_t tag);
void add_feature (const hb_ot_map_feature_t &feat)
2018-09-25 00:01:53 +02:00
{ add_feature (feat.tag, feat.flags); }
2018-09-24 23:55:03 +02:00
void enable_feature (hb_tag_t tag,
2020-07-22 15:00:48 +02:00
hb_ot_map_feature_flags_t flags=F_NONE,
unsigned int value=1)
2018-10-02 14:48:39 +02:00
{ add_feature (tag, F_GLOBAL | flags, value); }
2011-07-07 22:20:35 +02:00
void disable_feature (hb_tag_t tag)
2018-09-25 00:01:53 +02:00
{ add_feature (tag, F_GLOBAL, 0); }
2018-09-24 23:55:03 +02:00
void add_gsub_pause (hb_ot_map_t::pause_func_t pause_func)
2012-08-02 15:44:18 +02:00
{ add_pause (0, pause_func); }
void add_gpos_pause (hb_ot_map_t::pause_func_t pause_func)
2012-08-02 15:44:18 +02:00
{ add_pause (1, pause_func); }
HB_INTERNAL void compile (hb_ot_map_t &m,
const hb_ot_shape_plan_key_t &key);
2011-07-07 22:20:35 +02:00
private:
HB_INTERNAL void add_lookups (hb_ot_map_t &m,
unsigned int table_index,
unsigned int feature_index,
unsigned int variations_index,
hb_mask_t mask,
bool auto_zwnj = true,
2018-01-25 20:22:03 +01:00
bool auto_zwj = true,
bool random = false,
bool per_syllable = false,
hb_tag_t feature_tag = HB_TAG(' ',' ',' ',' '));
2011-07-07 22:20:35 +02:00
struct feature_info_t {
hb_tag_t tag;
unsigned int seq; /* sequence#, used for stable sorting only */
unsigned int max_value;
hb_ot_map_feature_flags_t flags;
2011-07-07 22:20:35 +02:00
unsigned int default_value; /* for non-global features, what should the unset glyphs take */
unsigned int stage[2]; /* GSUB/GPOS */
2011-07-07 22:20:35 +02:00
HB_INTERNAL static int cmp (const void *pa, const void *pb)
2017-10-30 00:01:47 +01:00
{
const feature_info_t *a = (const feature_info_t *) pa;
const feature_info_t *b = (const feature_info_t *) pb;
return (a->tag != b->tag) ? (a->tag < b->tag ? -1 : 1) :
(a->seq < b->seq ? -1 : a->seq > b->seq ? 1 : 0);
}
2011-07-07 22:20:35 +02:00
};
struct stage_info_t {
unsigned int index;
2013-04-21 21:21:49 +02:00
hb_ot_map_t::pause_func_t pause_func;
};
2012-08-02 15:44:18 +02:00
HB_INTERNAL void add_pause (unsigned int table_index, hb_ot_map_t::pause_func_t pause_func);
public:
hb_face_t *face;
hb_segment_properties_t props;
hb_tag_t chosen_script[2];
2012-11-15 01:24:05 +01:00
bool found_script[2];
unsigned int script_index[2], language_index[2];
private:
unsigned int current_stage[2]; /* GSUB/GPOS */
hb_vector_t<feature_info_t> feature_infos;
hb_vector_t<stage_info_t> stages[2]; /* GSUB/GPOS */
2011-07-07 22:20:35 +02:00
};
2010-10-09 01:18:16 +02:00
#endif /* HB_OT_MAP_HH */