2008-01-24 09:11:09 +01:00
|
|
|
/*
|
2011-04-21 23:14:28 +02:00
|
|
|
* Copyright © 2007,2008,2009 Red Hat, Inc.
|
2012-07-30 08:38:39 +02:00
|
|
|
* Copyright © 2012 Google, Inc.
|
2008-01-24 09:11:09 +01:00
|
|
|
*
|
2010-04-22 06:11:43 +02:00
|
|
|
* This is part of HarfBuzz, a text shaping library.
|
2008-01-24 09:11:09 +01:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, without written agreement and without
|
|
|
|
* license or royalty fees, to use, copy, modify, and distribute this
|
|
|
|
* software and its documentation for any purpose, provided that the
|
|
|
|
* above copyright notice and the following two paragraphs appear in
|
|
|
|
* all copies of this software.
|
|
|
|
*
|
|
|
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
|
|
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
|
|
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
|
|
* DAMAGE.
|
|
|
|
*
|
|
|
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
|
|
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
|
|
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
|
|
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
|
|
|
*
|
|
|
|
* Red Hat Author(s): Behdad Esfahbod
|
2012-07-30 08:38:39 +02:00
|
|
|
* Google Author(s): Behdad Esfahbod
|
2008-01-24 09:11:09 +01:00
|
|
|
*/
|
|
|
|
|
2010-06-09 12:32:56 +02:00
|
|
|
#ifndef HB_OT_LAYOUT_PRIVATE_HH
|
|
|
|
#define HB_OT_LAYOUT_PRIVATE_HH
|
2008-01-24 09:11:09 +01:00
|
|
|
|
2011-04-21 00:50:27 +02:00
|
|
|
#include "hb-private.hh"
|
2009-08-02 21:20:22 +02:00
|
|
|
|
2008-01-24 09:11:09 +01:00
|
|
|
#include "hb-ot-layout.h"
|
2009-08-02 21:20:22 +02:00
|
|
|
|
2011-04-21 00:50:27 +02:00
|
|
|
#include "hb-font-private.hh"
|
2010-05-13 00:23:21 +02:00
|
|
|
#include "hb-buffer-private.hh"
|
2012-08-02 03:46:36 +02:00
|
|
|
#include "hb-set-private.hh"
|
2008-01-24 09:11:09 +01:00
|
|
|
|
2009-08-04 03:40:20 +02:00
|
|
|
|
2012-07-30 08:38:39 +02:00
|
|
|
/* Scratch slots inside hb_glyph_info_t::var1 that are in use while
 * GSUB/GPOS processing runs.  Each macro expands to a member path and is
 * meant to be written as `info.glyph_props()` / `info->glyph_props()`. */
#define glyph_props()		var1.u16[0]	/* GDEF glyph properties */
#define syllable()		var1.u8[2]	/* GSUB/GPOS shaping boundaries */
#define lig_props()		var1.u8[3]	/* GSUB/GPOS ligature tracking */
|
2012-07-30 08:38:39 +02:00
|
|
|
|
[OTLayout] Ignore default-ignorables when matching GSUB/GPOS
When matching lookups, be smart about default-ignorable characters.
In particular:
Do nothing specific about ZWNJ, but for the other default-ignorables:
If the lookup in question uses the ignorable character in a sequence,
then match it as we used to do. However, if the sequence match will
fail because the default-ignorable blocked it, try skipping the
ignorable character and continue.
The most immediate thing it means is that if Lam-Alef forms a ligature,
then Lam-ZWJ-Alef will do too. Finally!
One exception: when matching for GPOS, or for backtrack/lookahead of
GSUB, we ignore ZWNJ too. That's the right thing to do.
It certainly is possible to build fonts that this feature will result
in undesirable glyphs, but it's hard to think of a real-world case
that that would happen.
This *does* break Indic shaping right now, since Indic Unicode has
specific rules for what ZWJ/ZWNJ mean, and skipping ZWJ is breaking
those rules. That will be fixed in upcoming commits.
2013-02-14 13:43:13 +01:00
|
|
|
/* Scratch slots inside hb_glyph_info_t::var2 that stay allocated for the
 * entire shaping process. */
#define unicode_props0()	var2.u8[0]	/* general category + default-ignorable/ZWNJ/ZWJ flag bits */
#define unicode_props1()	var2.u8[1]	/* modified combining class */
|
|
|
|
|
|
|
|
|
|
|
|
inline void
|
|
|
|
_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
|
|
|
|
{
|
|
|
|
info->unicode_props0() = ((unsigned int) unicode->general_category (info->codepoint)) |
|
|
|
|
(unicode->is_default_ignorable (info->codepoint) ? 0x80 : 0) |
|
2013-02-14 16:46:52 +01:00
|
|
|
(info->codepoint == 0x200C ? 0x40 : 0) |
|
|
|
|
(info->codepoint == 0x200D ? 0x20 : 0);
|
[OTLayout] Ignore default-ignorables when matching GSUB/GPOS
When matching lookups, be smart about default-ignorable characters.
In particular:
Do nothing specific about ZWNJ, but for the other default-ignorables:
If the lookup in question uses the ignorable character in a sequence,
then match it as we used to do. However, if the sequence match will
fail because the default-ignorable blocked it, try skipping the
ignorable character and continue.
The most immediate thing it means is that if Lam-Alef forms a ligature,
then Lam-ZWJ-Alef will do to. Finally!
One exception: when matching for GPOS, or for backtrack/lookahead of
GSUB, we ignore ZWNJ too. That's the right thing to do.
It certainly is possible to build fonts that this feature will result
in undesirable glyphs, but it's hard to think of a real-world case
that that would happen.
This *does* break Indic shaping right now, since Indic Unicode has
specific rules for what ZWJ/ZWNJ mean, and skipping ZWJ is breaking
those rules. That will be fixed in upcoming commits.
2013-02-14 13:43:13 +01:00
|
|
|
info->unicode_props1() = unicode->modified_combining_class (info->codepoint);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return the general category cached in the low five bits (mask 0x1F) of
 * unicode_props0(); the flag bits above it are discarded. */
inline hb_unicode_general_category_t
_hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
{
  const unsigned int category_bits = info->unicode_props0() & 0x1F;
  return (hb_unicode_general_category_t) category_bits;
}
|
|
|
|
|
|
|
|
inline void
|
|
|
|
_hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info, unsigned int modified_class)
|
|
|
|
{
|
|
|
|
info->unicode_props1() = modified_class;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline unsigned int
|
|
|
|
_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
|
|
|
|
{
|
|
|
|
return info->unicode_props1();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* True (1) when the default-ignorable flag, bit 0x80 of unicode_props0(),
 * is set for this glyph; 0 otherwise. */
inline hb_bool_t
_hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info)
{
  return (info->unicode_props0() & 0x80) != 0;
}
|
|
|
|
|
|
|
|
/* True (1) when bit 0x40 of unicode_props0() is set, i.e. the glyph was
 * recorded as coming from U+200C; 0 otherwise. */
inline hb_bool_t
_hb_glyph_info_is_zwnj (const hb_glyph_info_t *info)
{
  return (info->unicode_props0() & 0x40) != 0;
}
|
|
|
|
|
2013-02-14 16:46:52 +01:00
|
|
|
/* True (1) when bit 0x20 of unicode_props0() is set, i.e. the glyph was
 * recorded as coming from U+200D; 0 otherwise. */
inline hb_bool_t
_hb_glyph_info_is_zwj (const hb_glyph_info_t *info)
{
  return (info->unicode_props0() & 0x20) != 0;
}
|
|
|
|
|
[OTLayout] Ignore default-ignorables when matching GSUB/GPOS
When matching lookups, be smart about default-ignorable characters.
In particular:
Do nothing specific about ZWNJ, but for the other default-ignorables:
If the lookup in question uses the ignorable character in a sequence,
then match it as we used to do. However, if the sequence match will
fail because the default-ignorable blocked it, try skipping the
ignorable character and continue.
The most immediate thing it means is that if Lam-Alef forms a ligature,
then Lam-ZWJ-Alef will do too. Finally!
One exception: when matching for GPOS, or for backtrack/lookahead of
GSUB, we ignore ZWNJ too. That's the right thing to do.
It certainly is possible to build fonts that this feature will result
in undesirable glyphs, but it's hard to think of a real-world case
that that would happen.
This *does* break Indic shaping right now, since Indic Unicode has
specific rules for what ZWJ/ZWNJ mean, and skipping ZWJ is breaking
those rules. That will be fixed in upcoming commits.
2013-02-14 13:43:13 +01:00
|
|
|
|
2012-07-27 08:12:28 +02:00
|
|
|
/* Reinterpret the `shaper_data.ot` member of a face as an hb_ot_layout_t
 * pointer.  The argument is parenthesized so that any pointer-valued
 * expression (e.g. `cond ? a : b` or `*pp`) binds before `->`. */
#define hb_ot_layout_from_face(face) ((hb_ot_layout_t *) (face)->shaper_data.ot)
|
2010-07-23 21:11:18 +02:00
|
|
|
|
2011-05-03 06:35:53 +02:00
|
|
|
/*
|
|
|
|
* GDEF
|
|
|
|
*/
|
|
|
|
|
2012-11-16 22:34:29 +01:00
|
|
|
typedef enum {
|
2013-02-21 21:54:05 +01:00
|
|
|
HB_OT_LAYOUT_GLYPH_PROPS_UNCLASSIFIED = 1 << HB_OT_LAYOUT_GLYPH_CLASS_UNCLASSIFIED,
|
|
|
|
HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH = 1 << HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH,
|
|
|
|
HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE = 1 << HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE,
|
|
|
|
HB_OT_LAYOUT_GLYPH_PROPS_MARK = 1 << HB_OT_LAYOUT_GLYPH_CLASS_MARK,
|
|
|
|
HB_OT_LAYOUT_GLYPH_PROPS_COMPONENT = 1 << HB_OT_LAYOUT_GLYPH_CLASS_COMPONENT
|
2012-11-16 22:34:29 +01:00
|
|
|
} hb_ot_layout_glyph_class_mask_t;
|
|
|
|
|
2008-01-24 09:54:09 +01:00
|
|
|
|
2011-05-03 06:35:53 +02:00
|
|
|
|
2012-07-24 02:14:13 +02:00
|
|
|
/*
|
|
|
|
* GSUB/GPOS
|
|
|
|
*/
|
|
|
|
|
2012-07-26 00:37:51 +02:00
|
|
|
/* lig_id / lig_comp
|
|
|
|
*
|
|
|
|
* When a ligature is formed:
|
|
|
|
*
|
2012-07-29 03:05:25 +02:00
|
|
|
* - The ligature glyph and any marks in between all get the same newly allocated
|
|
|
|
* lig_id,
|
2012-07-30 06:42:07 +02:00
|
|
|
* - The ligature glyph will get lig_num_comps set to the number of components
|
2012-07-26 00:37:51 +02:00
|
|
|
* - The marks get lig_comp > 0, reflecting which component of the ligature
|
|
|
|
* they were applied to.
|
|
|
|
* - This is used in GPOS to attach marks to the right component of a ligature
|
|
|
|
* in MarkLigPos.
|
|
|
|
*
|
|
|
|
* When a multiple-substitution is done:
|
|
|
|
*
|
|
|
|
* - All resulting glyphs will have lig_id = 0,
|
|
|
|
* - The resulting glyphs will have lig_comp = 0, 1, 2, ... respectively.
|
2012-07-29 03:05:25 +02:00
|
|
|
* - This is used in GPOS to attach marks to the first component of a
|
2012-07-26 00:37:51 +02:00
|
|
|
* multiple substitution in MarkBasePos.
|
|
|
|
*
|
|
|
|
* The numbers are also used in GPOS to do mark-to-mark positioning only
|
|
|
|
* to marks that belong to the same component of a ligature in MarkMarkPos.
|
|
|
|
*/
|
2012-07-30 06:42:07 +02:00
|
|
|
/* Flag bit inside lig_props(): set by set_lig_props_for_ligature() on the
 * ligature glyph itself and left clear by set_lig_props_for_mark().  It sits
 * between the low four bits (component count / component index) and the
 * lig_id stored from bit 5 upwards. */
#define IS_LIG_BASE 0x10
|
2012-07-24 02:14:13 +02:00
|
|
|
static inline void
|
2012-07-30 04:02:24 +02:00
|
|
|
set_lig_props_for_ligature (hb_glyph_info_t &info, unsigned int lig_id, unsigned int lig_num_comps)
|
|
|
|
{
|
2012-07-30 06:42:07 +02:00
|
|
|
info.lig_props() = (lig_id << 5) | IS_LIG_BASE | (lig_num_comps & 0x0F);
|
2012-07-30 04:02:24 +02:00
|
|
|
}
|
|
|
|
static inline void
|
|
|
|
set_lig_props_for_mark (hb_glyph_info_t &info, unsigned int lig_id, unsigned int lig_comp)
|
|
|
|
{
|
|
|
|
info.lig_props() = (lig_id << 5) | (lig_comp & 0x0F);
|
|
|
|
}
|
|
|
|
static inline void
|
|
|
|
set_lig_props_for_component (hb_glyph_info_t &info, unsigned int comp)
|
|
|
|
{
|
|
|
|
set_lig_props_for_mark (info, 0, comp);
|
|
|
|
}
|
|
|
|
|
2012-07-24 02:14:13 +02:00
|
|
|
static inline unsigned int
|
|
|
|
get_lig_id (const hb_glyph_info_t &info)
|
|
|
|
{
|
2012-07-30 04:02:24 +02:00
|
|
|
return info.lig_props() >> 5;
|
2012-07-24 02:14:13 +02:00
|
|
|
}
|
2012-07-30 06:42:07 +02:00
|
|
|
static inline bool
|
|
|
|
is_a_ligature (const hb_glyph_info_t &info)
|
|
|
|
{
|
|
|
|
return !!(info.lig_props() & IS_LIG_BASE);
|
|
|
|
}
|
2012-07-24 02:14:13 +02:00
|
|
|
static inline unsigned int
|
|
|
|
get_lig_comp (const hb_glyph_info_t &info)
|
|
|
|
{
|
2012-07-30 06:42:07 +02:00
|
|
|
if (is_a_ligature (info))
|
2012-07-30 04:02:24 +02:00
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return info.lig_props() & 0x0F;
|
|
|
|
}
|
|
|
|
static inline unsigned int
|
|
|
|
get_lig_num_comps (const hb_glyph_info_t &info)
|
|
|
|
{
|
2012-11-16 22:34:29 +01:00
|
|
|
if ((info.glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE) && is_a_ligature (info))
|
2012-07-30 04:02:24 +02:00
|
|
|
return info.lig_props() & 0x0F;
|
|
|
|
else
|
|
|
|
return 1;
|
2012-07-24 02:14:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Draw a fresh ligature id from the buffer's serial counter.
 * Only the low three bits of the serial are kept, and a result of zero is
 * never handed out: the counter is drawn again until the masked value is
 * non-zero (the wrap-around case). */
static inline uint8_t allocate_lig_id (hb_buffer_t *buffer) {
  uint8_t lig_id;

  do
    lig_id = buffer->next_serial () & 0x07;
  while (unlikely (!lig_id));

  return lig_id;
}
|
|
|
|
|
|
|
|
|
2012-07-30 08:38:39 +02:00
|
|
|
/* Declaration only; the definition is not part of this header.
 * Takes a face, a lookup index, an array of `glyphs_length` glyph ids and a
 * `zero_context` flag.
 * NOTE(review): presumably reports whether the lookup would substitute the
 * given glyph sequence -- confirm against the definition. */
HB_INTERNAL hb_bool_t
hb_ot_layout_lookup_would_substitute_fast (hb_face_t            *face,
					   unsigned int          lookup_index,
					   const hb_codepoint_t *glyphs,
					   unsigned int          glyphs_length,
					   hb_bool_t             zero_context);
|
2012-07-30 08:38:39 +02:00
|
|
|
|
2012-08-02 14:11:14 +02:00
|
|
|
|
|
|
|
/* Should be called before all the substitute_lookup's are done. */
|
|
|
|
HB_INTERNAL void
|
2012-08-02 14:36:40 +02:00
|
|
|
hb_ot_layout_substitute_start (hb_font_t *font,
|
2012-08-02 14:11:14 +02:00
|
|
|
hb_buffer_t *buffer);
|
|
|
|
|
2012-07-30 08:38:39 +02:00
|
|
|
/* Declaration only; the definition is not part of this header.
 * Takes the font, the buffer, the index of the lookup, a glyph mask and
 * the `auto_zwj` switch.
 * NOTE(review): `auto_zwj` presumably enables the automatic ZWJ handling
 * during input matching -- confirm against the definition. */
HB_INTERNAL hb_bool_t
hb_ot_layout_substitute_lookup (hb_font_t    *font,
				hb_buffer_t  *buffer,
				unsigned int  lookup_index,
				hb_mask_t     mask,
				hb_bool_t     auto_zwj);
|
2012-08-02 14:11:14 +02:00
|
|
|
|
|
|
|
/* Should be called after all the substitute_lookup's are done */
|
|
|
|
HB_INTERNAL void
|
2012-08-02 14:36:40 +02:00
|
|
|
hb_ot_layout_substitute_finish (hb_font_t *font,
|
2012-08-02 14:11:14 +02:00
|
|
|
hb_buffer_t *buffer);
|
|
|
|
|
|
|
|
|
|
|
|
/* Should be called before all the position_lookup's are done. Resets positions to zero. */
|
|
|
|
HB_INTERNAL void
|
|
|
|
hb_ot_layout_position_start (hb_font_t *font,
|
|
|
|
hb_buffer_t *buffer);
|
2012-07-30 08:38:39 +02:00
|
|
|
|
|
|
|
/* Declaration only; the definition is not part of this header.
 * Takes the font, the buffer, the index of the lookup, a glyph mask and
 * the `auto_zwj` switch.
 * NOTE(review): `auto_zwj` presumably enables the automatic ZWJ handling
 * during matching -- confirm against the definition. */
HB_INTERNAL hb_bool_t
hb_ot_layout_position_lookup (hb_font_t    *font,
			      hb_buffer_t  *buffer,
			      unsigned int  lookup_index,
			      hb_mask_t     mask,
			      hb_bool_t     auto_zwj);
|
2012-08-02 14:11:14 +02:00
|
|
|
|
|
|
|
/* Should be called after all the position_lookup's are done */
|
|
|
|
HB_INTERNAL void
|
|
|
|
hb_ot_layout_position_finish (hb_font_t *font,
|
Adjust mark advance-width zeroing logic for Myanmar
Before, we were zeroing advance width of attached marks for
non-Indic scripts, and not doing it for Indic.
We have now three different behaviors, which seem to better
reflect what Uniscribe is doing:
- For Indic, no explicit zeroing happens whatsoever, which
is the same as before,
- For Myanmar, zero advance width of glyphs marked as marks
*in GDEF*, and do that *before* applying GPOS. This seems
to be what the new Win8 Myanmar shaper does,
- For everything else, zero advance width of glyphs that are
from General_Category=Mn Unicode characters, and do so
before applying GPOS. This seems to be what Uniscribe does
for Latin at least.
With these changes, positioning of all tests matches for Myanmar,
except for the glitch in Uniscribe not applying 'mark'. See preivous
commit.
2013-02-12 15:44:57 +01:00
|
|
|
hb_buffer_t *buffer);
|
2012-08-02 14:11:14 +02:00
|
|
|
|
2012-07-30 08:38:39 +02:00
|
|
|
|
|
|
|
|
2009-08-02 23:41:36 +02:00
|
|
|
/*
|
|
|
|
* hb_ot_layout_t
|
|
|
|
*/
|
|
|
|
|
2012-08-28 23:57:49 +02:00
|
|
|
/* Forward declarations of the OT table types that hb_ot_layout_t refers to
 * by pointer; their full definitions are not needed in this header. */
namespace OT {
  struct GDEF;
  struct GSUB;
  struct GPOS;
}
|
|
|
|
|
2010-05-19 21:45:06 +02:00
|
|
|
struct hb_ot_layout_t
|
2009-05-20 05:58:54 +02:00
|
|
|
{
|
2009-08-04 04:01:47 +02:00
|
|
|
hb_blob_t *gdef_blob;
|
|
|
|
hb_blob_t *gsub_blob;
|
|
|
|
hb_blob_t *gpos_blob;
|
|
|
|
|
2012-08-28 23:57:49 +02:00
|
|
|
const struct OT::GDEF *gdef;
|
|
|
|
const struct OT::GSUB *gsub;
|
|
|
|
const struct OT::GPOS *gpos;
|
2012-08-02 03:46:36 +02:00
|
|
|
|
2012-08-02 10:00:31 +02:00
|
|
|
unsigned int gsub_lookup_count;
|
|
|
|
unsigned int gpos_lookup_count;
|
|
|
|
|
2012-08-02 03:46:36 +02:00
|
|
|
hb_set_digest_t *gsub_digests;
|
|
|
|
hb_set_digest_t *gpos_digests;
|
2009-08-02 23:41:36 +02:00
|
|
|
};
|
2009-05-18 23:09:33 +02:00
|
|
|
|
2009-05-16 00:54:53 +02:00
|
|
|
|
2010-05-19 21:45:06 +02:00
|
|
|
/* Declaration only: returns an hb_ot_layout_t pointer for `face`.
 * NOTE(review): ownership and failure behaviour are set by the definition,
 * which is not visible here; presumably paired with _hb_ot_layout_destroy(). */
HB_INTERNAL hb_ot_layout_t *_hb_ot_layout_create (hb_face_t *face);
|
2009-08-04 03:40:20 +02:00
|
|
|
|
2010-05-13 05:22:55 +02:00
|
|
|
/* Declaration only: takes the hb_ot_layout_t to dispose of.
 * NOTE(review): presumably releases what _hb_ot_layout_create() set up --
 * confirm against the definition. */
HB_INTERNAL void _hb_ot_layout_destroy (hb_ot_layout_t *layout);
|
2009-05-26 23:31:56 +02:00
|
|
|
|
2010-07-23 21:11:18 +02:00
|
|
|
|
2008-01-24 09:11:09 +01:00
|
|
|
|
2010-06-09 12:32:56 +02:00
|
|
|
#endif /* HB_OT_LAYOUT_PRIVATE_HH */
|