[arabic] Implement Windows-1256 private shaping

Bug 1045139 - The Arabic text with "MS Sans Serif" font is rendered bad
https://bugzilla.mozilla.org/show_bug.cgi?id=1045139

This is only enabled on Windows platforms, and requires support from
Uniscribe to work.  But for clients that do hook up to Uniscribe, this
fixes shaping of Windows-1256-encoded bitmap fonts like "MS Sans Serif".

The code and table together have a footprint of just under 1 KB when
enabled.

UNTESTED.  I might even have broken regular Arabic fallback shaping.
This commit is contained in:
Behdad Esfahbod 2014-07-30 02:15:44 -04:00
parent 88911e8cc7
commit f28b1c823d
4 changed files with 442 additions and 14 deletions

View File

@ -91,6 +91,7 @@ HBSOURCES += \
hb-ot-shape-complex-arabic.cc \
hb-ot-shape-complex-arabic-fallback.hh \
hb-ot-shape-complex-arabic-table.hh \
hb-ot-shape-complex-arabic-win1256.hh \
hb-ot-shape-complex-default.cc \
hb-ot-shape-complex-hangul.cc \
hb-ot-shape-complex-hebrew.cc \

View File

@ -27,7 +27,7 @@ echo 'Checking that source files #include "hb-*private.hh" first (or none)'
for x in $HBSOURCES; do
test -f "$srcdir/$x" && x="$srcdir/$x"
grep '#.*\<include\>' "$x" /dev/null | head -n 1
grep '#.*\<include\>' "$x" /dev/null | grep -v 'include _' | head -n 1
done |
grep -v '"hb-.*private[.]hh"' |
grep -v 'hb-private[.]hh:' |

View File

@ -81,6 +81,9 @@ arabic_fallback_synthesize_lookup_single (const hb_ot_shape_plan_t *plan HB_UNUS
num_glyphs++;
}
if (!num_glyphs)
return NULL;
/* Bubble-sort!
* May not be good-enough for presidential candidate interviews, but good-enough for us... */
hb_bubble_sort (&glyphs[0], num_glyphs, OT::GlyphID::cmp, &substitutes[0]);
@ -158,6 +161,9 @@ arabic_fallback_synthesize_lookup_ligature (const hb_ot_shape_plan_t *plan HB_UN
}
}
if (!num_ligatures)
return NULL;
OT::Supplier<OT::GlyphID> first_glyphs_supplier (first_glyphs, num_first_glyphs);
OT::Supplier<unsigned int > ligature_per_first_glyph_count_supplier (ligature_per_first_glyph_count_list, num_first_glyphs);
OT::Supplier<OT::GlyphID> ligatures_supplier (ligature_list, num_ligatures);
@ -198,6 +204,9 @@ struct arabic_fallback_plan_t
{
ASSERT_POD ();
unsigned int num_lookups;
bool free_lookups;
hb_mask_t mask_array[ARABIC_NUM_FALLBACK_FEATURES];
OT::SubstLookup *lookup_array[ARABIC_NUM_FALLBACK_FEATURES];
hb_ot_layout_lookup_accelerator_t accel_array[ARABIC_NUM_FALLBACK_FEATURES];
@ -205,6 +214,91 @@ struct arabic_fallback_plan_t
static const arabic_fallback_plan_t arabic_fallback_plan_nil = {};
/* Turn on the Windows-1256 fallback by default when building for Windows
 * or Cygwin; other builds can opt in by defining HB_WITH_WIN1256 themselves. */
#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(HB_WITH_WIN1256)
#define HB_WITH_WIN1256
#endif
/* The hand-coded lookup table is only pulled in when the feature is enabled. */
#ifdef HB_WITH_WIN1256
#include "hb-ot-shape-complex-arabic-win1256.hh"
#endif
/* One entry of the hand-coded table's manifest.  This struct is overlaid
 * directly on the table bytes (see the reinterpret_cast in
 * arabic_fallback_plan_init_win1256), so field order and sizes must match
 * what MANIFEST_LOOKUP() emits: one USHORT followed by one 16-bit offset. */
struct ManifestLookup {
/* Value handed to plan->map.get_1_mask() to find the mask for this lookup. */
OT::USHORT fallbackType;
/* Offset of the lookup, resolved relative to the start of the manifest. */
OT::OffsetTo<OT::SubstLookup> lookupOffset;
};
/* The manifest itself: a 16-bit count followed by that many entries. */
typedef OT::ArrayOf<ManifestLookup> Manifest;
/* Fills fallback_plan from the hand-coded Windows-1256 lookup table.
 *
 * Returns true if at least one lookup was installed.  Returns false if the
 * feature is compiled out, if the font does not look Windows-1256-encoded,
 * or if no manifest entry yields a non-zero mask.  Lookups installed here
 * point into static table data, so free_lookups is set to false. */
static bool
arabic_fallback_plan_init_win1256 (arabic_fallback_plan_t *fallback_plan,
				   const hb_ot_shape_plan_t *plan,
				   hb_font_t *font)
{
#ifndef HB_WITH_WIN1256
  return false;
#else
  /* Does this font look like it's Windows-1256-encoded?  Every probe below
   * must map to exactly the listed glyph id; the first mismatch ends the
   * check. */
  static const struct {
    hb_codepoint_t unicode;
    hb_codepoint_t glyph;
  } probes[] = {
    {0x0627u, 199}, /* ALEF */
    {0x0644u, 225}, /* LAM */
    {0x0649u, 236}, /* ALEF MAKSURA */
    {0x064Au, 237}, /* YEH */
    {0x0652u, 250}  /* SUKUN */
  };
  for (unsigned int p = 0; p < sizeof (probes) / sizeof (probes[0]); p++)
  {
    hb_codepoint_t glyph;
    if (!hb_font_get_glyph (font, probes[p].unicode, 0, &glyph) ||
	glyph != probes[p].glyph)
      return false;
  }

  const Manifest &manifest = reinterpret_cast<const Manifest&> (arabic_win1256_gsub_lookups.manifest);
  ASSERT_STATIC (sizeof (arabic_win1256_gsub_lookups.manifestData) / sizeof (ManifestLookup)
		 <= ARABIC_NUM_FALLBACK_FEATURES);
  /* TODO sanitize the table? */

  /* Walk the manifest, packing usable lookups densely at the front of the
   * plan's arrays.  A slot is only consumed once its accelerator is set up. */
  unsigned int used = 0;
  const unsigned int entries = manifest.len;
  for (unsigned int i = 0; i < entries; i++)
  {
    fallback_plan->mask_array[used] = plan->map.get_1_mask (manifest[i].fallbackType);
    if (!fallback_plan->mask_array[used])
      continue;

    OT::SubstLookup *lookup = const_cast<OT::SubstLookup*> (&(&manifest+manifest[i].lookupOffset));
    fallback_plan->lookup_array[used] = lookup;
    if (!lookup)
      continue;

    fallback_plan->accel_array[used].init (*lookup);
    used++;
  }

  fallback_plan->num_lookups = used;
  fallback_plan->free_lookups = false;
  return used > 0;
#endif
}
/* Fills fallback_plan with lookups synthesized per fallback feature.
 *
 * For every entry of arabic_fallback_features that has a non-zero mask in
 * the plan's map, a lookup is synthesized; successful ones are packed
 * densely at the front of the plan's arrays.  Sets free_lookups to true and
 * returns true if at least one lookup was installed. */
static bool
arabic_fallback_plan_init_unicode (arabic_fallback_plan_t *fallback_plan,
				   const hb_ot_shape_plan_t *plan,
				   hb_font_t *font)
{
  unsigned int filled = 0;

  for (unsigned int feature = 0; feature < ARABIC_NUM_FALLBACK_FEATURES; feature++)
  {
    fallback_plan->mask_array[filled] = plan->map.get_1_mask (arabic_fallback_features[feature]);
    if (!fallback_plan->mask_array[filled])
      continue;

    fallback_plan->lookup_array[filled] = arabic_fallback_synthesize_lookup (plan, font, feature);
    if (!fallback_plan->lookup_array[filled])
      continue;

    /* Only a slot with both a mask and a lookup is kept. */
    fallback_plan->accel_array[filled].init (*fallback_plan->lookup_array[filled]);
    filled++;
  }

  fallback_plan->num_lookups = filled;
  fallback_plan->free_lookups = true;
  return filled > 0;
}
static arabic_fallback_plan_t *
arabic_fallback_plan_create (const hb_ot_shape_plan_t *plan,
hb_font_t *font)
@ -213,17 +307,21 @@ arabic_fallback_plan_create (const hb_ot_shape_plan_t *plan,
if (unlikely (!fallback_plan))
return const_cast<arabic_fallback_plan_t *> (&arabic_fallback_plan_nil);
for (unsigned int i = 0; i < ARABIC_NUM_FALLBACK_FEATURES; i++)
{
fallback_plan->mask_array[i] = plan->map.get_1_mask (arabic_fallback_features[i]);
if (fallback_plan->mask_array[i]) {
fallback_plan->lookup_array[i] = arabic_fallback_synthesize_lookup (plan, font, i);
if (fallback_plan->lookup_array[i])
fallback_plan->accel_array[i].init (*fallback_plan->lookup_array[i]);
}
}
fallback_plan->num_lookups = 0;
fallback_plan->free_lookups = false;
return fallback_plan;
/* Try synthesizing GSUB table using Unicode Arabic Presentation Forms,
* in case the font has cmap entries for the presentation-forms characters. */
if (arabic_fallback_plan_init_unicode (fallback_plan, plan, font))
return fallback_plan;
/* See if this looks like a Windows-1256-encoded font. If it does, use a
* hand-coded GSUB table. */
if (arabic_fallback_plan_init_win1256 (fallback_plan, plan, font))
return fallback_plan;
free (fallback_plan);
return const_cast<arabic_fallback_plan_t *> (&arabic_fallback_plan_nil);
}
static void
@ -232,11 +330,12 @@ arabic_fallback_plan_destroy (arabic_fallback_plan_t *fallback_plan)
if (!fallback_plan || fallback_plan == &arabic_fallback_plan_nil)
return;
for (unsigned int i = 0; i < ARABIC_NUM_FALLBACK_FEATURES; i++)
for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
if (fallback_plan->lookup_array[i])
{
fallback_plan->accel_array[i].fini (fallback_plan->lookup_array[i]);
free (fallback_plan->lookup_array[i]);
if (fallback_plan->free_lookups)
free (fallback_plan->lookup_array[i]);
}
free (fallback_plan);
@ -248,7 +347,7 @@ arabic_fallback_plan_shape (arabic_fallback_plan_t *fallback_plan,
hb_buffer_t *buffer)
{
OT::hb_apply_context_t c (0, font, buffer);
for (unsigned int i = 0; i < ARABIC_NUM_FALLBACK_FEATURES; i++)
for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
if (fallback_plan->lookup_array[i]) {
c.set_lookup_mask (fallback_plan->mask_array[i]);
hb_ot_layout_substitute_lookup (&c,

View File

@ -0,0 +1,328 @@
/*
* Copyright © 2014 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH
/*
* The macros in the first part of this file are generic macros that can
* be used to define the bytes for OpenType table data in code in a
* readable manner. We can move the macros to reside with their respective
* struct types, but since we only use these to define one data table, the
* Windows-1256 Arabic shaping table in this file, we keep them here.
*/
/* First we measure, then we cut. */
#ifndef OT_MEASURE
#define OT_MEASURE
#define OT_TABLE_START static const struct TABLE_NAME { char start[0
#define OT_TABLE_END ]; }
#define OT_LABEL(Name) ]; char Name[0
#define OT_USHORT(u16) +2/*bytes*/
#define OT_DISTANCE(From,To,ItemSize) 0/*don't care */
#else
#undef OT_MEASURE
#define OT_TABLE_START TABLE_NAME = { {
#define OT_TABLE_END } };
#define OT_LABEL(Name) }, {
#define OT_USHORT(u16) ((u16)>>8), ((u16)&0xFF),
#define OT_DISTANCE(From,To,ItemSize) ((offsetof (struct TABLE_NAME, To) - \
offsetof (struct TABLE_NAME, From)) / (ItemSize) \
/* ASSERT_STATIC_EXPR it's divisible. */)
#endif
/* Whenever we receive an argument that is a list, it will expand to
 * contain commas. That cannot be passed to another macro because the
 * commas will throw off the preprocessor. The solution is to wrap
 * the passed-in argument in OT_LIST() before passing to the next macro.
 * Unfortunately this trick requires vararg macros.
 *
 * The standard `...` / __VA_ARGS__ spelling is used rather than the
 * GNU-only named form (`Args...`), so that non-GNU preprocessors accept it. */
#define OT_LIST(...) __VA_ARGS__
/*
 * Basic Types
 */
/* A 16-bit field holding the distance in bytes between two labels.
 * (During the measuring pass OT_DISTANCE is a placeholder; only the
 * field's size matters there.) */
#define OT_OFFSET(From, To) \
OT_USHORT(OT_DISTANCE(From, To, 1)) // Offset from From to To in bytes
/* A 16-bit field holding the number of ItemSize-byte items between two labels. */
#define OT_COUNT(Start, End, ItemSize) \
OT_USHORT(OT_DISTANCE(Start, End, ItemSize))
/* Same as OT_COUNT, for 2-byte items. */
#define OT_UCOUNT(Start,End) \
OT_COUNT(Start, End, 2) // USHORT count
#define OT_GLYPHID OT_USHORT // GlyphID
/* An array of 2-byte items preceded by its 16-bit item count.  Introduces
 * three labels: Name (the count), Name##Data (first item) and
 * Name##DataEnd (just past the last item). */
#define OT_UARRAY(Name, Items) \
OT_LABEL(Name) \
OT_UCOUNT(Name##Data, Name##DataEnd) \
OT_LABEL(Name##Data) \
Items \
OT_LABEL(Name##DataEnd)
/* Like OT_UARRAY, but the stored count is one more than the number of
 * items actually written after it. */
#define OT_UHEADLESSARRAY(Name, Items) \
OT_LABEL(Name) \
OT_USHORT(OT_DISTANCE(Name##Data, Name##DataEnd, 2) + 1) \
OT_LABEL(Name##Data) \
Items \
OT_LABEL(Name##DataEnd)
/*
 * Common Types
 */
/* A lookup header: 16-bit type, 16-bit flags, then a counted array of
 * 16-bit entries (SubLookupOffsets), labelled Name##SubLookupOffsetsArray. */
#define OT_LOOKUP(Name, LookupType, LookupFlag, SubLookupOffsets) \
OT_LABEL(Name) \
OT_USHORT(LookupType) \
OT_USHORT(LookupFlag) \
OT_UARRAY(Name##SubLookupOffsetsArray, OT_LIST(SubLookupOffsets))
/* A sub-lookup: a label, a 16-bit format number, then the format-specific
 * payload passed in as Items. */
#define OT_SUBLOOKUP(Name, SubFormat, Items) \
OT_LABEL(Name) \
OT_USHORT(SubFormat) \
Items
/* A coverage table written as the 16-bit value 1 followed by a counted
 * array of glyph ids. */
#define OT_COVERAGE1(Name, Items) \
OT_LABEL(Name) \
OT_USHORT(1) \
OT_UARRAY(Name##Glyphs, OT_LIST(Items))
/*
 * GSUB
 */
/* Single substitution, written with format number 2: offset to the
 * coverage, a counted array of replacement glyphs (ToGlyphs), then the
 * coverage itself listing FromGlyphs.  Nothing checks that the two lists
 * have the same length; the caller must ensure it. */
#define OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(Name, FromGlyphs, ToGlyphs) \
OT_SUBLOOKUP(Name, 2, \
OT_OFFSET(Name, Name##Coverage) \
OT_UARRAY(Name##Substitute, OT_LIST(ToGlyphs)) \
) \
OT_COVERAGE1(Name##Coverage, OT_LIST(FromGlyphs)) \
/* ASSERT_STATIC_EXPR len(FromGlyphs) == len(ToGlyphs) */
/* Multiple substitution, written with format number 1: offset to the
 * coverage, a counted array of sequence offsets, then the coverage listing
 * FromGlyphs.  The caller must keep the two lists the same length. */
#define OT_SUBLOOKUP_MULTIPLE_SUBST_FORMAT1(Name, FromGlyphs, SequenceOffsets) \
OT_SUBLOOKUP(Name, 1, \
OT_OFFSET(Name, Name##Coverage) \
OT_UARRAY(Name##SequenceOffsetsArray, OT_LIST(SequenceOffsets)) \
) \
OT_COVERAGE1(Name##Coverage, OT_LIST(FromGlyphs)) \
/* ASSERT_STATIC_EXPR len(FromGlyphs) == len(SequenceOffsets) */
/* A labelled, counted array of glyphs, used as a multiple-substitution target. */
#define OT_SEQUENCE(Name, SubstituteGlyphs) \
OT_LABEL(Name) \
OT_UARRAY(Name##SubstituteGlyphsArray, OT_LIST(SubstituteGlyphs))
/* Ligature substitution, written with format number 1: offset to the
 * coverage, a counted array of ligature-set offsets, then the coverage
 * listing FirstGlyphs.  The caller must keep the two lists the same length. */
#define OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(Name, FirstGlyphs, LigatureSetOffsets) \
OT_SUBLOOKUP(Name, 1, \
OT_OFFSET(Name, Name##Coverage) \
OT_UARRAY(Name##LigatureSetOffsetsArray, OT_LIST(LigatureSetOffsets)) \
) \
OT_COVERAGE1(Name##Coverage, OT_LIST(FirstGlyphs)) \
/* ASSERT_STATIC_EXPR len(FirstGlyphs) == len(LigatureSetOffsets) */
/* A labelled, counted array of offsets. */
#define OT_LIGATURE_SET(Name, LigatureSetOffsets) \
OT_LABEL(Name) \
OT_UARRAY(Name##LigatureSetOffsetsArray, OT_LIST(LigatureSetOffsets))
/* One ligature: the resulting glyph (LigGlyph) followed by a headless
 * array of Components, i.e. the stored count is one more than the number
 * of component glyphs listed. */
#define OT_LIGATURE(Name, Components, LigGlyph) \
OT_LABEL(Name) \
LigGlyph \
OT_UHEADLESSARRAY(Name##ComponentsArray, OT_LIST(Components))
/*
*
* Start of Windows-1256 shaping table.
*
*/
/* Table name.  Used both as the struct tag and as the name of the static
 * object that holds the table bytes. */
#define TABLE_NAME arabic_win1256_gsub_lookups
/* Table manifest: a 16-bit count of 4-byte entries followed by the
 * entries.  Introduces the labels manifest, manifestData and
 * manifestDataEnd. */
#define MANIFEST(Items) \
OT_LABEL(manifest) \
OT_COUNT(manifestData, manifestDataEnd, 4) \
OT_LABEL(manifestData) \
Items \
OT_LABEL(manifestDataEnd)
/* One 4-byte manifest entry: a 16-bit FallbackType and the 16-bit offset
 * of lookup Name measured from the manifest label. */
#define MANIFEST_LOOKUP(FallbackType, Name) \
OT_USHORT(FallbackType) \
OT_OFFSET(manifest, Name)
/* Shorthand. */
#define G OT_GLYPHID
/* We use this to differentiate a medial-Lam from an initial-Lam.
* In this two-shape encoding, those two have the same glyph. But
* for Lam-Alef ligature formations we need to differentiate. As
* such, we add a MultipleSubst to the end of 'medi' feature to
* insert an extra glyph there, and we use that to replace the
* proper ligature later. As long as this is the code for an
* isolated form, it will work fine, as an isolated form cannot
* happen between a Lam-Alef sequence of the shapes that form a
* ligature. */
#define LAM_MEDI_MARKER 225
/*
* Table Start
*/
OT_TABLE_START
/*
* Manifest
*/
/* Each entry pairs a fallback type with the lookup to run for it. */
MANIFEST(
MANIFEST_LOOKUP(FALLBACK_INIT, initLookup)
MANIFEST_LOOKUP(FALLBACK_MEDI, mediLookup)
MANIFEST_LOOKUP(FALLBACK_FINA, finaLookup)
MANIFEST_LOOKUP(FALLBACK_RLIG, rligLookup)
MANIFEST_LOOKUP(FALLBACK_RLIG, rligMarksLookup)
)
/*
* Lookups
*/
/* init and medi each chain two single-substitution sub-lookups: the one
 * shared between both positions, then the position-specific one. */
OT_LOOKUP(initLookup, OT::SubstLookupSubTable::Single, OT::LookupFlag::IgnoreMarks,
OT_OFFSET(initLookup, initmediSubLookup)
OT_OFFSET(initLookup, initSubLookup)
)
OT_LOOKUP(mediLookup, OT::SubstLookupSubTable::Single, OT::LookupFlag::IgnoreMarks,
OT_OFFSET(mediLookup, initmediSubLookup)
OT_OFFSET(mediLookup, mediSubLookup)
)
OT_LOOKUP(finaLookup, OT::SubstLookupSubTable::Single, OT::LookupFlag::IgnoreMarks,
OT_OFFSET(finaLookup, finaSubLookup)
)
/* NOTE(review): mediLamLookup is not listed in the MANIFEST above, so
 * nothing visible here ever applies it and LAM_MEDI_MARKER would never be
 * inserted; the lamMediLigature* entries below depend on that marker.
 * Confirm whether a manifest entry is missing. */
OT_LOOKUP(mediLamLookup, OT::SubstLookupSubTable::Multiple, OT::LookupFlag::IgnoreMarks,
OT_OFFSET(mediLamLookup, mediLamSubLookup)
)
OT_LOOKUP(rligLookup, OT::SubstLookupSubTable::Ligature, OT::LookupFlag::IgnoreMarks,
OT_OFFSET(rligLookup, lamAlefLigaturesSubLookup)
)
/* Unlike the lookups above, this one is given lookup flag 0. */
OT_LOOKUP(rligMarksLookup, OT::SubstLookupSubTable::Ligature, 0,
OT_OFFSET(rligMarksLookup, shaddaLigaturesSubLookup)
)
/*
* init/medi/fina forms
*/
/* In each sub-lookup the first glyph list is the input (coverage) and the
 * second is the replacement, matched up position by position. */
OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(initmediSubLookup,
G(198) G(200) G(201) G(202) G(203) G(211) G(212) G(213)
G(214) G(223) G(225) G(227) G(228) G(236) G(237),
G(162) G(4) G(5) G(5) G(6) G(13) G(14) G(15)
G(26) G(140) G(141) G(142) G(143) G(154) G(154)
)
OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(initSubLookup,
G(204) G(205) G(206) G(218) G(219) G(221) G(222) G(229),
G(7) G(9) G(11) G(27) G(30) G(128) G(131) G(144)
)
OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(mediSubLookup,
G(204) G(205) G(206) G(218) G(219) G(221) G(222) G(229),
G(8) G(10) G(12) G(28) G(31) G(129) G(138) G(149)
)
OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(finaSubLookup,
G(194) G(195) G(197) G(198) G(199) G(218) G(219) G(229) G(236) G(237),
G(2) G(1) G(3) G(181) G(0) G(29) G(127) G(152) G(160) G(156)
)
/* Replaces glyph 141 with the pair 141, LAM_MEDI_MARKER. */
OT_SUBLOOKUP_MULTIPLE_SUBST_FORMAT1(mediLamSubLookup,
G(141),
OT_OFFSET(mediLamSubLookup, mediLamSequence)
)
OT_SEQUENCE(mediLamSequence, G(141) G(LAM_MEDI_MARKER))
/*
* Lam+Alef ligatures
*/
/* All ligatures here start with glyph 141.  The "Init" entries match 141
 * followed directly by one component; the "Medi" entries additionally
 * require LAM_MEDI_MARKER in between. */
OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(lamAlefLigaturesSubLookup,
G(141),
OT_OFFSET(lamAlefLigaturesSubLookup, lamLigatureSet)
)
OT_LIGATURE_SET(lamLigatureSet,
OT_OFFSET(lamLigatureSet, lamInitLigature1)
OT_OFFSET(lamLigatureSet, lamInitLigature2)
OT_OFFSET(lamLigatureSet, lamInitLigature3)
OT_OFFSET(lamLigatureSet, lamInitLigature4)
OT_OFFSET(lamLigatureSet, lamMediLigature1)
OT_OFFSET(lamLigatureSet, lamMediLigature2)
OT_OFFSET(lamLigatureSet, lamMediLigature3)
OT_OFFSET(lamLigatureSet, lamMediLigature4)
)
OT_LIGATURE(lamInitLigature1, G(0), G(165))
OT_LIGATURE(lamInitLigature2, G(1), G(178))
OT_LIGATURE(lamInitLigature3, G(2), G(180))
OT_LIGATURE(lamInitLigature4, G(3), G(252))
OT_LIGATURE(lamMediLigature1, G(LAM_MEDI_MARKER) G(0), G(170))
OT_LIGATURE(lamMediLigature2, G(LAM_MEDI_MARKER) G(1), G(179))
OT_LIGATURE(lamMediLigature3, G(LAM_MEDI_MARKER) G(2), G(185))
OT_LIGATURE(lamMediLigature4, G(LAM_MEDI_MARKER) G(3), G(255))
/*
* Shadda ligatures
*/
/* All ligatures here start with glyph 248 followed by one component. */
OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(shaddaLigaturesSubLookup,
G(248),
OT_OFFSET(shaddaLigaturesSubLookup, shaddaLigatureSet)
)
OT_LIGATURE_SET(shaddaLigatureSet,
OT_OFFSET(shaddaLigatureSet, shaddaLigature1)
OT_OFFSET(shaddaLigatureSet, shaddaLigature2)
OT_OFFSET(shaddaLigatureSet, shaddaLigature3)
)
OT_LIGATURE(shaddaLigature1, G(243), G(172))
OT_LIGATURE(shaddaLigature2, G(245), G(173))
OT_LIGATURE(shaddaLigature3, G(246), G(175))
/*
* Table end
*/
OT_TABLE_END
/*
* Clean up
*/
/* Drop the pass-specific macros so the next pass can define its own
 * versions without redefinition conflicts. */
#undef OT_TABLE_START
#undef OT_TABLE_END
#undef OT_LABEL
#undef OT_USHORT
#undef OT_DISTANCE
/*
* Include a second time to get the table data...
*/
/* OT_MEASURE is still defined only at the end of the first (measuring)
 * pass; the second pass undefines it, so the recursion stops after one
 * re-inclusion. */
#ifdef OT_MEASURE
#include __FILE__
#endif
#define HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH
#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH */