376 lines
11 KiB
C++
376 lines
11 KiB
C++
/*
|
|
* Copyright © 2009 Red Hat, Inc.
|
|
* Copyright © 2011 Codethink Limited
|
|
* Copyright © 2010,2011 Google, Inc.
|
|
*
|
|
* This is part of HarfBuzz, a text shaping library.
|
|
*
|
|
* Permission is hereby granted, without written agreement and without
|
|
* license or royalty fees, to use, copy, modify, and distribute this
|
|
* software and its documentation for any purpose, provided that the
|
|
* above copyright notice and the following two paragraphs appear in
|
|
* all copies of this software.
|
|
*
|
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
* DAMAGE.
|
|
*
|
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
|
*
|
|
* Red Hat Author(s): Behdad Esfahbod
|
|
* Codethink Author(s): Ryan Lortie
|
|
* Google Author(s): Behdad Esfahbod
|
|
*/
|
|
|
|
#include "hb-private.hh"
|
|
|
|
#include "hb-unicode-private.hh"
|
|
|
|
|
|
|
|
/*
|
|
* hb_unicode_funcs_t
|
|
*/
|
|
|
|
static unsigned int
|
|
hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|
hb_codepoint_t unicode HB_UNUSED,
|
|
void *user_data HB_UNUSED)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static unsigned int
|
|
hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|
hb_codepoint_t unicode HB_UNUSED,
|
|
void *user_data HB_UNUSED)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
static hb_unicode_general_category_t
|
|
hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|
hb_codepoint_t unicode HB_UNUSED,
|
|
void *user_data HB_UNUSED)
|
|
{
|
|
return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
|
|
}
|
|
|
|
static hb_codepoint_t
|
|
hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|
hb_codepoint_t unicode HB_UNUSED,
|
|
void *user_data HB_UNUSED)
|
|
{
|
|
return unicode;
|
|
}
|
|
|
|
static hb_script_t
|
|
hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|
hb_codepoint_t unicode HB_UNUSED,
|
|
void *user_data HB_UNUSED)
|
|
{
|
|
return HB_SCRIPT_UNKNOWN;
|
|
}
|
|
|
|
static hb_bool_t
|
|
hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|
hb_codepoint_t a HB_UNUSED,
|
|
hb_codepoint_t b HB_UNUSED,
|
|
hb_codepoint_t *ab HB_UNUSED,
|
|
void *user_data HB_UNUSED)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
static hb_bool_t
|
|
hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
|
|
hb_codepoint_t ab HB_UNUSED,
|
|
hb_codepoint_t *a HB_UNUSED,
|
|
hb_codepoint_t *b HB_UNUSED,
|
|
void *user_data HB_UNUSED)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
|
|
|
|
hb_unicode_funcs_t *
|
|
hb_unicode_funcs_get_default (void)
|
|
{
|
|
return const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_default);
|
|
}
|
|
|
|
hb_unicode_funcs_t *
|
|
hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
|
|
{
|
|
hb_unicode_funcs_t *ufuncs;
|
|
|
|
if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
|
|
return hb_unicode_funcs_get_empty ();
|
|
|
|
if (!parent)
|
|
parent = hb_unicode_funcs_get_empty ();
|
|
|
|
hb_unicode_funcs_make_immutable (parent);
|
|
ufuncs->parent = hb_unicode_funcs_reference (parent);
|
|
|
|
ufuncs->func = parent->func;
|
|
|
|
/* We can safely copy user_data from parent since we hold a reference
|
|
* onto it and it's immutable. We should not copy the destroy notifiers
|
|
* though. */
|
|
ufuncs->user_data = parent->user_data;
|
|
|
|
return ufuncs;
|
|
}
|
|
|
|
|
|
extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
|
|
const hb_unicode_funcs_t _hb_unicode_funcs_nil = {
|
|
HB_OBJECT_HEADER_STATIC,
|
|
|
|
NULL, /* parent */
|
|
true, /* immutable */
|
|
{
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
|
}
|
|
};
|
|
|
|
hb_unicode_funcs_t *
|
|
hb_unicode_funcs_get_empty (void)
|
|
{
|
|
return const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil);
|
|
}
|
|
|
|
hb_unicode_funcs_t *
|
|
hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
|
|
{
|
|
return hb_object_reference (ufuncs);
|
|
}
|
|
|
|
void
|
|
hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
|
|
{
|
|
if (!hb_object_destroy (ufuncs)) return;
|
|
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(name) \
|
|
if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
|
|
|
hb_unicode_funcs_destroy (ufuncs->parent);
|
|
|
|
free (ufuncs);
|
|
}
|
|
|
|
hb_bool_t
|
|
hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
|
|
hb_user_data_key_t *key,
|
|
void * data,
|
|
hb_destroy_func_t destroy,
|
|
hb_bool_t replace)
|
|
{
|
|
return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
|
|
}
|
|
|
|
void *
|
|
hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
|
|
hb_user_data_key_t *key)
|
|
{
|
|
return hb_object_get_user_data (ufuncs, key);
|
|
}
|
|
|
|
|
|
void
|
|
hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
|
|
{
|
|
if (hb_object_is_inert (ufuncs))
|
|
return;
|
|
|
|
ufuncs->immutable = true;
|
|
}
|
|
|
|
hb_bool_t
|
|
hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
|
|
{
|
|
return ufuncs->immutable;
|
|
}
|
|
|
|
hb_unicode_funcs_t *
|
|
hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
|
|
{
|
|
return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
|
|
}
|
|
|
|
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(name) \
|
|
\
|
|
void \
|
|
hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
|
|
hb_unicode_##name##_func_t func, \
|
|
void *user_data, \
|
|
hb_destroy_func_t destroy) \
|
|
{ \
|
|
if (ufuncs->immutable) \
|
|
return; \
|
|
\
|
|
if (ufuncs->destroy.name) \
|
|
ufuncs->destroy.name (ufuncs->user_data.name); \
|
|
\
|
|
if (func) { \
|
|
ufuncs->func.name = func; \
|
|
ufuncs->user_data.name = user_data; \
|
|
ufuncs->destroy.name = destroy; \
|
|
} else { \
|
|
ufuncs->func.name = ufuncs->parent->func.name; \
|
|
ufuncs->user_data.name = ufuncs->parent->user_data.name; \
|
|
ufuncs->destroy.name = NULL; \
|
|
} \
|
|
}
|
|
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
|
|
|
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
|
|
\
|
|
return_type \
|
|
hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
|
|
hb_codepoint_t unicode) \
|
|
{ \
|
|
return ufuncs->func.name (ufuncs, unicode, ufuncs->user_data.name); \
|
|
}
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
|
|
|
hb_bool_t
|
|
hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
|
|
hb_codepoint_t a,
|
|
hb_codepoint_t b,
|
|
hb_codepoint_t *ab)
|
|
{
|
|
*ab = 0;
|
|
/* XXX, this belongs to indic normalizer. */
|
|
if ((FLAG (hb_unicode_general_category (ufuncs, a)) &
|
|
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
|
|
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
|
|
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
|
|
return false;
|
|
/* XXX, add composition-exclusion exceptions to Indic shaper. */
|
|
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
|
|
return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose);
|
|
}
|
|
|
|
hb_bool_t
|
|
hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
|
|
hb_codepoint_t ab,
|
|
hb_codepoint_t *a,
|
|
hb_codepoint_t *b)
|
|
{
|
|
/* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
|
|
switch (ab) {
|
|
case 0x0AC9 : return false;
|
|
|
|
case 0x0931 : return false;
|
|
case 0x0B94 : return false;
|
|
|
|
/* These ones have Unicode decompositions, but we do it
|
|
* this way to be close to what Uniscribe does. */
|
|
case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
|
|
case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
|
|
case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
|
|
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
|
|
|
|
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
|
|
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
|
|
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
|
|
case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
|
|
case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
|
|
case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
|
|
case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
|
|
case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
|
|
case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
|
|
case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
|
|
case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
|
|
case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
|
|
#if 0
|
|
case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
|
|
case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
|
|
case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
|
|
case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
|
|
#endif
|
|
}
|
|
*a = ab; *b = 0;
|
|
return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose);
|
|
}
|
|
|
|
|
|
|
|
unsigned int
|
|
_hb_unicode_modified_combining_class (hb_unicode_funcs_t *ufuncs,
|
|
hb_codepoint_t unicode)
|
|
{
|
|
int c = hb_unicode_combining_class (ufuncs, unicode);
|
|
|
|
if (unlikely (hb_in_range<int> (c, 27, 33)))
|
|
{
|
|
/* Modify the combining-class to suit Arabic better. See:
|
|
* http://unicode.org/faq/normalization.html#8
|
|
* http://unicode.org/faq/normalization.html#9
|
|
*/
|
|
c = c == 33 ? 27 : c + 1;
|
|
}
|
|
else if (unlikely (hb_in_range<int> (c, 10, 25)))
|
|
{
|
|
/* The equivalent fix for Hebrew is more complex.
|
|
*
|
|
* We permute the "fixed-position" classes 10-25 into the order
|
|
* described in the SBL Hebrew manual:
|
|
*
|
|
* http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
|
|
*
|
|
* (as recommended by:
|
|
* http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
|
|
*
|
|
* More details here:
|
|
* https://bugzilla.mozilla.org/show_bug.cgi?id=662055
|
|
*/
|
|
static const int permuted_hebrew_classes[25 - 10 + 1] = {
|
|
/* 10 sheva */ 22,
|
|
/* 11 hataf segol */ 15,
|
|
/* 12 hataf patah */ 16,
|
|
/* 13 hataf qamats */ 17,
|
|
/* 14 hiriq */ 23,
|
|
/* 15 tsere */ 18,
|
|
/* 16 segol */ 19,
|
|
/* 17 patah */ 20,
|
|
/* 18 qamats */ 21,
|
|
/* 19 holam */ 14,
|
|
/* 20 qubuts */ 24,
|
|
/* 21 dagesh */ 12,
|
|
/* 22 meteg */ 25,
|
|
/* 23 rafe */ 13,
|
|
/* 24 shin dot */ 10,
|
|
/* 25 sin dot */ 11,
|
|
};
|
|
c = permuted_hebrew_classes[c - 10];
|
|
}
|
|
else if (unlikely (unicode == 0x0E3A)) /* THAI VOWEL SIGN PHINTHU */
|
|
{
|
|
/* Assign 104, so it reorders after the THAI ccc=103 marks.
|
|
* Uniscribe does this. */
|
|
c = 104;
|
|
}
|
|
|
|
return c;
|
|
}
|
|
|