harfbuzz/src/hb-unicode.cc

384 lines
11 KiB
C++
Raw Normal View History

2009-08-11 02:05:16 +02:00
/*
2011-04-21 23:14:28 +02:00
* Copyright © 2009 Red Hat, Inc.
* Copyright © 2011 Codethink Limited
2011-04-21 23:14:28 +02:00
* Copyright © 2010,2011 Google, Inc.
2009-08-11 02:05:16 +02:00
*
2010-04-22 06:11:43 +02:00
* This is part of HarfBuzz, a text shaping library.
2009-08-11 02:05:16 +02:00
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Red Hat Author(s): Behdad Esfahbod
* Codethink Author(s): Ryan Lortie
2011-04-20 08:00:47 +02:00
* Google Author(s): Behdad Esfahbod
2009-08-11 02:05:16 +02:00
*/
#include "hb-private.hh"
2009-08-11 02:05:16 +02:00
2011-04-20 08:00:47 +02:00
#include "hb-unicode-private.hh"
2009-08-11 02:05:16 +02:00
2010-07-23 21:11:18 +02:00
2009-08-11 02:05:16 +02:00
/*
* hb_unicode_funcs_t
*/
static unsigned int
hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return 0;
}
static unsigned int
hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return 1;
}
static hb_unicode_general_category_t
hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
}
static hb_codepoint_t
hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return unicode;
}
static hb_script_t
hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t unicode HB_UNUSED,
void *user_data HB_UNUSED)
{
return HB_SCRIPT_UNKNOWN;
}
static hb_bool_t
hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t a HB_UNUSED,
hb_codepoint_t b HB_UNUSED,
hb_codepoint_t *ab HB_UNUSED,
void *user_data HB_UNUSED)
{
2012-06-06 02:35:40 +02:00
return false;
}
static hb_bool_t
hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
hb_codepoint_t ab HB_UNUSED,
hb_codepoint_t *a HB_UNUSED,
hb_codepoint_t *b HB_UNUSED,
void *user_data HB_UNUSED)
{
2012-06-06 02:35:40 +02:00
return false;
}
2011-04-20 08:00:47 +02:00
hb_unicode_funcs_t *
hb_unicode_funcs_get_default (void)
{
return const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_default);
}
2009-08-11 02:05:16 +02:00
hb_unicode_funcs_t *
hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
2009-08-11 02:05:16 +02:00
{
hb_unicode_funcs_t *ufuncs;
if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
return hb_unicode_funcs_get_empty ();
2009-08-11 02:05:16 +02:00
if (!parent)
parent = hb_unicode_funcs_get_empty ();
2011-04-20 08:00:47 +02:00
hb_unicode_funcs_make_immutable (parent);
ufuncs->parent = hb_unicode_funcs_reference (parent);
2011-04-20 08:00:47 +02:00
ufuncs->func = parent->func;
/* We can safely copy user_data from parent since we hold a reference
* onto it and it's immutable. We should not copy the destroy notifiers
* though. */
ufuncs->user_data = parent->user_data;
2009-08-11 02:05:16 +02:00
return ufuncs;
}
extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
const hb_unicode_funcs_t _hb_unicode_funcs_nil = {
HB_OBJECT_HEADER_STATIC,
NULL, /* parent */
2012-06-06 02:35:40 +02:00
true, /* immutable */
{
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
}
};
hb_unicode_funcs_t *
hb_unicode_funcs_get_empty (void)
{
return const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil);
}
2009-08-11 02:05:16 +02:00
hb_unicode_funcs_t *
hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
{
return hb_object_reference (ufuncs);
2009-08-11 02:05:16 +02:00
}
void
hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
{
if (!hb_object_destroy (ufuncs)) return;
2009-08-11 02:05:16 +02:00
#define HB_UNICODE_FUNC_IMPLEMENT(name) \
2011-07-08 05:14:42 +02:00
if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
2011-04-20 08:00:47 +02:00
hb_unicode_funcs_destroy (ufuncs->parent);
2009-08-11 02:05:16 +02:00
free (ufuncs);
}
hb_bool_t
hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
hb_user_data_key_t *key,
void * data,
hb_destroy_func_t destroy,
hb_bool_t replace)
{
return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
}
void *
hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
hb_user_data_key_t *key)
{
return hb_object_get_user_data (ufuncs, key);
}
void
hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
{
if (hb_object_is_inert (ufuncs))
return;
2012-06-06 02:35:40 +02:00
ufuncs->immutable = true;
}
hb_bool_t
hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
{
return ufuncs->immutable;
}
2011-04-20 08:00:47 +02:00
hb_unicode_funcs_t *
hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
{
return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
2011-04-20 08:00:47 +02:00
}
2009-08-11 04:58:56 +02:00
#define HB_UNICODE_FUNC_IMPLEMENT(name) \
2011-07-08 05:14:42 +02:00
\
void \
hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
hb_unicode_##name##_func_t func, \
2011-07-08 05:14:42 +02:00
void *user_data, \
hb_destroy_func_t destroy) \
{ \
if (ufuncs->immutable) \
return; \
\
if (ufuncs->destroy.name) \
ufuncs->destroy.name (ufuncs->user_data.name); \
\
if (func) { \
ufuncs->func.name = func; \
2011-07-08 05:14:42 +02:00
ufuncs->user_data.name = user_data; \
ufuncs->destroy.name = destroy; \
} else { \
ufuncs->func.name = ufuncs->parent->func.name; \
2011-07-08 05:14:42 +02:00
ufuncs->user_data.name = ufuncs->parent->user_data.name; \
ufuncs->destroy.name = NULL; \
} \
}
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
2011-07-08 05:14:42 +02:00
\
return_type \
hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
hb_codepoint_t unicode) \
2011-07-08 05:14:42 +02:00
{ \
return ufuncs->func.name (ufuncs, unicode, ufuncs->user_data.name); \
2011-07-08 05:14:42 +02:00
}
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
2011-07-08 05:14:42 +02:00
#undef HB_UNICODE_FUNC_IMPLEMENT
2009-12-20 15:29:16 +01:00
hb_bool_t
hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t a,
hb_codepoint_t b,
hb_codepoint_t *ab)
{
*ab = 0;
/* XXX, this belongs to indic normalizer. */
if ((FLAG (hb_unicode_general_category (ufuncs, a)) &
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
return false;
/* XXX, add composition-exclusion exceptions to Indic shaper. */
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose);
}
hb_bool_t
hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t ab,
hb_codepoint_t *a,
hb_codepoint_t *b)
{
/* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
switch (ab) {
case 0x0AC9 : return false;
case 0x0931 : return false;
case 0x0B94 : return false;
/* These ones have Unicode decompositions, but we do it
* this way to be close to what Uniscribe does. */
case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
#if 0
case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
#endif
}
*a = ab; *b = 0;
return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose);
}
2009-12-20 15:29:16 +01:00
2012-04-05 22:40:37 +02:00
unsigned int
_hb_unicode_modified_combining_class (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode)
{
int c = hb_unicode_combining_class (ufuncs, unicode);
if (unlikely (hb_in_range<int> (c, 27, 33)))
{
/* Modify the combining-class to suit Arabic better. See:
* http://unicode.org/faq/normalization.html#8
* http://unicode.org/faq/normalization.html#9
*/
c = c == 33 ? 27 : c + 1;
}
else if (unlikely (hb_in_range<int> (c, 10, 25)))
{
/* The equivalent fix for Hebrew is more complex.
*
* We permute the "fixed-position" classes 10-25 into the order
* described in the SBL Hebrew manual:
*
* http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
*
* (as recommended by:
* http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
*
* More details here:
* https://bugzilla.mozilla.org/show_bug.cgi?id=662055
*/
static const int permuted_hebrew_classes[25 - 10 + 1] = {
/* 10 sheva */ 22,
/* 11 hataf segol */ 15,
/* 12 hataf patah */ 16,
/* 13 hataf qamats */ 17,
/* 14 hiriq */ 23,
/* 15 tsere */ 18,
/* 16 segol */ 19,
/* 17 patah */ 20,
/* 18 qamats */ 21,
/* 19 holam */ 14,
/* 20 qubuts */ 24,
/* 21 dagesh */ 12,
/* 22 meteg */ 25,
/* 23 rafe */ 13,
/* 24 shin dot */ 10,
/* 25 sin dot */ 11,
};
c = permuted_hebrew_classes[c - 10];
}
else if (unlikely (unicode == 0x0E3A)) /* THAI VOWEL SIGN PHINTHU */
{
/* Assign 104, so it reorders after the THAI ccc=103 marks.
* Uniscribe does this. */
c = 104;
}
else if (unlikely (hb_in_range<hb_codepoint_t> (unicode, 0x0C55, 0x0C56)))
{
/* Telugu length marks.
* These are the only matras in the main Indic script range that have
* a non-zero ccc. That makes them reorder with the Halant that is
* ccc=9. Just zero them, we don't need them in our Indic shaper. */
c = 0;
}
2012-04-05 22:40:37 +02:00
return c;
}