2009-08-11 02:05:16 +02:00
|
|
|
/*
 * Copyright © 2009 Red Hat, Inc.
 * Copyright © 2011 Codethink Limited
 * Copyright © 2010,2011,2012 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
 * Codethink Author(s): Ryan Lortie
 * Google Author(s): Behdad Esfahbod
 */
|
|
|
|
|
2011-04-20 08:00:47 +02:00
|
|
|
#ifndef HB_UNICODE_PRIVATE_HH
|
|
|
|
#define HB_UNICODE_PRIVATE_HH
|
2009-08-11 02:05:16 +02:00
|
|
|
|
2011-04-21 00:50:27 +02:00
|
|
|
#include "hb-private.hh"
|
2009-08-11 02:05:16 +02:00
|
|
|
|
|
|
|
#include "hb-unicode.h"
|
2011-04-22 00:24:02 +02:00
|
|
|
#include "hb-object-private.hh"
|
2009-08-11 02:05:16 +02:00
|
|
|
|
|
|
|
|
2012-08-02 00:07:42 +02:00
|
|
|
/* 256-entry lookup table, defined in another translation unit.  It is
 * indexed with the value returned by combining_class() in
 * hb_unicode_funcs_t::modified_combining_class(). */
extern HB_INTERNAL const uint8_t _hb_modified_combining_class[256];
|
2010-07-23 21:11:18 +02:00
|
|
|
|
2009-08-11 02:05:16 +02:00
|
|
|
/*
 * hb_unicode_funcs_t
 */
|
|
|
|
|
2011-07-08 05:14:42 +02:00
|
|
|
/* X-macro listing every Unicode callback by name.  The user defines
 * HB_UNICODE_FUNC_IMPLEMENT(name) before expanding this list and undefines
 * it afterwards; the list is expanded once per callback, in this order.
 * Every line of the definition must end in a backslash so the list stays
 * one logical line. */
#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS \
  HB_UNICODE_FUNC_IMPLEMENT (combining_class) \
  HB_UNICODE_FUNC_IMPLEMENT (eastasian_width) \
  HB_UNICODE_FUNC_IMPLEMENT (general_category) \
  HB_UNICODE_FUNC_IMPLEMENT (mirroring) \
  HB_UNICODE_FUNC_IMPLEMENT (script) \
  HB_UNICODE_FUNC_IMPLEMENT (compose) \
  HB_UNICODE_FUNC_IMPLEMENT (decompose) \
  HB_UNICODE_FUNC_IMPLEMENT (decompose_compatibility) \
  /* ^--- Add new callbacks here */
|
|
|
|
|
2011-07-08 05:19:27 +02:00
|
|
|
/* Simple callbacks are those taking a single hb_codepoint_t and returning a
 * single value; the return type differs per callback and is given as the
 * first macro argument.  The user defines
 * HB_UNICODE_FUNC_IMPLEMENT(return_type, name) before expanding this list
 * and undefines it afterwards. */
#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE \
  HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_combining_class_t, combining_class) \
  HB_UNICODE_FUNC_IMPLEMENT (unsigned int, eastasian_width) \
  HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_general_category_t, general_category) \
  HB_UNICODE_FUNC_IMPLEMENT (hb_codepoint_t, mirroring) \
  HB_UNICODE_FUNC_IMPLEMENT (hb_script_t, script) \
  /* ^--- Add new simple callbacks here */
|
|
|
|
|
2012-06-16 21:21:55 +02:00
|
|
|
struct hb_unicode_funcs_t {
|
2011-04-22 00:24:02 +02:00
|
|
|
hb_object_header_t header;
|
2012-06-06 09:30:09 +02:00
|
|
|
ASSERT_POD ();
|
2011-04-22 00:24:02 +02:00
|
|
|
|
2011-04-20 06:19:20 +02:00
|
|
|
hb_unicode_funcs_t *parent;
|
2009-08-11 02:05:16 +02:00
|
|
|
|
2011-04-22 00:24:02 +02:00
|
|
|
bool immutable;
|
2009-08-11 05:25:28 +02:00
|
|
|
|
2012-08-01 23:01:59 +02:00
|
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
|
|
|
|
inline return_type name (hb_codepoint_t unicode) { return func.name (this, unicode, user_data.name); }
|
|
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
|
|
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
|
|
|
|
|
|
|
inline hb_bool_t compose (hb_codepoint_t a, hb_codepoint_t b,
|
|
|
|
hb_codepoint_t *ab)
|
|
|
|
{
|
|
|
|
*ab = 0;
|
2012-08-07 22:45:27 +02:00
|
|
|
|
2012-08-01 23:01:59 +02:00
|
|
|
/* XXX, this belongs to indic normalizer. */
|
|
|
|
if ((FLAG (general_category (a)) &
|
|
|
|
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
|
|
|
|
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
|
|
|
|
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
|
|
|
|
return false;
|
|
|
|
/* XXX, add composition-exclusion exceptions to Indic shaper. */
|
|
|
|
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
|
2012-08-07 22:45:27 +02:00
|
|
|
|
|
|
|
/* XXX, these belong to the hebew / default shaper. */
|
|
|
|
/* Hebrew presentation-form shaping.
|
|
|
|
* https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
|
|
|
|
// Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
|
|
|
|
// note that some letters do not have a dagesh presForm encoded
|
|
|
|
static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
|
|
|
|
0xFB30, // ALEF
|
|
|
|
0xFB31, // BET
|
|
|
|
0xFB32, // GIMEL
|
|
|
|
0xFB33, // DALET
|
|
|
|
0xFB34, // HE
|
|
|
|
0xFB35, // VAV
|
|
|
|
0xFB36, // ZAYIN
|
|
|
|
0, // HET
|
|
|
|
0xFB38, // TET
|
|
|
|
0xFB39, // YOD
|
|
|
|
0xFB3A, // FINAL KAF
|
|
|
|
0xFB3B, // KAF
|
|
|
|
0xFB3C, // LAMED
|
|
|
|
0, // FINAL MEM
|
|
|
|
0xFB3E, // MEM
|
|
|
|
0, // FINAL NUN
|
|
|
|
0xFB40, // NUN
|
|
|
|
0xFB41, // SAMEKH
|
|
|
|
0, // AYIN
|
|
|
|
0xFB43, // FINAL PE
|
|
|
|
0xFB44, // PE
|
|
|
|
0, // FINAL TSADI
|
|
|
|
0xFB46, // TSADI
|
|
|
|
0xFB47, // QOF
|
|
|
|
0xFB48, // RESH
|
|
|
|
0xFB49, // SHIN
|
|
|
|
0xFB4A // TAV
|
|
|
|
};
|
|
|
|
|
|
|
|
hb_bool_t found = func.compose (this, a, b, ab, user_data.compose);
|
|
|
|
|
|
|
|
if (!found && (b & ~0x7F) == 0x0580) {
|
|
|
|
// special-case Hebrew presentation forms that are excluded from
|
|
|
|
// standard normalization, but wanted for old fonts
|
|
|
|
switch (b) {
|
|
|
|
case 0x05B4: // HIRIQ
|
|
|
|
if (a == 0x05D9) { // YOD
|
|
|
|
*ab = 0xFB1D;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x05B7: // patah
|
|
|
|
if (a == 0x05F2) { // YIDDISH YOD YOD
|
|
|
|
*ab = 0xFB1F;
|
|
|
|
found = true;
|
|
|
|
} else if (a == 0x05D0) { // ALEF
|
|
|
|
*ab = 0xFB2E;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x05B8: // QAMATS
|
|
|
|
if (a == 0x05D0) { // ALEF
|
|
|
|
*ab = 0xFB2F;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x05B9: // HOLAM
|
|
|
|
if (a == 0x05D5) { // VAV
|
|
|
|
*ab = 0xFB4B;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x05BC: // DAGESH
|
|
|
|
if (a >= 0x05D0 && a <= 0x05EA) {
|
|
|
|
*ab = sDageshForms[a - 0x05D0];
|
|
|
|
found = (*ab != 0);
|
|
|
|
} else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
|
|
|
|
*ab = 0xFB2C;
|
|
|
|
found = true;
|
|
|
|
} else if (a == 0xFB2B) { // SHIN WITH SIN DOT
|
|
|
|
*ab = 0xFB2D;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x05BF: // RAFE
|
|
|
|
switch (a) {
|
|
|
|
case 0x05D1: // BET
|
|
|
|
*ab = 0xFB4C;
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
case 0x05DB: // KAF
|
|
|
|
*ab = 0xFB4D;
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
case 0x05E4: // PE
|
|
|
|
*ab = 0xFB4E;
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x05C1: // SHIN DOT
|
|
|
|
if (a == 0x05E9) { // SHIN
|
|
|
|
*ab = 0xFB2A;
|
|
|
|
found = true;
|
|
|
|
} else if (a == 0xFB49) { // SHIN WITH DAGESH
|
|
|
|
*ab = 0xFB2C;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x05C2: // SIN DOT
|
|
|
|
if (a == 0x05E9) { // SHIN
|
|
|
|
*ab = 0xFB2B;
|
|
|
|
found = true;
|
|
|
|
} else if (a == 0xFB49) { // SHIN WITH DAGESH
|
|
|
|
*ab = 0xFB2D;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return found;
|
2012-08-01 23:01:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
inline hb_bool_t decompose (hb_codepoint_t ab,
|
|
|
|
hb_codepoint_t *a, hb_codepoint_t *b)
|
|
|
|
{
|
|
|
|
/* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
|
|
|
|
switch (ab) {
|
|
|
|
case 0x0AC9 : return false;
|
|
|
|
|
|
|
|
case 0x0931 : return false;
|
|
|
|
case 0x0B94 : return false;
|
|
|
|
|
|
|
|
/* These ones have Unicode decompositions, but we do it
|
|
|
|
* this way to be close to what Uniscribe does. */
|
|
|
|
case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
|
|
|
|
case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
|
|
|
|
case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
|
|
|
|
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
|
|
|
|
|
|
|
|
case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
|
|
|
|
case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
|
|
|
|
case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
|
|
|
|
case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
|
|
|
|
case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
|
|
|
|
case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
|
|
|
|
case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
|
|
|
|
case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
|
|
|
|
case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
|
|
|
|
case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
|
|
|
|
case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
|
|
|
|
case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
|
|
|
|
#if 0
|
|
|
|
case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
|
|
|
|
case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
|
|
|
|
case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
|
|
|
|
case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
*a = ab; *b = 0;
|
|
|
|
return func.decompose (this, ab, a, b, user_data.decompose);
|
|
|
|
}
|
|
|
|
|
|
|
|
inline unsigned int decompose_compatibility (hb_codepoint_t u,
|
|
|
|
hb_codepoint_t *decomposed)
|
|
|
|
{
|
|
|
|
unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_data.decompose_compatibility);
|
|
|
|
if (ret == 1 && u == decomposed[0]) {
|
|
|
|
decomposed[0] = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
decomposed[ret] = 0;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-04-20 08:00:47 +02:00
|
|
|
|
2012-08-02 00:07:42 +02:00
|
|
|
unsigned int
|
|
|
|
modified_combining_class (hb_codepoint_t unicode)
|
|
|
|
{
|
|
|
|
return _hb_modified_combining_class[combining_class (unicode)];
|
|
|
|
}
|
2012-08-01 23:13:10 +02:00
|
|
|
|
|
|
|
inline hb_bool_t
|
|
|
|
is_variation_selector (hb_codepoint_t unicode)
|
|
|
|
{
|
|
|
|
return unlikely (hb_in_ranges<hb_codepoint_t> (unicode,
|
|
|
|
0x180B, 0x180D, /* MONGOLIAN FREE VARIATION SELECTOR ONE..THREE */
|
|
|
|
0xFE00, 0xFE0F, /* VARIATION SELECTOR-1..16 */
|
|
|
|
0xE0100, 0xE01EF)); /* VARIATION SELECTOR-17..256 */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Zero-Width invisible characters:
|
|
|
|
*
|
|
|
|
* 00AD SOFT HYPHEN
|
|
|
|
* 034F COMBINING GRAPHEME JOINER
|
|
|
|
*
|
|
|
|
* 180E MONGOLIAN VOWEL SEPARATOR
|
|
|
|
*
|
|
|
|
* 200B ZERO WIDTH SPACE
|
|
|
|
* 200C ZERO WIDTH NON-JOINER
|
|
|
|
* 200D ZERO WIDTH JOINER
|
|
|
|
* 200E LEFT-TO-RIGHT MARK
|
|
|
|
* 200F RIGHT-TO-LEFT MARK
|
|
|
|
*
|
|
|
|
* 2028 LINE SEPARATOR
|
|
|
|
*
|
|
|
|
* 202A LEFT-TO-RIGHT EMBEDDING
|
|
|
|
* 202B RIGHT-TO-LEFT EMBEDDING
|
|
|
|
* 202C POP DIRECTIONAL FORMATTING
|
|
|
|
* 202D LEFT-TO-RIGHT OVERRIDE
|
|
|
|
* 202E RIGHT-TO-LEFT OVERRIDE
|
|
|
|
*
|
|
|
|
* 2060 WORD JOINER
|
|
|
|
* 2061 FUNCTION APPLICATION
|
|
|
|
* 2062 INVISIBLE TIMES
|
|
|
|
* 2063 INVISIBLE SEPARATOR
|
|
|
|
*
|
|
|
|
* FEFF ZERO WIDTH NO-BREAK SPACE
|
|
|
|
*/
|
|
|
|
inline hb_bool_t
|
|
|
|
is_zero_width (hb_codepoint_t ch)
|
|
|
|
{
|
|
|
|
return ((ch & ~0x007F) == 0x2000 && (hb_in_ranges<hb_codepoint_t> (ch,
|
|
|
|
0x200B, 0x200F,
|
|
|
|
0x202A, 0x202E,
|
|
|
|
0x2060, 0x2064) ||
|
|
|
|
(ch == 0x2028))) ||
|
|
|
|
unlikely (ch == 0x0009 ||
|
|
|
|
ch == 0x00AD ||
|
|
|
|
ch == 0x034F ||
|
|
|
|
ch == 0x180E ||
|
|
|
|
ch == 0xFEFF);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-04-20 08:00:47 +02:00
|
|
|
struct {
|
2011-07-08 05:47:19 +02:00
|
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name;
|
2011-07-08 05:14:42 +02:00
|
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
|
|
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
2011-07-08 05:47:19 +02:00
|
|
|
} func;
|
2011-04-20 08:00:47 +02:00
|
|
|
|
|
|
|
struct {
|
2011-07-08 05:19:27 +02:00
|
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(name) void *name;
|
2011-07-08 05:14:42 +02:00
|
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
|
|
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
2011-04-20 08:00:47 +02:00
|
|
|
} user_data;
|
|
|
|
|
2010-05-24 18:46:21 +02:00
|
|
|
struct {
|
2011-07-08 05:19:27 +02:00
|
|
|
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_destroy_func_t name;
|
2011-07-08 05:14:42 +02:00
|
|
|
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
|
|
|
|
#undef HB_UNICODE_FUNC_IMPLEMENT
|
2011-04-20 08:00:47 +02:00
|
|
|
} destroy;
|
2009-08-11 02:05:16 +02:00
|
|
|
};
|
|
|
|
|
2011-04-27 15:24:37 +02:00
|
|
|
|
2011-09-16 22:40:44 +02:00
|
|
|
#ifdef HAVE_GLIB
|
2012-06-06 00:14:03 +02:00
|
|
|
extern HB_INTERNAL const hb_unicode_funcs_t _hb_glib_unicode_funcs;
|
|
|
|
#define _hb_unicode_funcs_default _hb_glib_unicode_funcs
|
2011-09-16 22:40:44 +02:00
|
|
|
#elif defined(HAVE_ICU)
|
2012-06-06 00:14:03 +02:00
|
|
|
extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs;
|
|
|
|
#define _hb_unicode_funcs_default _hb_icu_unicode_funcs
|
2011-04-27 15:24:37 +02:00
|
|
|
#else
|
2012-05-27 16:45:57 +02:00
|
|
|
#define HB_UNICODE_FUNCS_NIL 1
|
2012-07-11 22:35:04 +02:00
|
|
|
extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
|
2012-06-06 00:14:03 +02:00
|
|
|
#define _hb_unicode_funcs_default _hb_unicode_funcs_nil
|
2011-04-27 15:24:37 +02:00
|
|
|
#endif
|
|
|
|
|
2009-08-11 05:21:33 +02:00
|
|
|
|
2011-04-20 08:00:47 +02:00
|
|
|
#endif /* HB_UNICODE_PRIVATE_HH */
|