2009-11-03 22:35:10 +01:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2009 Red Hat, Inc.
|
2010-11-22 17:03:18 +01:00
|
|
|
* Copyright (C) 2009 Keith Stribley
|
2009-11-03 22:35:10 +01:00
|
|
|
*
|
2010-04-22 06:11:43 +02:00
|
|
|
* This is part of HarfBuzz, a text shaping library.
|
2009-11-03 22:35:10 +01:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, without written agreement and without
|
|
|
|
* license or royalty fees, to use, copy, modify, and distribute this
|
|
|
|
* software and its documentation for any purpose, provided that the
|
|
|
|
* above copyright notice and the following two paragraphs appear in
|
|
|
|
* all copies of this software.
|
|
|
|
*
|
|
|
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
|
|
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
|
|
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
|
|
* DAMAGE.
|
|
|
|
*
|
|
|
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
|
|
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
|
|
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
|
|
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
|
|
|
*
|
|
|
|
* Red Hat Author(s): Behdad Esfahbod
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "hb-private.h"
|
|
|
|
|
|
|
|
#include "hb-icu.h"
|
|
|
|
|
2011-04-20 08:00:47 +02:00
|
|
|
#include "hb-unicode-private.hh"
|
2009-11-03 22:35:10 +01:00
|
|
|
|
2010-05-21 14:29:12 +02:00
|
|
|
#include <unicode/uversion.h>
|
2009-11-03 22:35:10 +01:00
|
|
|
#include <unicode/uchar.h>
|
|
|
|
#include <unicode/uscript.h>
|
|
|
|
|
2010-07-23 21:11:18 +02:00
|
|
|
HB_BEGIN_DECLS
|
|
|
|
|
|
|
|
|
2011-04-20 06:19:20 +02:00
|
|
|
static unsigned int
|
|
|
|
hb_icu_get_combining_class (hb_unicode_funcs_t *ufuncs,
|
|
|
|
hb_codepoint_t unicode,
|
|
|
|
void *user_data)
|
|
|
|
|
|
|
|
{
|
|
|
|
return u_getCombiningClass (unicode);
|
|
|
|
}
|
2009-11-03 22:35:10 +01:00
|
|
|
|
|
|
|
static unsigned int
|
2011-04-20 06:19:20 +02:00
|
|
|
hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
|
|
|
|
hb_codepoint_t unicode,
|
|
|
|
void *user_data)
|
2009-11-03 22:35:10 +01:00
|
|
|
{
|
|
|
|
switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
|
|
|
|
{
|
|
|
|
case U_EA_WIDE:
|
|
|
|
case U_EA_FULLWIDTH:
|
|
|
|
return 2;
|
|
|
|
case U_EA_NEUTRAL:
|
|
|
|
case U_EA_AMBIGUOUS:
|
|
|
|
case U_EA_HALFWIDTH:
|
|
|
|
case U_EA_NARROW:
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2011-04-11 21:47:40 +02:00
|
|
|
static hb_unicode_general_category_t
|
2011-04-20 06:19:20 +02:00
|
|
|
hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs,
|
|
|
|
hb_codepoint_t unicode,
|
|
|
|
void *user_data)
|
2009-11-03 22:35:10 +01:00
|
|
|
{
|
|
|
|
switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
|
|
|
|
{
|
2011-04-11 21:47:40 +02:00
|
|
|
case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
2009-11-03 22:35:10 +01:00
|
|
|
|
2011-04-20 08:29:22 +02:00
|
|
|
case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
|
|
|
|
case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
|
|
|
|
case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
|
|
|
|
case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
|
|
|
|
case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
|
|
|
|
|
|
|
|
case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
|
|
|
|
case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
|
|
|
|
case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_COMBINING_MARK;
|
|
|
|
|
|
|
|
case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
|
|
|
|
case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
|
|
|
|
case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
|
|
|
|
|
|
|
|
case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
|
|
|
|
case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
|
|
|
|
case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
|
|
|
|
|
|
|
|
case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
|
|
|
|
case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
|
|
|
|
case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
|
|
|
|
case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
|
|
|
|
|
|
|
|
|
|
|
|
case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
|
|
|
|
case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
|
|
|
|
case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
|
|
|
|
case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
|
|
|
|
case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
|
|
|
|
|
|
|
|
case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
|
|
|
|
case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
|
|
|
|
case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
|
|
|
|
case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
|
|
|
|
|
|
|
|
case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
|
|
|
|
case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
|
2009-11-03 22:35:10 +01:00
|
|
|
}
|
|
|
|
|
2011-04-11 21:47:40 +02:00
|
|
|
return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
2009-11-03 22:35:10 +01:00
|
|
|
}
|
|
|
|
|
2011-04-20 08:00:47 +02:00
|
|
|
static hb_codepoint_t
|
|
|
|
hb_icu_get_mirroring (hb_unicode_funcs_t *ufuncs,
|
|
|
|
hb_codepoint_t unicode,
|
|
|
|
void *user_data)
|
|
|
|
{
|
|
|
|
return u_charMirror(unicode);
|
|
|
|
}
|
|
|
|
|
2009-11-03 22:35:10 +01:00
|
|
|
static hb_script_t
|
2011-04-20 06:19:20 +02:00
|
|
|
hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
|
|
|
|
hb_codepoint_t unicode,
|
|
|
|
void *user_data)
|
2009-11-03 22:35:10 +01:00
|
|
|
{
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UScriptCode scriptCode = uscript_getScript(unicode, &status);
|
|
|
|
switch ((int) scriptCode)
|
|
|
|
{
|
2010-05-21 14:29:12 +02:00
|
|
|
#define CHECK_ICU_VERSION(major, minor) \
|
|
|
|
U_ICU_VERSION_MAJOR_NUM > (major) || (U_ICU_VERSION_MAJOR_NUM == (major) && U_ICU_VERSION_MINOR_NUM >= (minor))
|
2009-11-03 22:35:10 +01:00
|
|
|
#define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C
|
2010-05-21 14:29:12 +02:00
|
|
|
#define MATCH_SCRIPT2(C1, C2) case USCRIPT_##C1: return HB_SCRIPT_##C2
|
2011-03-16 18:53:32 +01:00
|
|
|
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (COMMON);
|
|
|
|
MATCH_SCRIPT (INHERITED);
|
|
|
|
MATCH_SCRIPT (ARABIC);
|
|
|
|
MATCH_SCRIPT (ARMENIAN);
|
|
|
|
MATCH_SCRIPT (BENGALI);
|
|
|
|
MATCH_SCRIPT (BOPOMOFO);
|
|
|
|
MATCH_SCRIPT (CHEROKEE);
|
|
|
|
MATCH_SCRIPT (COPTIC);
|
|
|
|
MATCH_SCRIPT (CYRILLIC);
|
|
|
|
MATCH_SCRIPT (DESERET);
|
|
|
|
MATCH_SCRIPT (DEVANAGARI);
|
|
|
|
MATCH_SCRIPT (ETHIOPIC);
|
|
|
|
MATCH_SCRIPT (GEORGIAN);
|
|
|
|
MATCH_SCRIPT (GOTHIC);
|
|
|
|
MATCH_SCRIPT (GREEK);
|
|
|
|
MATCH_SCRIPT (GUJARATI);
|
|
|
|
MATCH_SCRIPT (GURMUKHI);
|
|
|
|
MATCH_SCRIPT (HAN);
|
|
|
|
MATCH_SCRIPT (HANGUL);
|
|
|
|
MATCH_SCRIPT (HEBREW);
|
|
|
|
MATCH_SCRIPT (HIRAGANA);
|
|
|
|
MATCH_SCRIPT (KANNADA);
|
|
|
|
MATCH_SCRIPT (KATAKANA);
|
|
|
|
MATCH_SCRIPT (KHMER);
|
|
|
|
MATCH_SCRIPT (LAO);
|
|
|
|
MATCH_SCRIPT (LATIN);
|
|
|
|
MATCH_SCRIPT (MALAYALAM);
|
|
|
|
MATCH_SCRIPT (MONGOLIAN);
|
|
|
|
MATCH_SCRIPT (MYANMAR);
|
|
|
|
MATCH_SCRIPT (OGHAM);
|
|
|
|
MATCH_SCRIPT (OLD_ITALIC);
|
|
|
|
MATCH_SCRIPT (ORIYA);
|
|
|
|
MATCH_SCRIPT (RUNIC);
|
|
|
|
MATCH_SCRIPT (SINHALA);
|
|
|
|
MATCH_SCRIPT (SYRIAC);
|
|
|
|
MATCH_SCRIPT (TAMIL);
|
|
|
|
MATCH_SCRIPT (TELUGU);
|
|
|
|
MATCH_SCRIPT (THAANA);
|
|
|
|
MATCH_SCRIPT (THAI);
|
|
|
|
MATCH_SCRIPT (TIBETAN);
|
|
|
|
MATCH_SCRIPT (CANADIAN_ABORIGINAL);
|
|
|
|
MATCH_SCRIPT (YI);
|
|
|
|
MATCH_SCRIPT (TAGALOG);
|
|
|
|
MATCH_SCRIPT (HANUNOO);
|
|
|
|
MATCH_SCRIPT (BUHID);
|
|
|
|
MATCH_SCRIPT (TAGBANWA);
|
2009-11-03 22:35:10 +01:00
|
|
|
|
|
|
|
/* Unicode-4.0 additions */
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (BRAILLE);
|
|
|
|
MATCH_SCRIPT (CYPRIOT);
|
|
|
|
MATCH_SCRIPT (LIMBU);
|
|
|
|
MATCH_SCRIPT (OSMANYA);
|
|
|
|
MATCH_SCRIPT (SHAVIAN);
|
|
|
|
MATCH_SCRIPT (LINEAR_B);
|
|
|
|
MATCH_SCRIPT (TAI_LE);
|
|
|
|
MATCH_SCRIPT (UGARITIC);
|
2009-11-03 22:35:10 +01:00
|
|
|
|
|
|
|
/* Unicode-4.1 additions */
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (NEW_TAI_LUE);
|
|
|
|
MATCH_SCRIPT (BUGINESE);
|
|
|
|
MATCH_SCRIPT (GLAGOLITIC);
|
|
|
|
MATCH_SCRIPT (TIFINAGH);
|
|
|
|
MATCH_SCRIPT (SYLOTI_NAGRI);
|
|
|
|
MATCH_SCRIPT (OLD_PERSIAN);
|
|
|
|
MATCH_SCRIPT (KHAROSHTHI);
|
2009-11-03 22:35:10 +01:00
|
|
|
|
|
|
|
/* Unicode-5.0 additions */
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (UNKNOWN);
|
|
|
|
MATCH_SCRIPT (BALINESE);
|
|
|
|
MATCH_SCRIPT (CUNEIFORM);
|
|
|
|
MATCH_SCRIPT (PHOENICIAN);
|
|
|
|
MATCH_SCRIPT (PHAGS_PA);
|
|
|
|
MATCH_SCRIPT (NKO);
|
2009-11-03 22:35:10 +01:00
|
|
|
|
|
|
|
/* Unicode-5.1 additions */
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (KAYAH_LI);
|
|
|
|
MATCH_SCRIPT (LEPCHA);
|
|
|
|
MATCH_SCRIPT (REJANG);
|
|
|
|
MATCH_SCRIPT (SUNDANESE);
|
|
|
|
MATCH_SCRIPT (SAURASHTRA);
|
|
|
|
MATCH_SCRIPT (CHAM);
|
|
|
|
MATCH_SCRIPT (OL_CHIKI);
|
|
|
|
MATCH_SCRIPT (VAI);
|
|
|
|
MATCH_SCRIPT (CARIAN);
|
|
|
|
MATCH_SCRIPT (LYCIAN);
|
|
|
|
MATCH_SCRIPT (LYDIAN);
|
2010-05-21 14:29:12 +02:00
|
|
|
|
|
|
|
/* Unicode-5.2 additions */
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (AVESTAN);
|
2010-05-21 14:29:12 +02:00
|
|
|
#if CHECK_ICU_VERSION (4, 4)
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (BAMUM);
|
2010-05-21 14:29:12 +02:00
|
|
|
#endif
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (EGYPTIAN_HIEROGLYPHS);
|
|
|
|
MATCH_SCRIPT (IMPERIAL_ARAMAIC);
|
|
|
|
MATCH_SCRIPT (INSCRIPTIONAL_PAHLAVI);
|
|
|
|
MATCH_SCRIPT (INSCRIPTIONAL_PARTHIAN);
|
|
|
|
MATCH_SCRIPT (JAVANESE);
|
|
|
|
MATCH_SCRIPT (KAITHI);
|
|
|
|
MATCH_SCRIPT2(LANNA, TAI_THAM);
|
2010-05-21 14:29:12 +02:00
|
|
|
#if CHECK_ICU_VERSION (4, 4)
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (LISU);
|
2010-05-21 14:29:12 +02:00
|
|
|
#endif
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT2(MEITEI_MAYEK, MEETEI_MAYEK);
|
2010-05-21 14:29:12 +02:00
|
|
|
#if CHECK_ICU_VERSION (4, 4)
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (OLD_SOUTH_ARABIAN);
|
2010-05-21 14:29:12 +02:00
|
|
|
#endif
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT2(ORKHON, OLD_TURKIC);
|
|
|
|
MATCH_SCRIPT (SAMARITAN);
|
|
|
|
MATCH_SCRIPT (TAI_VIET);
|
2010-11-17 20:35:34 +01:00
|
|
|
|
|
|
|
/* Unicode-6.0 additions */
|
2011-04-20 08:29:22 +02:00
|
|
|
MATCH_SCRIPT (BATAK);
|
|
|
|
MATCH_SCRIPT (BRAHMI);
|
|
|
|
MATCH_SCRIPT2(MANDAEAN, MANDAIC);
|
2010-11-17 20:35:34 +01:00
|
|
|
|
2011-04-20 06:19:20 +02:00
|
|
|
#undef CHECK_ICU_VERSION
|
|
|
|
#undef MATCH_SCRIPT
|
|
|
|
#undef MATCH_SCRIPT2
|
2009-11-03 22:35:10 +01:00
|
|
|
}
|
2011-04-20 06:19:20 +02:00
|
|
|
|
2009-11-03 22:35:10 +01:00
|
|
|
return HB_SCRIPT_UNKNOWN;
|
|
|
|
}
|
|
|
|
|
|
|
|
static hb_unicode_funcs_t icu_ufuncs = {
|
|
|
|
HB_REFERENCE_COUNT_INVALID, /* ref_count */
|
2011-04-20 08:00:47 +02:00
|
|
|
NULL, /* parent */
|
2009-11-03 22:35:10 +01:00
|
|
|
TRUE, /* immutable */
|
2010-05-24 19:02:32 +02:00
|
|
|
{
|
2011-04-20 08:00:47 +02:00
|
|
|
hb_icu_get_combining_class,
|
|
|
|
hb_icu_get_eastasian_width,
|
|
|
|
hb_icu_get_general_category,
|
|
|
|
hb_icu_get_mirroring,
|
|
|
|
hb_icu_get_script
|
2010-05-24 19:02:32 +02:00
|
|
|
}
|
2009-11-03 22:35:10 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
hb_unicode_funcs_t *
|
|
|
|
hb_icu_get_unicode_funcs (void)
|
|
|
|
{
|
|
|
|
return &icu_ufuncs;
|
|
|
|
}
|
2010-07-23 21:11:18 +02:00
|
|
|
|
|
|
|
|
|
|
|
HB_END_DECLS
|