[icu] Add two-way script conversion functions
Also optimizes the common-direction script lookup.
This commit is contained in:
parent
0809aadd4b
commit
f144a8ea84
|
@ -37,7 +37,7 @@ hb_tag_from_string (const char *s)
|
|||
char tag[4];
|
||||
unsigned int i;
|
||||
|
||||
if (!s)
|
||||
if (!s || !*s)
|
||||
return HB_TAG_NONE;
|
||||
|
||||
for (i = 0; i < 4 && s[i]; i++)
|
||||
|
@ -146,6 +146,9 @@ hb_language_to_string (hb_language_t language)
|
|||
hb_script_t
|
||||
hb_script_from_iso15924_tag (hb_tag_t tag)
|
||||
{
|
||||
if (unlikely (tag == HB_TAG_NONE))
|
||||
return HB_SCRIPT_INVALID;
|
||||
|
||||
/* Be lenient, adjust case (one capital letter followed by three small letters) */
|
||||
tag = (tag & 0xDFDFDFDF) | 0x00202020;
|
||||
|
||||
|
|
205
src/hb-icu.cc
205
src/hb-icu.cc
|
@ -33,111 +33,27 @@
|
|||
|
||||
#include <unicode/uversion.h>
|
||||
#include <unicode/uchar.h>
|
||||
#include <unicode/uscript.h>
|
||||
|
||||
HB_BEGIN_DECLS
|
||||
|
||||
|
||||
static unsigned int
|
||||
hb_icu_get_combining_class (hb_unicode_funcs_t *ufuncs,
|
||||
hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
|
||||
hb_script_t
|
||||
hb_icu_script_to_script (UScriptCode script)
|
||||
{
|
||||
return u_getCombiningClass (unicode);
|
||||
return hb_script_from_string (uscript_getShortName (script));
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
|
||||
hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
UScriptCode
|
||||
hb_icu_script_from_script (hb_script_t script)
|
||||
{
|
||||
switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
|
||||
{
|
||||
case U_EA_WIDE:
|
||||
case U_EA_FULLWIDTH:
|
||||
return 2;
|
||||
case U_EA_NEUTRAL:
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_HALFWIDTH:
|
||||
case U_EA_NARROW:
|
||||
return 1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static hb_unicode_general_category_t
|
||||
hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs,
|
||||
hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
{
|
||||
switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
|
||||
{
|
||||
case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
||||
|
||||
case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
|
||||
case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
|
||||
case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
|
||||
case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
|
||||
case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
|
||||
|
||||
case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
|
||||
case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
|
||||
case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_COMBINING_MARK;
|
||||
|
||||
case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
|
||||
case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
|
||||
case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
|
||||
|
||||
case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
|
||||
case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
|
||||
case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
|
||||
|
||||
case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
|
||||
case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
|
||||
case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
|
||||
case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
|
||||
|
||||
|
||||
case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
|
||||
case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
|
||||
case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
|
||||
case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
|
||||
case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
|
||||
|
||||
case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
|
||||
case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
|
||||
case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
|
||||
case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
|
||||
|
||||
case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
|
||||
case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
|
||||
}
|
||||
|
||||
return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
||||
}
|
||||
|
||||
static hb_codepoint_t
|
||||
hb_icu_get_mirroring (hb_unicode_funcs_t *ufuncs,
|
||||
hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
{
|
||||
return u_charMirror(unicode);
|
||||
}
|
||||
|
||||
static hb_script_t
|
||||
hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
|
||||
hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UScriptCode scriptCode = uscript_getScript(unicode, &status);
|
||||
switch ((int) scriptCode)
|
||||
switch ((int) script)
|
||||
{
|
||||
#define CHECK_ICU_VERSION(major, minor) \
|
||||
U_ICU_VERSION_MAJOR_NUM > (major) || (U_ICU_VERSION_MAJOR_NUM == (major) && U_ICU_VERSION_MINOR_NUM >= (minor))
|
||||
#define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C
|
||||
#define MATCH_SCRIPT2(C1, C2) case USCRIPT_##C1: return HB_SCRIPT_##C2
|
||||
#define MATCH_SCRIPT(C) case HB_SCRIPT_##C: return USCRIPT_##C
|
||||
#define MATCH_SCRIPT2(C1, C2) case HB_SCRIPT_##C2: return USCRIPT_##C1
|
||||
|
||||
MATCH_SCRIPT2(INVALID_CODE, INVALID);
|
||||
|
||||
MATCH_SCRIPT (COMMON);
|
||||
MATCH_SCRIPT (INHERITED);
|
||||
|
@ -259,7 +175,106 @@ hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
|
|||
#undef MATCH_SCRIPT2
|
||||
}
|
||||
|
||||
return HB_SCRIPT_UNKNOWN;
|
||||
return USCRIPT_UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
static unsigned int
|
||||
hb_icu_get_combining_class (hb_unicode_funcs_t *ufuncs,
|
||||
hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
|
||||
{
|
||||
return u_getCombiningClass (unicode);
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
|
||||
hb_codepoint_t unicode,
|
||||
void *user_data)
|
||||
{
|
||||
switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
|
||||
{
|
||||
case U_EA_WIDE:
|
||||
case U_EA_FULLWIDTH:
|
||||
return 2;
|
||||
case U_EA_NEUTRAL:
|
||||
case U_EA_AMBIGUOUS:
|
||||
case U_EA_HALFWIDTH:
|
||||
case U_EA_NARROW:
|
||||
return 1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Unicode-funcs callback: general category of a code point.
 *
 * Translates the value ICU reports for UCHAR_GENERAL_CATEGORY into the
 * matching HB_UNICODE_GENERAL_CATEGORY_* constant.  Any ICU value without an
 * entry below yields HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED.  The ufuncs and
 * user_data arguments are not used. */
static hb_unicode_general_category_t
hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs,
			     hb_codepoint_t      unicode,
			     void               *user_data)
{
  /* Fallback for ICU categories that have no mapping. */
  hb_unicode_general_category_t category = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;

  switch (u_getIntPropertyValue (unicode, UCHAR_GENERAL_CATEGORY))
  {
#define MAP_CATEGORY(ICU, HB) \
    case U_##ICU: category = HB_UNICODE_GENERAL_CATEGORY_##HB; break

    MAP_CATEGORY (UNASSIGNED,             UNASSIGNED);

    /* Letters */
    MAP_CATEGORY (UPPERCASE_LETTER,       UPPERCASE_LETTER);
    MAP_CATEGORY (LOWERCASE_LETTER,       LOWERCASE_LETTER);
    MAP_CATEGORY (TITLECASE_LETTER,       TITLECASE_LETTER);
    MAP_CATEGORY (MODIFIER_LETTER,        MODIFIER_LETTER);
    MAP_CATEGORY (OTHER_LETTER,           OTHER_LETTER);

    /* Marks */
    MAP_CATEGORY (NON_SPACING_MARK,       NON_SPACING_MARK);
    MAP_CATEGORY (ENCLOSING_MARK,         ENCLOSING_MARK);
    MAP_CATEGORY (COMBINING_SPACING_MARK, COMBINING_MARK);

    /* Numbers */
    MAP_CATEGORY (DECIMAL_DIGIT_NUMBER,   DECIMAL_NUMBER);
    MAP_CATEGORY (LETTER_NUMBER,          LETTER_NUMBER);
    MAP_CATEGORY (OTHER_NUMBER,           OTHER_NUMBER);

    /* Separators */
    MAP_CATEGORY (SPACE_SEPARATOR,        SPACE_SEPARATOR);
    MAP_CATEGORY (LINE_SEPARATOR,         LINE_SEPARATOR);
    MAP_CATEGORY (PARAGRAPH_SEPARATOR,    PARAGRAPH_SEPARATOR);

    /* Control, format, private use, surrogate */
    MAP_CATEGORY (CONTROL_CHAR,           CONTROL);
    MAP_CATEGORY (FORMAT_CHAR,            FORMAT);
    MAP_CATEGORY (PRIVATE_USE_CHAR,       PRIVATE_USE);
    MAP_CATEGORY (SURROGATE,              SURROGATE);

    /* Punctuation */
    MAP_CATEGORY (DASH_PUNCTUATION,       DASH_PUNCTUATION);
    MAP_CATEGORY (START_PUNCTUATION,      OPEN_PUNCTUATION);
    MAP_CATEGORY (END_PUNCTUATION,        CLOSE_PUNCTUATION);
    MAP_CATEGORY (CONNECTOR_PUNCTUATION,  CONNECT_PUNCTUATION);
    MAP_CATEGORY (OTHER_PUNCTUATION,      OTHER_PUNCTUATION);

    /* Symbols */
    MAP_CATEGORY (MATH_SYMBOL,            MATH_SYMBOL);
    MAP_CATEGORY (CURRENCY_SYMBOL,        CURRENCY_SYMBOL);
    MAP_CATEGORY (MODIFIER_SYMBOL,        MODIFIER_SYMBOL);
    MAP_CATEGORY (OTHER_SYMBOL,           OTHER_SYMBOL);

    /* Quotation-style punctuation */
    MAP_CATEGORY (INITIAL_PUNCTUATION,    INITIAL_PUNCTUATION);
    MAP_CATEGORY (FINAL_PUNCTUATION,      FINAL_PUNCTUATION);

#undef MAP_CATEGORY
  }

  return category;
}
|
||||
|
||||
/* Unicode-funcs callback: mirrored counterpart of a code point.
 *
 * The lookup is delegated to ICU's u_charMirror(); the ufuncs and user_data
 * arguments are part of the callback signature and are not used. */
static hb_codepoint_t
hb_icu_get_mirroring (hb_unicode_funcs_t *ufuncs,
		      hb_codepoint_t      unicode,
		      void               *user_data)
{
  hb_codepoint_t mirrored = u_charMirror (unicode);

  return mirrored;
}
|
||||
|
||||
/* Unicode-funcs callback: script of a code point.
 *
 * Asks ICU for the code point's UScriptCode and converts it with
 * hb_icu_script_to_script().  If ICU reports a failure through the status
 * out-parameter, the returned script code is not trusted and
 * HB_SCRIPT_UNKNOWN is returned instead.  The ufuncs and user_data arguments
 * are not used. */
static hb_script_t
hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
		   hb_codepoint_t      unicode,
		   void               *user_data)
{
  UErrorCode status = U_ZERO_ERROR;
  UScriptCode scriptCode = uscript_getScript (unicode, &status);

  /* Previously the status was written but never checked, so a failed lookup
   * was converted as though it were a valid script code. */
  if (U_FAILURE (status))
    return HB_SCRIPT_UNKNOWN;

  return hb_icu_script_to_script (scriptCode);
}
|
||||
|
||||
static hb_unicode_funcs_t icu_ufuncs = {
|
||||
|
|
|
@ -28,10 +28,19 @@
|
|||
#define HB_ICU_H
|
||||
|
||||
#include "hb.h"
|
||||
#include <unicode/uscript.h>
|
||||
|
||||
|
||||
HB_BEGIN_DECLS
|
||||
|
||||
|
||||
/* Converts an ICU script code into the corresponding hb_script_t. */
hb_script_t
|
||||
hb_icu_script_to_script (UScriptCode script);
|
||||
|
||||
/* Converts an hb_script_t into the corresponding ICU script code. */
UScriptCode
|
||||
hb_icu_script_from_script (hb_script_t script);
|
||||
|
||||
|
||||
hb_unicode_funcs_t *
|
||||
hb_icu_get_unicode_funcs (void);
|
||||
|
||||
|
|
|
@ -91,8 +91,8 @@ test_types_tag (void)
|
|||
g_assert_cmphex (hb_tag_from_string ("aBc"), ==, 0x61426320);
|
||||
g_assert_cmphex (hb_tag_from_string ("aB"), ==, 0x61422020);
|
||||
g_assert_cmphex (hb_tag_from_string ("a"), ==, 0x61202020);
|
||||
g_assert_cmphex (hb_tag_from_string (""), ==, 0x20202020);
|
||||
|
||||
g_assert_cmphex (hb_tag_from_string (""), ==, HB_TAG_NONE);
|
||||
g_assert_cmphex (hb_tag_from_string (NULL), ==, HB_TAG_NONE);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue