[icu] Add two-way script conversion functions
Also optimizes the common-direction script lookup.
This commit is contained in:
parent
0809aadd4b
commit
f144a8ea84
|
@ -37,7 +37,7 @@ hb_tag_from_string (const char *s)
|
||||||
char tag[4];
|
char tag[4];
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
if (!s)
|
if (!s || !*s)
|
||||||
return HB_TAG_NONE;
|
return HB_TAG_NONE;
|
||||||
|
|
||||||
for (i = 0; i < 4 && s[i]; i++)
|
for (i = 0; i < 4 && s[i]; i++)
|
||||||
|
@ -146,6 +146,9 @@ hb_language_to_string (hb_language_t language)
|
||||||
hb_script_t
|
hb_script_t
|
||||||
hb_script_from_iso15924_tag (hb_tag_t tag)
|
hb_script_from_iso15924_tag (hb_tag_t tag)
|
||||||
{
|
{
|
||||||
|
if (unlikely (tag == HB_TAG_NONE))
|
||||||
|
return HB_SCRIPT_INVALID;
|
||||||
|
|
||||||
/* Be lenient, adjust case (one capital letter followed by three small letters) */
|
/* Be lenient, adjust case (one capital letter followed by three small letters) */
|
||||||
tag = (tag & 0xDFDFDFDF) | 0x00202020;
|
tag = (tag & 0xDFDFDFDF) | 0x00202020;
|
||||||
|
|
||||||
|
|
205
src/hb-icu.cc
205
src/hb-icu.cc
|
@ -33,111 +33,27 @@
|
||||||
|
|
||||||
#include <unicode/uversion.h>
|
#include <unicode/uversion.h>
|
||||||
#include <unicode/uchar.h>
|
#include <unicode/uchar.h>
|
||||||
#include <unicode/uscript.h>
|
|
||||||
|
|
||||||
HB_BEGIN_DECLS
|
HB_BEGIN_DECLS
|
||||||
|
|
||||||
|
|
||||||
static unsigned int
|
hb_script_t
|
||||||
hb_icu_get_combining_class (hb_unicode_funcs_t *ufuncs,
|
hb_icu_script_to_script (UScriptCode script)
|
||||||
hb_codepoint_t unicode,
|
|
||||||
void *user_data)
|
|
||||||
|
|
||||||
{
|
{
|
||||||
return u_getCombiningClass (unicode);
|
return hb_script_from_string (uscript_getShortName (script));
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int
|
UScriptCode
|
||||||
hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
|
hb_icu_script_from_script (hb_script_t script)
|
||||||
hb_codepoint_t unicode,
|
|
||||||
void *user_data)
|
|
||||||
{
|
{
|
||||||
switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
|
switch ((int) script)
|
||||||
{
|
|
||||||
case U_EA_WIDE:
|
|
||||||
case U_EA_FULLWIDTH:
|
|
||||||
return 2;
|
|
||||||
case U_EA_NEUTRAL:
|
|
||||||
case U_EA_AMBIGUOUS:
|
|
||||||
case U_EA_HALFWIDTH:
|
|
||||||
case U_EA_NARROW:
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static hb_unicode_general_category_t
|
|
||||||
hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs,
|
|
||||||
hb_codepoint_t unicode,
|
|
||||||
void *user_data)
|
|
||||||
{
|
|
||||||
switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
|
|
||||||
{
|
|
||||||
case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
|
||||||
|
|
||||||
case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
|
|
||||||
case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
|
|
||||||
case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
|
|
||||||
case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
|
|
||||||
case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
|
|
||||||
|
|
||||||
case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
|
|
||||||
case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
|
|
||||||
case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_COMBINING_MARK;
|
|
||||||
|
|
||||||
case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
|
|
||||||
case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
|
|
||||||
case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
|
|
||||||
|
|
||||||
case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
|
|
||||||
case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
|
|
||||||
case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
|
|
||||||
|
|
||||||
case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
|
|
||||||
case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
|
|
||||||
case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
|
|
||||||
case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
|
|
||||||
|
|
||||||
|
|
||||||
case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
|
|
||||||
case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
|
|
||||||
case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
|
|
||||||
case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
|
|
||||||
case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
|
|
||||||
|
|
||||||
case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
|
|
||||||
case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
|
|
||||||
case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
|
|
||||||
case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
|
|
||||||
|
|
||||||
case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
|
|
||||||
case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
|
|
||||||
}
|
|
||||||
|
|
||||||
return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
|
||||||
}
|
|
||||||
|
|
||||||
static hb_codepoint_t
|
|
||||||
hb_icu_get_mirroring (hb_unicode_funcs_t *ufuncs,
|
|
||||||
hb_codepoint_t unicode,
|
|
||||||
void *user_data)
|
|
||||||
{
|
|
||||||
return u_charMirror(unicode);
|
|
||||||
}
|
|
||||||
|
|
||||||
static hb_script_t
|
|
||||||
hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
|
|
||||||
hb_codepoint_t unicode,
|
|
||||||
void *user_data)
|
|
||||||
{
|
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
|
||||||
UScriptCode scriptCode = uscript_getScript(unicode, &status);
|
|
||||||
switch ((int) scriptCode)
|
|
||||||
{
|
{
|
||||||
#define CHECK_ICU_VERSION(major, minor) \
|
#define CHECK_ICU_VERSION(major, minor) \
|
||||||
U_ICU_VERSION_MAJOR_NUM > (major) || (U_ICU_VERSION_MAJOR_NUM == (major) && U_ICU_VERSION_MINOR_NUM >= (minor))
|
U_ICU_VERSION_MAJOR_NUM > (major) || (U_ICU_VERSION_MAJOR_NUM == (major) && U_ICU_VERSION_MINOR_NUM >= (minor))
|
||||||
#define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C
|
#define MATCH_SCRIPT(C) case HB_SCRIPT_##C: return USCRIPT_##C
|
||||||
#define MATCH_SCRIPT2(C1, C2) case USCRIPT_##C1: return HB_SCRIPT_##C2
|
#define MATCH_SCRIPT2(C1, C2) case HB_SCRIPT_##C2: return USCRIPT_##C1
|
||||||
|
|
||||||
|
MATCH_SCRIPT2(INVALID_CODE, INVALID);
|
||||||
|
|
||||||
MATCH_SCRIPT (COMMON);
|
MATCH_SCRIPT (COMMON);
|
||||||
MATCH_SCRIPT (INHERITED);
|
MATCH_SCRIPT (INHERITED);
|
||||||
|
@ -259,7 +175,106 @@ hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
|
||||||
#undef MATCH_SCRIPT2
|
#undef MATCH_SCRIPT2
|
||||||
}
|
}
|
||||||
|
|
||||||
return HB_SCRIPT_UNKNOWN;
|
return USCRIPT_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static unsigned int
|
||||||
|
hb_icu_get_combining_class (hb_unicode_funcs_t *ufuncs,
|
||||||
|
hb_codepoint_t unicode,
|
||||||
|
void *user_data)
|
||||||
|
|
||||||
|
{
|
||||||
|
return u_getCombiningClass (unicode);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int
|
||||||
|
hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
|
||||||
|
hb_codepoint_t unicode,
|
||||||
|
void *user_data)
|
||||||
|
{
|
||||||
|
switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
|
||||||
|
{
|
||||||
|
case U_EA_WIDE:
|
||||||
|
case U_EA_FULLWIDTH:
|
||||||
|
return 2;
|
||||||
|
case U_EA_NEUTRAL:
|
||||||
|
case U_EA_AMBIGUOUS:
|
||||||
|
case U_EA_HALFWIDTH:
|
||||||
|
case U_EA_NARROW:
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static hb_unicode_general_category_t
|
||||||
|
hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs,
|
||||||
|
hb_codepoint_t unicode,
|
||||||
|
void *user_data)
|
||||||
|
{
|
||||||
|
switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
|
||||||
|
{
|
||||||
|
case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
||||||
|
|
||||||
|
case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
|
||||||
|
case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
|
||||||
|
case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
|
||||||
|
case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
|
||||||
|
case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
|
||||||
|
|
||||||
|
case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
|
||||||
|
case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
|
||||||
|
case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_COMBINING_MARK;
|
||||||
|
|
||||||
|
case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
|
||||||
|
case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
|
||||||
|
case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
|
||||||
|
|
||||||
|
case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
|
||||||
|
case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
|
||||||
|
case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
|
||||||
|
|
||||||
|
case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
|
||||||
|
case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
|
||||||
|
case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
|
||||||
|
case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
|
||||||
|
|
||||||
|
|
||||||
|
case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
|
||||||
|
case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
|
||||||
|
case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
|
||||||
|
case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
|
||||||
|
case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
|
||||||
|
|
||||||
|
case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
|
||||||
|
case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
|
||||||
|
case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
|
||||||
|
case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
|
||||||
|
|
||||||
|
case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
|
||||||
|
case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
|
||||||
|
}
|
||||||
|
|
||||||
|
return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
||||||
|
}
|
||||||
|
|
||||||
|
static hb_codepoint_t
|
||||||
|
hb_icu_get_mirroring (hb_unicode_funcs_t *ufuncs,
|
||||||
|
hb_codepoint_t unicode,
|
||||||
|
void *user_data)
|
||||||
|
{
|
||||||
|
return u_charMirror(unicode);
|
||||||
|
}
|
||||||
|
|
||||||
|
static hb_script_t
|
||||||
|
hb_icu_get_script (hb_unicode_funcs_t *ufuncs,
|
||||||
|
hb_codepoint_t unicode,
|
||||||
|
void *user_data)
|
||||||
|
{
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UScriptCode scriptCode = uscript_getScript(unicode, &status);
|
||||||
|
|
||||||
|
return hb_icu_script_to_script (scriptCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
static hb_unicode_funcs_t icu_ufuncs = {
|
static hb_unicode_funcs_t icu_ufuncs = {
|
||||||
|
|
|
@ -28,10 +28,19 @@
|
||||||
#define HB_ICU_H
|
#define HB_ICU_H
|
||||||
|
|
||||||
#include "hb.h"
|
#include "hb.h"
|
||||||
|
#include <unicode/uscript.h>
|
||||||
|
|
||||||
|
|
||||||
HB_BEGIN_DECLS
|
HB_BEGIN_DECLS
|
||||||
|
|
||||||
|
|
||||||
|
hb_script_t
|
||||||
|
hb_icu_script_to_script (UScriptCode script);
|
||||||
|
|
||||||
|
UScriptCode
|
||||||
|
hb_icu_script_from_script (hb_script_t script);
|
||||||
|
|
||||||
|
|
||||||
hb_unicode_funcs_t *
|
hb_unicode_funcs_t *
|
||||||
hb_icu_get_unicode_funcs (void);
|
hb_icu_get_unicode_funcs (void);
|
||||||
|
|
||||||
|
|
|
@ -91,8 +91,8 @@ test_types_tag (void)
|
||||||
g_assert_cmphex (hb_tag_from_string ("aBc"), ==, 0x61426320);
|
g_assert_cmphex (hb_tag_from_string ("aBc"), ==, 0x61426320);
|
||||||
g_assert_cmphex (hb_tag_from_string ("aB"), ==, 0x61422020);
|
g_assert_cmphex (hb_tag_from_string ("aB"), ==, 0x61422020);
|
||||||
g_assert_cmphex (hb_tag_from_string ("a"), ==, 0x61202020);
|
g_assert_cmphex (hb_tag_from_string ("a"), ==, 0x61202020);
|
||||||
g_assert_cmphex (hb_tag_from_string (""), ==, 0x20202020);
|
|
||||||
|
|
||||||
|
g_assert_cmphex (hb_tag_from_string (""), ==, HB_TAG_NONE);
|
||||||
g_assert_cmphex (hb_tag_from_string (NULL), ==, HB_TAG_NONE);
|
g_assert_cmphex (hb_tag_from_string (NULL), ==, HB_TAG_NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue