diff --git a/src/hb-common.cc b/src/hb-common.cc
index e9f9cfeb5..0e47592b9 100644
--- a/src/hb-common.cc
+++ b/src/hb-common.cc
@@ -28,6 +28,7 @@
 
 #include "hb.hh"
 #include "hb-machinery.hh"
+#include "hb-script-lang-table.h"
 
 #if !defined(HB_NO_SETLOCALE) && (!defined(HAVE_NEWLOCALE) || !defined(HAVE_USELOCALE))
 #define HB_NO_SETLOCALE 1
@@ -665,6 +666,143 @@ hb_script_get_horizontal_direction (hb_script_t script)
   return HB_DIRECTION_LTR;
 }
 
+/* bsearch()-style comparator ordering two language tags by their first
+ * component only: the text before the first '-', or the whole tag when
+ * there is none.  Both arguments must point to NUL-terminated strings; a
+ * table row qualifies because its tag is the row's first member. */
+static int
+lang_compare_first_component (const void *pa,
+                              const void *pb)
+{
+  const char *a = (const char *) pa;
+  const char *b = (const char *) pb;
+  const char *p;
+  size_t da, db;
+
+  p = strchr (a, '-');
+  da = p ? (size_t) (p - a) : strlen (a);
+
+  p = strchr (b, '-');
+  db = p ? (size_t) (p - b) : strlen (b);
+
+  /* Comparing up to the longer component keeps "pa" and "pap" distinct:
+   * the shorter one reaches its '-' or NUL first. */
+  return strncmp (a, b, hb_max (da, db));
+}
+
+/* Returns the table row that best matches @language, or nullptr when
+ * @language is nullptr or no row applies.  Rows sharing @language's first
+ * component are tried from the last one backwards, so longer (more
+ * specific) tags are preferred over the bare one. */
+static const HbScriptForLang *
+find_best_lang_match (hb_language_t language)
+{
+  const HbScriptForLang *first, *last, *record;
+  unsigned int num_records;
+  const char *lang_str;
+
+  if (language == nullptr)
+    return nullptr;
+
+  lang_str = language->s;
+
+  num_records = sizeof (hb_script_for_lang) / sizeof (hb_script_for_lang[0]);
+  first = hb_script_for_lang;
+  last = first + num_records - 1;
+
+  /* The table is sorted by tag, so binary search lands on some row with
+   * the same first component, not necessarily the one wanted. */
+  record = (const HbScriptForLang *) bsearch (lang_str,
+                                              first, num_records,
+                                              sizeof (HbScriptForLang),
+                                              lang_compare_first_component);
+  if (!record)
+    return nullptr;
+
+  /* find the best match among all those that have the same first-component */
+
+  /* go to the final one matching in the first component */
+  while (record < last &&
+         lang_compare_first_component (lang_str, record[1].lang) == 0)
+    record++;
+
+  /* go back, find which one matches completely */
+  while (lang_compare_first_component (lang_str, record->lang) == 0)
+  {
+    hb_language_t l = hb_language_from_string (record->lang, -1);
+
+    /* hb_language_matches() takes the general tag first and the specific
+     * one second: the row is the general side, so "en-us" falls back to
+     * the "en" row while a bare "pa" is not answered by "pa-pk". */
+    if (hb_language_matches (l, language))
+      return record;
+
+    /* Stop at the first row rather than stepping before the table. */
+    if (record == first)
+      break;
+    record--;
+  }
+
+  return nullptr;
+}
+
+/**
+ * hb_language_get_scripts:
+ * @language: a #hb_language_t
+ * @script_count: (inout) (optional): Input = the maximum number of scripts to return;
+ *                Output = the actual number of scripts returned (may be zero)
+ * @scripts: (out) (array length=script_count): the array of #hb_script_t found
+ *
+ * Fetches the scripts that can be used with @language.
+ *
+ * At most @script_count entries of @scripts are written.  Pass %NULL for
+ * @script_count to only query the total; @scripts is then not accessed.
+ *
+ * Return value: the total number of scripts, which may exceed the number
+ * stored in @scripts
+ *
+ * Since: REPLACEME
+ */
+HB_EXTERN unsigned int
+hb_language_get_scripts (hb_language_t language,
+                         unsigned int *script_count,
+                         hb_script_t *scripts)
+{
+  const HbScriptForLang *script_for_lang;
+  unsigned int count;
+  unsigned int i;
+
+  script_for_lang = find_best_lang_match (language);
+
+  if (!script_for_lang || script_for_lang->scripts[0] == 0)
+  {
+    if (script_count)
+      *script_count = 0;
+
+    return 0;
+  }
+
+  /* Rows are zero-padded; a row using every slot has no terminator. */
+  for (i = 0; i < sizeof (script_for_lang->scripts) / sizeof (hb_script_t); i++)
+    if (script_for_lang->scripts[i] == 0)
+      break;
+
+  count = i;
+
+  if (script_count)
+  {
+    /* Clamp to the caller's capacity before storing anything. */
+    unsigned int n = count < *script_count ? count : *script_count;
+
+    for (i = 0; i < n; i++)
+      scripts[i] = script_for_lang->scripts[i];
+
+    *script_count = n;
+  }
+
+  return count;
+}
+
 
 /* hb_version */
 
diff --git a/src/hb-common.h b/src/hb-common.h
index a5da4e76a..c6c6b98a4 100644
--- a/src/hb-common.h
+++ b/src/hb-common.h
@@ -758,6 +758,10 @@ hb_script_to_iso15924_tag (hb_script_t script);
 HB_EXTERN hb_direction_t
 hb_script_get_horizontal_direction (hb_script_t script);
 
+HB_EXTERN unsigned int
+hb_language_get_scripts (hb_language_t language,
+                         unsigned int *script_count,
+                         hb_script_t *scripts);
 
 /* User data */
 
diff --git a/src/hb-script-lang-table.h b/src/hb-script-lang-table.h
new file mode 100644
index 000000000..9b7c0aab0
--- /dev/null
+++ b/src/hb-script-lang-table.h
@@ -0,0 +1,253 @@
+typedef struct {
+  const char lang[9];      /* language tag, NUL-terminated; must stay the first member */
+  hb_script_t scripts[3];  /* scripts for the tag; unused trailing slots are 0 */
+} HbScriptForLang;
+
+static const
HbScriptForLang hb_script_for_lang[] = { /* rows kept sorted by tag: find_best_lang_match() looks them up with bsearch() */
+  { "aa", { HB_SCRIPT_LATIN } },
+  { "ab", { HB_SCRIPT_CYRILLIC } },
+  { "af", { HB_SCRIPT_LATIN } },
+  { "ak", { HB_SCRIPT_LATIN } },
+  { "am", { HB_SCRIPT_ETHIOPIC } },
+  { "an", { HB_SCRIPT_LATIN } },
+  { "ar", { HB_SCRIPT_ARABIC } },
+  { "as", { HB_SCRIPT_BENGALI } },
+  { "ast", { HB_SCRIPT_LATIN } },
+  { "av", { HB_SCRIPT_CYRILLIC } },
+  { "ay", { HB_SCRIPT_LATIN } },
+  { "az-az", { HB_SCRIPT_LATIN } }, /* rows with a second subtag share their first component ("az") */
+  { "az-ir", { HB_SCRIPT_ARABIC } },
+  { "ba", { HB_SCRIPT_CYRILLIC } },
+  { "be", { HB_SCRIPT_CYRILLIC } },
+  { "ber-dz", { HB_SCRIPT_LATIN } },
+  { "ber-ma", { HB_SCRIPT_TIFINAGH } },
+  { "bg", { HB_SCRIPT_CYRILLIC } },
+  { "bh", { HB_SCRIPT_DEVANAGARI } },
+  { "bho", { HB_SCRIPT_DEVANAGARI } },
+  { "bi", { HB_SCRIPT_LATIN } },
+  { "bin", { HB_SCRIPT_LATIN } },
+  { "bm", { HB_SCRIPT_LATIN } },
+  { "bn", { HB_SCRIPT_BENGALI } },
+  { "bo", { HB_SCRIPT_TIBETAN } },
+  { "br", { HB_SCRIPT_LATIN } },
+  { "brx", { HB_SCRIPT_DEVANAGARI } },
+  { "bs", { HB_SCRIPT_LATIN } },
+  { "bua", { HB_SCRIPT_CYRILLIC } },
+  { "byn", { HB_SCRIPT_ETHIOPIC } },
+  { "ca", { HB_SCRIPT_LATIN } },
+  { "ce", { HB_SCRIPT_CYRILLIC } },
+  { "ch", { HB_SCRIPT_LATIN } },
+  { "chm", { HB_SCRIPT_CYRILLIC } },
+  { "chr", { HB_SCRIPT_CHEROKEE } },
+  { "co", { HB_SCRIPT_LATIN } },
+  { "crh", { HB_SCRIPT_LATIN } },
+  { "cs", { HB_SCRIPT_LATIN } },
+  { "csb", { HB_SCRIPT_LATIN } },
+  { "cu", { HB_SCRIPT_CYRILLIC } },
+  { "cv", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } }, /* several scripts: reported in this order */
+  { "cy", { HB_SCRIPT_LATIN } },
+  { "da", { HB_SCRIPT_LATIN } },
+  { "de", { HB_SCRIPT_LATIN } },
+  { "doi", { HB_SCRIPT_DEVANAGARI } },
+  { "dv", { HB_SCRIPT_THAANA } },
+  { "dz", { HB_SCRIPT_TIBETAN } },
+  { "ee", { HB_SCRIPT_LATIN } },
+  { "el", { HB_SCRIPT_GREEK } },
+  { "en", { HB_SCRIPT_LATIN } },
+  { "eo", { HB_SCRIPT_LATIN } },
+  { "es", { HB_SCRIPT_LATIN } },
+  { "et", { HB_SCRIPT_LATIN } },
+  { "eu", { HB_SCRIPT_LATIN } },
+  { "fa", { HB_SCRIPT_ARABIC } },
+  { "fat", { HB_SCRIPT_LATIN } },
+  { "ff", { HB_SCRIPT_LATIN } },
+  { "fi", { HB_SCRIPT_LATIN } },
+  { "fil", { HB_SCRIPT_LATIN } },
+  { "fj", { HB_SCRIPT_LATIN } },
+  { "fo", { HB_SCRIPT_LATIN } },
+  { "fr", { HB_SCRIPT_LATIN } },
+  { "fur", { HB_SCRIPT_LATIN } },
+  { "fy", { HB_SCRIPT_LATIN } },
+  { "ga", { HB_SCRIPT_LATIN } },
+  { "gd", { HB_SCRIPT_LATIN } },
+  { "gez", { HB_SCRIPT_ETHIOPIC } },
+  { "gl", { HB_SCRIPT_LATIN } },
+  { "gn", { HB_SCRIPT_LATIN } },
+  { "gu", { HB_SCRIPT_GUJARATI } },
+  { "gv", { HB_SCRIPT_LATIN } },
+  { "ha", { HB_SCRIPT_LATIN } },
+  { "haw", { HB_SCRIPT_LATIN } },
+  { "he", { HB_SCRIPT_HEBREW } },
+  { "hi", { HB_SCRIPT_DEVANAGARI } },
+  { "hne", { HB_SCRIPT_DEVANAGARI } },
+  { "ho", { HB_SCRIPT_LATIN } },
+  { "hr", { HB_SCRIPT_LATIN } },
+  { "hsb", { HB_SCRIPT_LATIN } },
+  { "ht", { HB_SCRIPT_LATIN } },
+  { "hu", { HB_SCRIPT_LATIN } },
+  { "hy", { HB_SCRIPT_ARMENIAN } },
+  { "hz", { HB_SCRIPT_LATIN } },
+  { "ia", { HB_SCRIPT_LATIN } },
+  { "id", { HB_SCRIPT_LATIN } },
+  { "ie", { HB_SCRIPT_LATIN } },
+  { "ig", { HB_SCRIPT_LATIN } },
+  { "ii", { HB_SCRIPT_YI } },
+  { "ik", { HB_SCRIPT_CYRILLIC } },
+  { "io", { HB_SCRIPT_LATIN } },
+  { "is", { HB_SCRIPT_LATIN } },
+  { "it", { HB_SCRIPT_LATIN } },
+  { "iu", { HB_SCRIPT_CANADIAN_SYLLABICS } },
+  { "ja", { HB_SCRIPT_HAN, HB_SCRIPT_KATAKANA, HB_SCRIPT_HIRAGANA } }, /* fills all three slots, so there is no trailing 0 */
+  { "jv", { HB_SCRIPT_LATIN } },
+  { "ka", { HB_SCRIPT_GEORGIAN } },
+  { "kaa", { HB_SCRIPT_CYRILLIC } },
+  { "kab", { HB_SCRIPT_LATIN } },
+  { "ki", { HB_SCRIPT_LATIN } },
+  { "kj", { HB_SCRIPT_LATIN } },
+  { "kk", { HB_SCRIPT_CYRILLIC } },
+  { "kl", { HB_SCRIPT_LATIN } },
+  { "km", { HB_SCRIPT_KHMER } },
+  { "kn", { HB_SCRIPT_KANNADA } },
+  { "ko", { HB_SCRIPT_HANGUL } },
+  { "kok", { HB_SCRIPT_DEVANAGARI } },
+  { "kr", { HB_SCRIPT_LATIN } },
+  { "ks", { HB_SCRIPT_ARABIC } },
+  { "ku-am", { HB_SCRIPT_CYRILLIC } },
+  { "ku-iq", { HB_SCRIPT_ARABIC } },
+  { "ku-ir", { HB_SCRIPT_ARABIC } },
+  { "ku-tr", { HB_SCRIPT_LATIN } },
+  { "kum", { HB_SCRIPT_CYRILLIC } },
+  { "kv", { HB_SCRIPT_CYRILLIC } },
+  { "kw", { HB_SCRIPT_LATIN } },
+  { "kwm", { HB_SCRIPT_LATIN } },
+  { "ky", { HB_SCRIPT_CYRILLIC } },
+  { "la", { HB_SCRIPT_LATIN } },
+  { "lah", { HB_SCRIPT_ARABIC } },
+  { "lb", { HB_SCRIPT_LATIN } },
+  { "lez", { HB_SCRIPT_CYRILLIC } },
+  { "lg", { HB_SCRIPT_LATIN } },
+  { "li", { HB_SCRIPT_LATIN } },
+  { "ln", { HB_SCRIPT_LATIN } },
+  { "lo", { HB_SCRIPT_LAO } },
+  { "lt", { HB_SCRIPT_LATIN } },
+  { "lv", { HB_SCRIPT_LATIN } },
+  { "mai", { HB_SCRIPT_DEVANAGARI } },
+  { "mg", { HB_SCRIPT_LATIN } },
+  { "mh", { HB_SCRIPT_LATIN } },
+  { "mi", { HB_SCRIPT_LATIN } },
+  { "mk", { HB_SCRIPT_CYRILLIC } },
+  { "ml", { HB_SCRIPT_MALAYALAM } },
+  { "mn-cn", { HB_SCRIPT_MONGOLIAN } },
+  { "mn-mn", { HB_SCRIPT_CYRILLIC } },
+  { "mni", { HB_SCRIPT_BENGALI } },
+  { "mo", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
+  { "mr", { HB_SCRIPT_DEVANAGARI } },
+  { "ms", { HB_SCRIPT_LATIN } },
+  { "mt", { HB_SCRIPT_LATIN } },
+  { "my", { HB_SCRIPT_MYANMAR } },
+  { "na", { HB_SCRIPT_LATIN } },
+  { "nb", { HB_SCRIPT_LATIN } },
+  { "nds", { HB_SCRIPT_LATIN } },
+  { "ne", { HB_SCRIPT_DEVANAGARI } },
+  { "ng", { HB_SCRIPT_LATIN } },
+  { "nl", { HB_SCRIPT_LATIN } },
+  { "nn", { HB_SCRIPT_LATIN } },
+  { "no", { HB_SCRIPT_LATIN } },
+  { "nqo", { HB_SCRIPT_NKO } },
+  { "nr", { HB_SCRIPT_LATIN } },
+  { "nso", { HB_SCRIPT_LATIN } },
+  { "nv", { HB_SCRIPT_LATIN } },
+  { "ny", { HB_SCRIPT_LATIN } },
+  { "oc", { HB_SCRIPT_LATIN } },
+  { "om", { HB_SCRIPT_LATIN } },
+  { "or", { HB_SCRIPT_ORIYA } },
+  { "os", { HB_SCRIPT_CYRILLIC } },
+  { "ota", { HB_SCRIPT_ARABIC } },
+  { "pa", { HB_SCRIPT_GURMUKHI } }, /* the only bare tag here that also has a longer row ("pa-pk") */
+  { "pa-pk", { HB_SCRIPT_ARABIC } },
+  { "pap-an", { HB_SCRIPT_LATIN } },
+  { "pap-aw", { HB_SCRIPT_LATIN } },
+  { "pl", { HB_SCRIPT_LATIN } },
+  { "ps-af", { HB_SCRIPT_ARABIC } },
+  { "ps-pk", { HB_SCRIPT_ARABIC } },
+  { "pt", { HB_SCRIPT_LATIN } },
+  { "qu", { HB_SCRIPT_LATIN } },
+  { "quz", { HB_SCRIPT_LATIN } },
+  { "rm", { HB_SCRIPT_LATIN } },
+  { "rn", { HB_SCRIPT_LATIN } },
+  { "ro", { HB_SCRIPT_LATIN } },
+  { "ru", { HB_SCRIPT_CYRILLIC } },
+  { "rw", { HB_SCRIPT_LATIN } },
+  { "sa", { HB_SCRIPT_DEVANAGARI } },
+  { "sah", { HB_SCRIPT_CYRILLIC } },
+  { "sat", { HB_SCRIPT_DEVANAGARI } },
+  { "sc", { HB_SCRIPT_LATIN } },
+  { "sco", { HB_SCRIPT_LATIN } },
+  { "sd", { HB_SCRIPT_ARABIC } },
+  { "se", { HB_SCRIPT_LATIN } },
+  { "sel", { HB_SCRIPT_CYRILLIC } },
+  { "sg", { HB_SCRIPT_LATIN } },
+  { "sh", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
+  { "shs", { HB_SCRIPT_LATIN } },
+  { "si", { HB_SCRIPT_SINHALA } },
+  { "sid", { HB_SCRIPT_ETHIOPIC } },
+  { "sk", { HB_SCRIPT_LATIN } },
+  { "sl", { HB_SCRIPT_LATIN } },
+  { "sm", { HB_SCRIPT_LATIN } },
+  { "sma", { HB_SCRIPT_LATIN } },
+  { "smj", { HB_SCRIPT_LATIN } },
+  { "smn", { HB_SCRIPT_LATIN } },
+  { "sms", { HB_SCRIPT_LATIN } },
+  { "sn", { HB_SCRIPT_LATIN } },
+  { "so", { HB_SCRIPT_LATIN } },
+  { "sq", { HB_SCRIPT_LATIN } },
+  { "sr", { HB_SCRIPT_CYRILLIC } },
+  { "ss", { HB_SCRIPT_LATIN } },
+  { "st", { HB_SCRIPT_LATIN } },
+  { "su", { HB_SCRIPT_LATIN } },
+  { "sv", { HB_SCRIPT_LATIN } },
+  { "sw", { HB_SCRIPT_LATIN } },
+  { "syr", { HB_SCRIPT_SYRIAC } },
+  { "ta", { HB_SCRIPT_TAMIL } },
+  { "te", { HB_SCRIPT_TELUGU } },
+  { "tg", { HB_SCRIPT_CYRILLIC } },
+  { "th", { HB_SCRIPT_THAI } },
+  { "ti-er", { HB_SCRIPT_ETHIOPIC } },
+  { "ti-et", { HB_SCRIPT_ETHIOPIC } },
+  { "tig", { HB_SCRIPT_ETHIOPIC } },
+  { "tk", { HB_SCRIPT_LATIN } },
+  { "tl", { HB_SCRIPT_LATIN } },
+  { "tn", { HB_SCRIPT_LATIN } },
+  { "to", { HB_SCRIPT_LATIN } },
+  { "tr", { HB_SCRIPT_LATIN } },
+  { "ts", { HB_SCRIPT_LATIN } },
+  { "tt", { HB_SCRIPT_CYRILLIC } },
+  { "tw", { HB_SCRIPT_LATIN } },
+  { "ty", { HB_SCRIPT_LATIN } },
+  { "tyv", { HB_SCRIPT_CYRILLIC } },
+  { "ug", { HB_SCRIPT_ARABIC } },
+  { "uk", { HB_SCRIPT_CYRILLIC } },
+  { "und-zmth", { HB_SCRIPT_LATIN, HB_SCRIPT_GREEK } },
+  { "und-zsye", { (hb_script_t) 0 } }, /* no script listed: hb_language_get_scripts() reports 0 for this row */
+  { "ur", { HB_SCRIPT_ARABIC } },
+  { "uz",
{ HB_SCRIPT_LATIN } },
+  { "ve", { HB_SCRIPT_LATIN } },
+  { "vi", { HB_SCRIPT_LATIN } },
+  { "vo", { HB_SCRIPT_LATIN } },
+  { "vot", { HB_SCRIPT_LATIN } },
+  { "wa", { HB_SCRIPT_LATIN } },
+  { "wal", { HB_SCRIPT_ETHIOPIC } },
+  { "wen", { HB_SCRIPT_LATIN } },
+  { "wo", { HB_SCRIPT_LATIN } },
+  { "xh", { HB_SCRIPT_LATIN } },
+  { "yap", { HB_SCRIPT_LATIN } },
+  { "yi", { HB_SCRIPT_HEBREW } },
+  { "yo", { HB_SCRIPT_LATIN } },
+  { "za", { HB_SCRIPT_LATIN } },
+  { "zh-cn", { HB_SCRIPT_HAN } },
+  { "zh-hk", { HB_SCRIPT_HAN } },
+  { "zh-mo", { HB_SCRIPT_HAN } },
+  { "zh-sg", { HB_SCRIPT_HAN } },
+  { "zh-tw", { HB_SCRIPT_HAN } },
+  { "zu", { HB_SCRIPT_LATIN } }
+};
diff --git a/test/api/test-common.c b/test/api/test-common.c
index e9fae1351..8aa5eafd9 100644
--- a/test/api/test-common.c
+++ b/test/api/test-common.c
@@ -210,6 +210,97 @@ test_types_language (void)
   g_assert (HB_LANGUAGE_INVALID != hb_language_get_default ());
 }
 
+/* Exercises hb_language_get_scripts(): exact tags, truncation to the
+ * caller's capacity, count-only queries and tags without any scripts. */
+static void
+test_language_get_scripts (void)
+{
+  hb_script_t scripts[10];
+  unsigned int n_scripts;
+  unsigned int count;
+
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("en", -1), &n_scripts, scripts);
+
+  g_assert (count == 1);
+  g_assert (n_scripts == 1);
+  g_assert (scripts[0] == HB_SCRIPT_LATIN);
+
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("cv", -1), &n_scripts, scripts);
+
+  g_assert (count == 2);
+  g_assert (n_scripts == 2);
+  g_assert (scripts[0] == HB_SCRIPT_CYRILLIC);
+  g_assert (scripts[1] == HB_SCRIPT_LATIN);
+
+  /* Capacity smaller than the total: nothing past it may be written. */
+  scripts[1] = HB_SCRIPT_INVALID;
+  n_scripts = 1;
+  count = hb_language_get_scripts (hb_language_from_string ("cv", -1), &n_scripts, scripts);
+
+  g_assert (count == 2);
+  g_assert (n_scripts == 1);
+  g_assert (scripts[0] == HB_SCRIPT_CYRILLIC);
+  g_assert (scripts[1] == HB_SCRIPT_INVALID);
+
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("ja", -1), &n_scripts, scripts);
+
+  g_assert (count == 3);
+  g_assert (n_scripts == 3);
+  g_assert (scripts[0] == HB_SCRIPT_HAN);
+  g_assert (scripts[1] == HB_SCRIPT_KATAKANA);
+  g_assert (scripts[2] == HB_SCRIPT_HIRAGANA);
+
+  /* The count argument is optional; the total is still reported. */
+  count = hb_language_get_scripts (hb_language_from_string ("ja", -1), NULL, NULL);
+
+  g_assert (count == 3);
+
+  /* A bare tag and its region-specific row are answered separately. */
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("pa", -1), &n_scripts, scripts);
+
+  g_assert (count == 1);
+  g_assert (n_scripts == 1);
+  g_assert (scripts[0] == HB_SCRIPT_GURMUKHI);
+
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("pa-pk", -1), &n_scripts, scripts);
+
+  g_assert (count == 1);
+  g_assert (n_scripts == 1);
+  g_assert (scripts[0] == HB_SCRIPT_ARABIC);
+
+  /* A tag more specific than any row falls back to the bare row. */
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("en-us", -1), &n_scripts, scripts);
+
+  g_assert (count == 1);
+  g_assert (n_scripts == 1);
+  g_assert (scripts[0] == HB_SCRIPT_LATIN);
+
+  /* A row without scripts, an unlisted tag and no language all yield zero. */
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("und-zsye", -1), &n_scripts, scripts);
+
+  g_assert (count == 0);
+  g_assert (n_scripts == 0);
+
+  n_scripts = 10;
+  count = hb_language_get_scripts (hb_language_from_string ("xx", -1), &n_scripts, scripts);
+
+  g_assert (count == 0);
+  g_assert (n_scripts == 0);
+
+  n_scripts = 10;
+  count = hb_language_get_scripts (HB_LANGUAGE_INVALID, &n_scripts, scripts);
+
+  g_assert (count == 0);
+  g_assert (n_scripts == 0);
+}
+
 int
 main (int argc, char **argv)
 {
@@ -220,6 +311,7 @@ main (int argc, char **argv)
   hb_test_add (test_types_tag);
   hb_test_add (test_types_script);
   hb_test_add (test_types_language);
+  hb_test_add (test_language_get_scripts);
 
   return hb_test_run();
 }