Compare commits

...

1 Commit

Author SHA1 Message Date
Matthias Clasen dd454098c1 Add hb_language_get_scripts
This is a copy of the Pango implementation, minus caching.

Tests included.
2023-01-02 18:11:21 -05:00
4 changed files with 417 additions and 0 deletions

View File

@ -28,6 +28,7 @@
#include "hb.hh"
#include "hb-machinery.hh"
#include "hb-script-lang-table.h"
#if !defined(HB_NO_SETLOCALE) && (!defined(HAVE_NEWLOCALE) || !defined(HAVE_USELOCALE))
#define HB_NO_SETLOCALE 1
@ -665,6 +666,125 @@ hb_script_get_horizontal_direction (hb_script_t script)
return HB_DIRECTION_LTR;
}
/* bsearch() comparator: orders two language tags by their first component
 * only, i.e. the text before the first '-' (or the whole string when there
 * is no '-').  Both arguments are read as NUL-terminated C strings. */
static int
lang_compare_first_component (const void *pa,
			      const void *pb)
{
  const char *lhs = (const char *) pa;
  const char *rhs = (const char *) pb;

  /* strcspn() gives the offset of the first '-', or the full string length
   * when the tag has no '-' at all. */
  unsigned int lhs_len = (unsigned int) strcspn (lhs, "-");
  unsigned int rhs_len = (unsigned int) strcspn (rhs, "-");

  /* Compare over the longer of the two first components so that a tag whose
   * first component is a strict prefix of the other's does not compare equal. */
  return strncmp (lhs, rhs, hb_max (lhs_len, rhs_len));
}
/* Looks up the hb_script_for_lang record that best matches @language.
 *
 * The table is binary-searched on the first tag component only, which lands
 * on an arbitrary record of the run sharing that component.  The run is then
 * scanned from its last record backwards and the first record accepted by
 * hb_language_matches() is returned.
 *
 * Returns nullptr when @language is nullptr, when no record shares the first
 * component, or when none of the candidates is accepted. */
static const HbScriptForLang *
find_best_lang_match (hb_language_t language)
{
  if (language == nullptr)
    return nullptr;

  const char *lang_str = language->s;
  const unsigned int record_size = sizeof (hb_script_for_lang[0]);
  const unsigned int num_records = sizeof (hb_script_for_lang) / record_size;

  const HbScriptForLang *hit =
    (const HbScriptForLang *) bsearch (lang_str,
				       hb_script_for_lang, num_records,
				       record_size,
				       lang_compare_first_component);
  if (!hit)
    return nullptr;

  /* find the best match among all those that have the same first-component */

  /* go to the final one matching in the first component */
  unsigned int i = (unsigned int) (hit - hb_script_for_lang);
  while (i + 1 < num_records &&
	 lang_compare_first_component (lang_str, hb_script_for_lang[i + 1].lang) == 0)
    i++;

  /* go back, find which one matches completely; indices are used instead of
   * raw pointer arithmetic so the scan never steps before the table start */
  for (;;)
  {
    const HbScriptForLang *candidate = &hb_script_for_lang[i];

    if (lang_compare_first_component (lang_str, candidate->lang) != 0)
      break;

    /* NOTE(review): this asks whether the table entry matches relative to
     * the requested language, in that argument order — confirm against the
     * hb_language_matches() contract that this is the intended direction. */
    hb_language_t l = hb_language_from_string (candidate->lang, -1);
    if (hb_language_matches (language, l))
      return candidate;

    if (i == 0)
      break;
    i--;
  }

  return nullptr;
}
/**
 * hb_language_get_scripts:
 * @language: a #hb_language_t
 * @script_count: (inout) (optional): Input = the maximum number of scripts to return;
 *                Output = the actual number of scripts returned (may be zero)
 * @scripts: (out) (array length=script_count): the array of #hb_script_t found
 *
 * Fetches the scripts that can be used with @language.
 *
 * If @script_count is non-`NULL`, at most the input value of @script_count
 * entries are written to @scripts, and @script_count is updated to the
 * number of entries actually written.  If @script_count is `NULL`, @scripts
 * is not accessed and only the total is reported.
 *
 * Return value: the total number of scripts, which may be larger than the
 * number written to @scripts; zero if nothing is known about @language
 *
 * Since: REPLACEME
 */
HB_EXTERN unsigned int
hb_language_get_scripts (hb_language_t  language,
			 unsigned int  *script_count,
			 hb_script_t   *scripts)
{
  const HbScriptForLang *script_for_lang = find_best_lang_match (language);
  const unsigned int max_scripts =
    sizeof (script_for_lang->scripts) / sizeof (script_for_lang->scripts[0]);
  unsigned int count = 0;
  unsigned int i;

  /* The per-language list ends at the first zero entry, or at the end of
   * the fixed-size array when every slot is used. */
  if (script_for_lang)
    while (count < max_scripts && script_for_lang->scripts[count] != 0)
      count++;

  if (script_count)
  {
    /* Clamp to the caller's capacity *before* writing, so that no more than
     * the input *script_count elements are ever stored. */
    unsigned int n = count;
    if (n > *script_count)
      n = *script_count;

    for (i = 0; i < n; i++)
      scripts[i] = script_for_lang->scripts[i];

    *script_count = n;
  }

  return count;
}
/* hb_version */

View File

@ -758,6 +758,10 @@ hb_script_to_iso15924_tag (hb_script_t script);
HB_EXTERN hb_direction_t
hb_script_get_horizontal_direction (hb_script_t script);
HB_EXTERN unsigned int
hb_language_get_scripts (hb_language_t language,
unsigned int *script_count,
hb_script_t *scripts);
/* User data */

253
src/hb-script-lang-table.h Normal file
View File

@ -0,0 +1,253 @@
/* One row of the language-to-script table: a language tag together with the
 * scripts listed for it. */
typedef struct {
/* NUL-terminated language tag; the longest tag in hb_script_for_lang
 * ("und-zmth") is 8 characters, which fills the 9 bytes exactly. */
const char lang[9];
/* Up to three scripts.  hb_language_get_scripts() treats the first zero
 * entry as the end of the list, so shorter lists rely on the remaining
 * slots being zero. */
hb_script_t scripts[3];
} HbScriptForLang;
/* Language tag -> scripts table.
 *
 * find_best_lang_match() runs bsearch() over this array with
 * lang_compare_first_component() as the comparator, so the rows must stay
 * sorted by the first component of the tag (the text before the first '-'),
 * and rows sharing a first component must stay adjacent.
 *
 * A row whose first script is 0 (see "und-zsye") makes
 * hb_language_get_scripts() report zero scripts. */
static const HbScriptForLang hb_script_for_lang[] = {
{ "aa", { HB_SCRIPT_LATIN } },
{ "ab", { HB_SCRIPT_CYRILLIC } },
{ "af", { HB_SCRIPT_LATIN } },
{ "ak", { HB_SCRIPT_LATIN } },
{ "am", { HB_SCRIPT_ETHIOPIC } },
{ "an", { HB_SCRIPT_LATIN } },
{ "ar", { HB_SCRIPT_ARABIC } },
{ "as", { HB_SCRIPT_BENGALI } },
{ "ast", { HB_SCRIPT_LATIN } },
{ "av", { HB_SCRIPT_CYRILLIC } },
{ "ay", { HB_SCRIPT_LATIN } },
{ "az-az", { HB_SCRIPT_LATIN } },
{ "az-ir", { HB_SCRIPT_ARABIC } },
{ "ba", { HB_SCRIPT_CYRILLIC } },
{ "be", { HB_SCRIPT_CYRILLIC } },
{ "ber-dz", { HB_SCRIPT_LATIN } },
{ "ber-ma", { HB_SCRIPT_TIFINAGH } },
{ "bg", { HB_SCRIPT_CYRILLIC } },
{ "bh", { HB_SCRIPT_DEVANAGARI } },
{ "bho", { HB_SCRIPT_DEVANAGARI } },
{ "bi", { HB_SCRIPT_LATIN } },
{ "bin", { HB_SCRIPT_LATIN } },
{ "bm", { HB_SCRIPT_LATIN } },
{ "bn", { HB_SCRIPT_BENGALI } },
{ "bo", { HB_SCRIPT_TIBETAN } },
{ "br", { HB_SCRIPT_LATIN } },
{ "brx", { HB_SCRIPT_DEVANAGARI } },
{ "bs", { HB_SCRIPT_LATIN } },
{ "bua", { HB_SCRIPT_CYRILLIC } },
{ "byn", { HB_SCRIPT_ETHIOPIC } },
{ "ca", { HB_SCRIPT_LATIN } },
{ "ce", { HB_SCRIPT_CYRILLIC } },
{ "ch", { HB_SCRIPT_LATIN } },
{ "chm", { HB_SCRIPT_CYRILLIC } },
{ "chr", { HB_SCRIPT_CHEROKEE } },
{ "co", { HB_SCRIPT_LATIN } },
{ "crh", { HB_SCRIPT_LATIN } },
{ "cs", { HB_SCRIPT_LATIN } },
{ "csb", { HB_SCRIPT_LATIN } },
{ "cu", { HB_SCRIPT_CYRILLIC } },
{ "cv", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
{ "cy", { HB_SCRIPT_LATIN } },
{ "da", { HB_SCRIPT_LATIN } },
{ "de", { HB_SCRIPT_LATIN } },
{ "doi", { HB_SCRIPT_DEVANAGARI } },
{ "dv", { HB_SCRIPT_THAANA } },
{ "dz", { HB_SCRIPT_TIBETAN } },
{ "ee", { HB_SCRIPT_LATIN } },
{ "el", { HB_SCRIPT_GREEK } },
{ "en", { HB_SCRIPT_LATIN } },
{ "eo", { HB_SCRIPT_LATIN } },
{ "es", { HB_SCRIPT_LATIN } },
{ "et", { HB_SCRIPT_LATIN } },
{ "eu", { HB_SCRIPT_LATIN } },
{ "fa", { HB_SCRIPT_ARABIC } },
{ "fat", { HB_SCRIPT_LATIN } },
{ "ff", { HB_SCRIPT_LATIN } },
{ "fi", { HB_SCRIPT_LATIN } },
{ "fil", { HB_SCRIPT_LATIN } },
{ "fj", { HB_SCRIPT_LATIN } },
{ "fo", { HB_SCRIPT_LATIN } },
{ "fr", { HB_SCRIPT_LATIN } },
{ "fur", { HB_SCRIPT_LATIN } },
{ "fy", { HB_SCRIPT_LATIN } },
{ "ga", { HB_SCRIPT_LATIN } },
{ "gd", { HB_SCRIPT_LATIN } },
{ "gez", { HB_SCRIPT_ETHIOPIC } },
{ "gl", { HB_SCRIPT_LATIN } },
{ "gn", { HB_SCRIPT_LATIN } },
{ "gu", { HB_SCRIPT_GUJARATI } },
{ "gv", { HB_SCRIPT_LATIN } },
{ "ha", { HB_SCRIPT_LATIN } },
{ "haw", { HB_SCRIPT_LATIN } },
{ "he", { HB_SCRIPT_HEBREW } },
{ "hi", { HB_SCRIPT_DEVANAGARI } },
{ "hne", { HB_SCRIPT_DEVANAGARI } },
{ "ho", { HB_SCRIPT_LATIN } },
{ "hr", { HB_SCRIPT_LATIN } },
{ "hsb", { HB_SCRIPT_LATIN } },
{ "ht", { HB_SCRIPT_LATIN } },
{ "hu", { HB_SCRIPT_LATIN } },
{ "hy", { HB_SCRIPT_ARMENIAN } },
{ "hz", { HB_SCRIPT_LATIN } },
{ "ia", { HB_SCRIPT_LATIN } },
{ "id", { HB_SCRIPT_LATIN } },
{ "ie", { HB_SCRIPT_LATIN } },
{ "ig", { HB_SCRIPT_LATIN } },
{ "ii", { HB_SCRIPT_YI } },
{ "ik", { HB_SCRIPT_CYRILLIC } },
{ "io", { HB_SCRIPT_LATIN } },
{ "is", { HB_SCRIPT_LATIN } },
{ "it", { HB_SCRIPT_LATIN } },
{ "iu", { HB_SCRIPT_CANADIAN_SYLLABICS } },
{ "ja", { HB_SCRIPT_HAN, HB_SCRIPT_KATAKANA, HB_SCRIPT_HIRAGANA } },
{ "jv", { HB_SCRIPT_LATIN } },
{ "ka", { HB_SCRIPT_GEORGIAN } },
{ "kaa", { HB_SCRIPT_CYRILLIC } },
{ "kab", { HB_SCRIPT_LATIN } },
{ "ki", { HB_SCRIPT_LATIN } },
{ "kj", { HB_SCRIPT_LATIN } },
{ "kk", { HB_SCRIPT_CYRILLIC } },
{ "kl", { HB_SCRIPT_LATIN } },
{ "km", { HB_SCRIPT_KHMER } },
{ "kn", { HB_SCRIPT_KANNADA } },
{ "ko", { HB_SCRIPT_HANGUL } },
{ "kok", { HB_SCRIPT_DEVANAGARI } },
{ "kr", { HB_SCRIPT_LATIN } },
{ "ks", { HB_SCRIPT_ARABIC } },
{ "ku-am", { HB_SCRIPT_CYRILLIC } },
{ "ku-iq", { HB_SCRIPT_ARABIC } },
{ "ku-ir", { HB_SCRIPT_ARABIC } },
{ "ku-tr", { HB_SCRIPT_LATIN } },
{ "kum", { HB_SCRIPT_CYRILLIC } },
{ "kv", { HB_SCRIPT_CYRILLIC } },
{ "kw", { HB_SCRIPT_LATIN } },
{ "kwm", { HB_SCRIPT_LATIN } },
{ "ky", { HB_SCRIPT_CYRILLIC } },
{ "la", { HB_SCRIPT_LATIN } },
{ "lah", { HB_SCRIPT_ARABIC } },
{ "lb", { HB_SCRIPT_LATIN } },
{ "lez", { HB_SCRIPT_CYRILLIC } },
{ "lg", { HB_SCRIPT_LATIN } },
{ "li", { HB_SCRIPT_LATIN } },
{ "ln", { HB_SCRIPT_LATIN } },
{ "lo", { HB_SCRIPT_LAO } },
{ "lt", { HB_SCRIPT_LATIN } },
{ "lv", { HB_SCRIPT_LATIN } },
{ "mai", { HB_SCRIPT_DEVANAGARI } },
{ "mg", { HB_SCRIPT_LATIN } },
{ "mh", { HB_SCRIPT_LATIN } },
{ "mi", { HB_SCRIPT_LATIN } },
{ "mk", { HB_SCRIPT_CYRILLIC } },
{ "ml", { HB_SCRIPT_MALAYALAM } },
{ "mn-cn", { HB_SCRIPT_MONGOLIAN } },
{ "mn-mn", { HB_SCRIPT_CYRILLIC } },
{ "mni", { HB_SCRIPT_BENGALI } },
{ "mo", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
{ "mr", { HB_SCRIPT_DEVANAGARI } },
{ "ms", { HB_SCRIPT_LATIN } },
{ "mt", { HB_SCRIPT_LATIN } },
{ "my", { HB_SCRIPT_MYANMAR } },
{ "na", { HB_SCRIPT_LATIN } },
{ "nb", { HB_SCRIPT_LATIN } },
{ "nds", { HB_SCRIPT_LATIN } },
{ "ne", { HB_SCRIPT_DEVANAGARI } },
{ "ng", { HB_SCRIPT_LATIN } },
{ "nl", { HB_SCRIPT_LATIN } },
{ "nn", { HB_SCRIPT_LATIN } },
{ "no", { HB_SCRIPT_LATIN } },
{ "nqo", { HB_SCRIPT_NKO } },
{ "nr", { HB_SCRIPT_LATIN } },
{ "nso", { HB_SCRIPT_LATIN } },
{ "nv", { HB_SCRIPT_LATIN } },
{ "ny", { HB_SCRIPT_LATIN } },
{ "oc", { HB_SCRIPT_LATIN } },
{ "om", { HB_SCRIPT_LATIN } },
{ "or", { HB_SCRIPT_ORIYA } },
{ "os", { HB_SCRIPT_CYRILLIC } },
{ "ota", { HB_SCRIPT_ARABIC } },
{ "pa", { HB_SCRIPT_GURMUKHI } },
{ "pa-pk", { HB_SCRIPT_ARABIC } },
{ "pap-an", { HB_SCRIPT_LATIN } },
{ "pap-aw", { HB_SCRIPT_LATIN } },
{ "pl", { HB_SCRIPT_LATIN } },
{ "ps-af", { HB_SCRIPT_ARABIC } },
{ "ps-pk", { HB_SCRIPT_ARABIC } },
{ "pt", { HB_SCRIPT_LATIN } },
{ "qu", { HB_SCRIPT_LATIN } },
{ "quz", { HB_SCRIPT_LATIN } },
{ "rm", { HB_SCRIPT_LATIN } },
{ "rn", { HB_SCRIPT_LATIN } },
{ "ro", { HB_SCRIPT_LATIN } },
{ "ru", { HB_SCRIPT_CYRILLIC } },
{ "rw", { HB_SCRIPT_LATIN } },
{ "sa", { HB_SCRIPT_DEVANAGARI } },
{ "sah", { HB_SCRIPT_CYRILLIC } },
{ "sat", { HB_SCRIPT_DEVANAGARI } },
{ "sc", { HB_SCRIPT_LATIN } },
{ "sco", { HB_SCRIPT_LATIN } },
{ "sd", { HB_SCRIPT_ARABIC } },
{ "se", { HB_SCRIPT_LATIN } },
{ "sel", { HB_SCRIPT_CYRILLIC } },
{ "sg", { HB_SCRIPT_LATIN } },
{ "sh", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
{ "shs", { HB_SCRIPT_LATIN } },
{ "si", { HB_SCRIPT_SINHALA } },
{ "sid", { HB_SCRIPT_ETHIOPIC } },
{ "sk", { HB_SCRIPT_LATIN } },
{ "sl", { HB_SCRIPT_LATIN } },
{ "sm", { HB_SCRIPT_LATIN } },
{ "sma", { HB_SCRIPT_LATIN } },
{ "smj", { HB_SCRIPT_LATIN } },
{ "smn", { HB_SCRIPT_LATIN } },
{ "sms", { HB_SCRIPT_LATIN } },
{ "sn", { HB_SCRIPT_LATIN } },
{ "so", { HB_SCRIPT_LATIN } },
{ "sq", { HB_SCRIPT_LATIN } },
{ "sr", { HB_SCRIPT_CYRILLIC } },
{ "ss", { HB_SCRIPT_LATIN } },
{ "st", { HB_SCRIPT_LATIN } },
{ "su", { HB_SCRIPT_LATIN } },
{ "sv", { HB_SCRIPT_LATIN } },
{ "sw", { HB_SCRIPT_LATIN } },
{ "syr", { HB_SCRIPT_SYRIAC } },
{ "ta", { HB_SCRIPT_TAMIL } },
{ "te", { HB_SCRIPT_TELUGU } },
{ "tg", { HB_SCRIPT_CYRILLIC } },
{ "th", { HB_SCRIPT_THAI } },
{ "ti-er", { HB_SCRIPT_ETHIOPIC } },
{ "ti-et", { HB_SCRIPT_ETHIOPIC } },
{ "tig", { HB_SCRIPT_ETHIOPIC } },
{ "tk", { HB_SCRIPT_LATIN } },
{ "tl", { HB_SCRIPT_LATIN } },
{ "tn", { HB_SCRIPT_LATIN } },
{ "to", { HB_SCRIPT_LATIN } },
{ "tr", { HB_SCRIPT_LATIN } },
{ "ts", { HB_SCRIPT_LATIN } },
{ "tt", { HB_SCRIPT_CYRILLIC } },
{ "tw", { HB_SCRIPT_LATIN } },
{ "ty", { HB_SCRIPT_LATIN } },
{ "tyv", { HB_SCRIPT_CYRILLIC } },
{ "ug", { HB_SCRIPT_ARABIC } },
{ "uk", { HB_SCRIPT_CYRILLIC } },
{ "und-zmth", { HB_SCRIPT_LATIN, HB_SCRIPT_GREEK } },
{ "und-zsye", { (hb_script_t) 0 } },
{ "ur", { HB_SCRIPT_ARABIC } },
{ "uz", { HB_SCRIPT_LATIN } },
{ "ve", { HB_SCRIPT_LATIN } },
{ "vi", { HB_SCRIPT_LATIN } },
{ "vo", { HB_SCRIPT_LATIN } },
{ "vot", { HB_SCRIPT_LATIN } },
{ "wa", { HB_SCRIPT_LATIN } },
{ "wal", { HB_SCRIPT_ETHIOPIC } },
{ "wen", { HB_SCRIPT_LATIN } },
{ "wo", { HB_SCRIPT_LATIN } },
{ "xh", { HB_SCRIPT_LATIN } },
{ "yap", { HB_SCRIPT_LATIN } },
{ "yi", { HB_SCRIPT_HEBREW } },
{ "yo", { HB_SCRIPT_LATIN } },
{ "za", { HB_SCRIPT_LATIN } },
{ "zh-cn", { HB_SCRIPT_HAN } },
{ "zh-hk", { HB_SCRIPT_HAN } },
{ "zh-mo", { HB_SCRIPT_HAN } },
{ "zh-sg", { HB_SCRIPT_HAN } },
{ "zh-tw", { HB_SCRIPT_HAN } },
{ "zu", { HB_SCRIPT_LATIN } }
};

View File

@ -210,6 +210,45 @@ test_types_language (void)
g_assert (HB_LANGUAGE_INVALID != hb_language_get_default ());
}
static void
test_language_get_scripts (void)
{
hb_script_t scripts[10];
unsigned int n_scripts;
unsigned int count;
n_scripts = 10;
count = hb_language_get_scripts (hb_language_from_string ("en", -1), &n_scripts, scripts);
g_assert (count == 1);
g_assert (n_scripts == 1);
g_assert (scripts[0] == HB_SCRIPT_LATIN);
n_scripts = 10;
count = hb_language_get_scripts (hb_language_from_string ("cv", -1), &n_scripts, scripts);
g_assert (count == 2);
g_assert (n_scripts == 2);
g_assert (scripts[0] == HB_SCRIPT_CYRILLIC);
g_assert (scripts[1] == HB_SCRIPT_LATIN);
n_scripts = 1;
count = hb_language_get_scripts (hb_language_from_string ("cv", -1), &n_scripts, scripts);
g_assert (count == 2);
g_assert (n_scripts == 1);
g_assert (scripts[0] == HB_SCRIPT_CYRILLIC);
n_scripts = 10;
count = hb_language_get_scripts (hb_language_from_string ("ja", -1), &n_scripts, scripts);
g_assert (count == 3);
g_assert (n_scripts == 3);
g_assert (scripts[0] == HB_SCRIPT_HAN);
g_assert (scripts[1] == HB_SCRIPT_KATAKANA);
g_assert (scripts[2] == HB_SCRIPT_HIRAGANA);
}
int
main (int argc, char **argv)
{
@ -220,6 +259,7 @@ main (int argc, char **argv)
hb_test_add (test_types_tag);
hb_test_add (test_types_script);
hb_test_add (test_types_language);
hb_test_add (test_language_get_scripts);
return hb_test_run();
}