Match extlang subtags
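If the second subtag of a BCP 47 tag is three letters long, it denotes an extended language (extlang). In that case the tag converter ignores the primary language subtag and uses the extended language instead. Some grandfathered tags whose second subtag is also three letters long, such as "zh-min-nan", "no-bok" and "zh-min", are exceptions to this rule; they are handled earlier, by exact match.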
This commit is contained in:
parent
2f1f961cc0
commit
7c7cb2a989
|
@ -884,7 +884,7 @@ def print_subtag_matches (subtag):
|
||||||
|
|
||||||
for language, tags in sorted (ot.from_bcp_47.items (), key=lambda i: (-len (i[0]), i[0])):
|
for language, tags in sorted (ot.from_bcp_47.items (), key=lambda i: (-len (i[0]), i[0])):
|
||||||
lt = LanguageTag (language)
|
lt = LanguageTag (language)
|
||||||
if len (lt.subtags) == 1 or lt.grandfathered and ot.from_bcp_47[lt.subtags[0]] == tags:
|
if len (lt.subtags) == 1 or lt.grandfathered and len (lt.subtags[1]) != 3 and ot.from_bcp_47[lt.subtags[0]] == tags:
|
||||||
continue
|
continue
|
||||||
print (' if (', end='')
|
print (' if (', end='')
|
||||||
if (lt.language == 'und' or
|
if (lt.language == 'und' or
|
||||||
|
|
|
@ -1279,6 +1279,13 @@ hb_ot_tags_from_complex_language (const char *lang_str,
|
||||||
*count = 1;
|
*count = 1;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (0 == strcmp (lang_str, "zh-min-nan"))
|
||||||
|
{
|
||||||
|
/* Minnan, Hokkien, Amoy, Taiwanese, Southern Min, Southern Fujian, Hoklo, Southern Fukien, Ho-lo */
|
||||||
|
tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
|
||||||
|
*count = 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (lang_matches (lang_str, "cdo-hans"))
|
if (lang_matches (lang_str, "cdo-hans"))
|
||||||
{
|
{
|
||||||
/* Min Dong Chinese; Han (Simplified variant) */
|
/* Min Dong Chinese; Han (Simplified variant) */
|
||||||
|
@ -1791,6 +1798,13 @@ hb_ot_tags_from_complex_language (const char *lang_str,
|
||||||
*count = 1;
|
*count = 1;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (0 == strcmp (lang_str, "no-bok"))
|
||||||
|
{
|
||||||
|
/* Norwegian Bokmal */
|
||||||
|
tags[0] = HB_TAG('N','O','R',' '); /* Norwegian */
|
||||||
|
*count = 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (0 == strcmp (lang_str, "no-nyn"))
|
if (0 == strcmp (lang_str, "no-nyn"))
|
||||||
{
|
{
|
||||||
/* Norwegian Nynorsk */
|
/* Norwegian Nynorsk */
|
||||||
|
@ -1822,6 +1836,13 @@ hb_ot_tags_from_complex_language (const char *lang_str,
|
||||||
*count = 1;
|
*count = 1;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (0 == strcmp (lang_str, "zh-min"))
|
||||||
|
{
|
||||||
|
/* Min, Fuzhou, Hokkien, Amoy, or Taiwanese */
|
||||||
|
tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */
|
||||||
|
*count = 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (0 == strcmp (lang_str, "i-hak"))
|
if (0 == strcmp (lang_str, "i-hak"))
|
||||||
{
|
{
|
||||||
/* Hakka */
|
/* Hakka */
|
||||||
|
|
|
@ -249,8 +249,17 @@ hb_ot_tags_from_language (const char *lang_str,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Find a language matching in the first component. */
|
/* Find a language matching in the first component. */
|
||||||
|
s = strchr (lang_str, '-');
|
||||||
{
|
{
|
||||||
const LangTag *lang_tag;
|
const LangTag *lang_tag;
|
||||||
|
if (s && limit - lang_str >= 6)
|
||||||
|
{
|
||||||
|
const char *extlang_end = strchr (s + 1, '-');
|
||||||
|
/* If there is an extended language tag, use it. */
|
||||||
|
if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) &&
|
||||||
|
ISALPHA (s[1]))
|
||||||
|
lang_str = s + 1;
|
||||||
|
}
|
||||||
lang_tag = (LangTag *) bsearch (lang_str, ot_languages,
|
lang_tag = (LangTag *) bsearch (lang_str, ot_languages,
|
||||||
ARRAY_LENGTH (ot_languages), sizeof (LangTag),
|
ARRAY_LENGTH (ot_languages), sizeof (LangTag),
|
||||||
lang_compare_first_component);
|
lang_compare_first_component);
|
||||||
|
@ -264,7 +273,6 @@ hb_ot_tags_from_language (const char *lang_str,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s = strchr (lang_str, '-');
|
|
||||||
if (!s)
|
if (!s)
|
||||||
s = lang_str + strlen (lang_str);
|
s = lang_str + strlen (lang_str);
|
||||||
if (s - lang_str == 3) {
|
if (s - lang_str == 3) {
|
||||||
|
|
|
@ -369,9 +369,13 @@ test_ot_tag_language (void)
|
||||||
test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc");
|
test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc");
|
||||||
|
|
||||||
/* Unnormalized BCP 47 tags */
|
/* Unnormalized BCP 47 tags */
|
||||||
|
test_tag_from_language ("ARA", "ar-aao");
|
||||||
test_tag_from_language ("JBO", "art-lojban");
|
test_tag_from_language ("JBO", "art-lojban");
|
||||||
|
test_tag_from_language ("KOK", "kok-gom");
|
||||||
test_tag_from_language ("LTZ", "i-lux");
|
test_tag_from_language ("LTZ", "i-lux");
|
||||||
test_tag_from_language ("MNG", "drh");
|
test_tag_from_language ("MNG", "drh");
|
||||||
|
test_tag_from_language ("MOR", "ar-ary");
|
||||||
|
test_tag_from_language ("MOR", "ar-ary-DZ");
|
||||||
test_tag_from_language ("NOR", "no-bok");
|
test_tag_from_language ("NOR", "no-bok");
|
||||||
test_tag_from_language ("NYN", "no-nyn");
|
test_tag_from_language ("NYN", "no-nyn");
|
||||||
test_tag_from_language ("ZHS", "i-hak");
|
test_tag_from_language ("ZHS", "i-hak");
|
||||||
|
@ -379,6 +383,9 @@ test_ot_tag_language (void)
|
||||||
test_tag_from_language ("ZHS", "zh-min");
|
test_tag_from_language ("ZHS", "zh-min");
|
||||||
test_tag_from_language ("ZHS", "zh-min-nan");
|
test_tag_from_language ("ZHS", "zh-min-nan");
|
||||||
test_tag_from_language ("ZHS", "zh-xiang");
|
test_tag_from_language ("ZHS", "zh-xiang");
|
||||||
|
|
||||||
|
/* A UN M.49 region code, not an extended language subtag */
|
||||||
|
test_tag_from_language ("ARA", "ar-001");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
Loading…
Reference in New Issue