Don’t always inherit from macrolanguages
If an OpenType tag maps to a BCP 47 macrolanguage, that is presumably to support the use of the macrolanguage as a vague stand-in for one of its individual languages. For example, "ar" and "zh" are often used for "arb" and "cmn". When the OpenType tag maps to a macrolanguage and to some but not all of its individual languages, that indicates that the OpenType tag corresponds only to the listed individual languages (which may be referred to using the macrolanguage subtag) and not to the missing individual languages. In particular, INUK (Nunavik Inuktitut) is mapped to "ike" (Eastern Canadian Inuktitut) and "iu" (Inuktitut) but not to "ikt" (Inuinnaqtun), so "ikt" should not inherit the INUK mapping from its macrolanguage "iu".
This commit is contained in:
parent
0b1bf89cc2
commit
a184c5f851
|
@ -467,6 +467,14 @@ class OpenTypeRegistryParser (HTMLParser):
|
|||
explicit mapping, so it inherits from sq (Albanian) the mapping
|
||||
to SQI.
|
||||
|
||||
However, if an OpenType tag maps to a BCP 47 macrolanguage and
|
||||
some but not all of its individual languages, the mapping is not
|
||||
inherited from the macrolanguage to the missing individual
|
||||
languages. For example, INUK (Nunavik Inuktitut) is mapped to
|
||||
ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to
|
||||
ikt (Inuinnaqtun, which is an individual language of iu), so
|
||||
this method does not add a mapping from ikt to INUK.
|
||||
|
||||
If a BCP 47 tag for a macrolanguage has no OpenType mapping but
|
||||
some of its individual languages do, their mappings are copied
|
||||
to the macrolanguage.
|
||||
|
@ -476,12 +484,30 @@ class OpenTypeRegistryParser (HTMLParser):
|
|||
if first_time:
|
||||
self.from_bcp_47_uninherited = dict (self.from_bcp_47)
|
||||
for macrolanguage, languages in dict (bcp_47.macrolanguages).items ():
|
||||
ot_macrolanguages = set (self.from_bcp_47_uninherited.get (macrolanguage, set ()))
|
||||
ot_macrolanguages = {
|
||||
ot_macrolanguage for ot_macrolanguage in self.from_bcp_47_uninherited.get (macrolanguage, set ())
|
||||
}
|
||||
blocked_ot_macrolanguages = set ()
|
||||
if 'retired code' not in bcp_47.scopes.get (macrolanguage, ''):
|
||||
for ot_macrolanguage in ot_macrolanguages:
|
||||
round_trip_macrolanguages = {
|
||||
l for l in self.to_bcp_47[ot_macrolanguage]
|
||||
if 'retired code' not in bcp_47.scopes.get (l, '')
|
||||
}
|
||||
round_trip_languages = {
|
||||
l for l in languages
|
||||
if 'retired code' not in bcp_47.scopes.get (l, '')
|
||||
}
|
||||
intersection = round_trip_macrolanguages & round_trip_languages
|
||||
if intersection and intersection != round_trip_languages:
|
||||
blocked_ot_macrolanguages.add (ot_macrolanguage)
|
||||
if ot_macrolanguages:
|
||||
for ot_macrolanguage in ot_macrolanguages:
|
||||
for language in languages:
|
||||
self.add_language (language, ot_macrolanguage)
|
||||
self.ranks[ot_macrolanguage] += 1
|
||||
if ot_macrolanguage not in blocked_ot_macrolanguages:
|
||||
for language in languages:
|
||||
self.add_language (language, ot_macrolanguage)
|
||||
if not blocked_ot_macrolanguages:
|
||||
self.ranks[ot_macrolanguage] += 1
|
||||
elif first_time:
|
||||
for language in languages:
|
||||
if language in self.from_bcp_47_uninherited:
|
||||
|
@ -715,6 +741,7 @@ ot.add_language ('no', 'NOR')
|
|||
|
||||
ot.add_language ('oc-provenc', 'PRO')
|
||||
|
||||
ot.remove_language_ot ('QUZ')
|
||||
ot.add_language ('qu', 'QUZ')
|
||||
ot.add_language ('qub', 'QWH')
|
||||
ot.add_language ('qud', 'QVI')
|
||||
|
|
|
@ -628,7 +628,6 @@ static const LangTag ot_languages[] = {
|
|||
{"ike", HB_TAG('I','N','U',' ')}, /* Eastern Canadian Inuktitut -> Inuktitut */
|
||||
{"ike", HB_TAG('I','N','U','K')}, /* Eastern Canadian Inuktitut -> Nunavik Inuktitut */
|
||||
{"ikt", HB_TAG('I','N','U',' ')}, /* Inuinnaqtun -> Inuktitut */
|
||||
{"ikt", HB_TAG('I','N','U','K')}, /* Inuinnaqtun -> Nunavik Inuktitut */
|
||||
/*{"ilo", HB_TAG('I','L','O',' ')},*/ /* Iloko -> Ilokano */
|
||||
{"in", HB_TAG('I','N','D',' ')}, /* Indonesian (retired code) */
|
||||
{"in", HB_TAG('M','L','Y',' ')}, /* Indonesian (retired code) -> Malay */
|
||||
|
@ -1044,7 +1043,6 @@ static const LangTag ot_languages[] = {
|
|||
{"nln", HB_TAG('N','A','H',' ')}, /* Durango Nahuatl (retired code) -> Nahuatl */
|
||||
{"nlv", HB_TAG('N','A','H',' ')}, /* Orizaba Nahuatl -> Nahuatl */
|
||||
{"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk (Nynorsk, Norwegian) */
|
||||
{"nn", HB_TAG('N','O','R',' ')}, /* Norwegian Nynorsk -> Norwegian */
|
||||
{"nnh", HB_TAG('B','M','L',' ')}, /* Ngiemboon -> Bamileke */
|
||||
{"nnz", HB_TAG('B','M','L',' ')}, /* Nda'nda' -> Bamileke */
|
||||
{"no", HB_TAG('N','O','R',' ')}, /* Norwegian [macrolanguage] */
|
||||
|
@ -2615,14 +2613,8 @@ hb_ot_tags_from_complex_language (const char *lang_str,
|
|||
if (0 == strcmp (&lang_str[1], "o-nyn"))
|
||||
{
|
||||
/* Norwegian Nynorsk (retired code) */
|
||||
unsigned int i;
|
||||
hb_tag_t possible_tags[] = {
|
||||
HB_TAG('N','Y','N',' '), /* Norwegian Nynorsk (Nynorsk, Norwegian) */
|
||||
HB_TAG('N','O','R',' '), /* Norwegian */
|
||||
};
|
||||
for (i = 0; i < 2 && i < *count; i++)
|
||||
tags[i] = possible_tags[i];
|
||||
*count = i;
|
||||
tags[0] = HB_TAG('N','Y','N',' '); /* Norwegian Nynorsk (Nynorsk, Norwegian) */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue