Merge pull request #3402 from harfbuzz/language-tags

Make miscellaneous changes to hb-ot-tag-table.hh
This commit is contained in:
Behdad Esfahbod 2022-01-30 17:19:46 -08:00 committed by GitHub
commit 3e8742e376
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 110 additions and 63 deletions

View File

@ -329,6 +329,10 @@ class OpenTypeRegistryParser (HTMLParser):
from_bcp_47 (DefaultDict[str, AbstractSet[str]]): ``to_bcp_47``
inverted. Its values start as unsorted sets;
``sort_languages`` converts them to sorted lists.
from_bcp_47_uninherited (Optional[Dict[str, AbstractSet[str]]]):
A copy of ``from_bcp_47``. It starts as ``None`` and is
populated at the beginning of the first call to
``inherit_from_macrolanguages``.
"""
def __init__ (self):
@ -338,6 +342,7 @@ class OpenTypeRegistryParser (HTMLParser):
self.ranks = collections.defaultdict (int)
self.to_bcp_47 = collections.defaultdict (set)
self.from_bcp_47 = collections.defaultdict (set)
self.from_bcp_47_uninherited = None
# Whether the parser is in a <td> element
self._td = False
# Whether the parser is after a <br> element within the current <tr> element
@ -462,30 +467,51 @@ class OpenTypeRegistryParser (HTMLParser):
explicit mapping, so it inherits from sq (Albanian) the mapping
to SQI.
However, if an OpenType tag maps to a BCP 47 macrolanguage and
some but not all of its individual languages, the mapping is not
inherited from the macrolanguage to the missing individual
languages. For example, INUK (Nunavik Inuktitut) is mapped to
ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to
ikt (Inuinnaqtun, which is an individual language of iu), so
this method does not add a mapping from ikt to INUK.
If a BCP 47 tag for a macrolanguage has no OpenType mapping but
- all of its individual languages do and they all map to the same
- tags, the mapping is copied to the macrolanguage.
+ some of its individual languages do, their mappings are copied
+ to the macrolanguage.
"""
global bcp_47
original_ot_from_bcp_47 = dict (self.from_bcp_47)
first_time = self.from_bcp_47_uninherited is None
if first_time:
self.from_bcp_47_uninherited = dict (self.from_bcp_47)
for macrolanguage, languages in dict (bcp_47.macrolanguages).items ():
ot_macrolanguages = set (original_ot_from_bcp_47.get (macrolanguage, set ()))
ot_macrolanguages = {
ot_macrolanguage for ot_macrolanguage in self.from_bcp_47_uninherited.get (macrolanguage, set ())
}
blocked_ot_macrolanguages = set ()
if 'retired code' not in bcp_47.scopes.get (macrolanguage, ''):
for ot_macrolanguage in ot_macrolanguages:
round_trip_macrolanguages = {
l for l in self.to_bcp_47[ot_macrolanguage]
if 'retired code' not in bcp_47.scopes.get (l, '')
}
round_trip_languages = {
l for l in languages
if 'retired code' not in bcp_47.scopes.get (l, '')
}
intersection = round_trip_macrolanguages & round_trip_languages
if intersection and intersection != round_trip_languages:
blocked_ot_macrolanguages.add (ot_macrolanguage)
if ot_macrolanguages:
for ot_macrolanguage in ot_macrolanguages:
if ot_macrolanguage not in blocked_ot_macrolanguages:
for language in languages:
self.add_language (language, ot_macrolanguage)
if not blocked_ot_macrolanguages:
self.ranks[ot_macrolanguage] += 1
else:
elif first_time:
for language in languages:
if language in original_ot_from_bcp_47:
if ot_macrolanguages:
ml = original_ot_from_bcp_47[language]
if ml:
ot_macrolanguages &= ml
else:
pass
else:
ot_macrolanguages |= original_ot_from_bcp_47[language]
if language in self.from_bcp_47_uninherited:
ot_macrolanguages |= self.from_bcp_47_uninherited[language]
else:
ot_macrolanguages.clear ()
if not ot_macrolanguages:
@ -570,7 +596,7 @@ class BCP47Parser (object):
if scope == 'macrolanguage':
scope = ' [macrolanguage]'
elif scope == 'collection':
scope = ' [family]'
scope = ' [collection]'
else:
continue
self.scopes[subtag] = scope
@ -715,6 +741,7 @@ ot.add_language ('no', 'NOR')
ot.add_language ('oc-provenc', 'PRO')
ot.remove_language_ot ('QUZ')
ot.add_language ('qu', 'QUZ')
ot.add_language ('qub', 'QWH')
ot.add_language ('qud', 'QVI')
@ -747,7 +774,6 @@ ot.add_language ('qxr', 'QVI')
ot.add_language ('qxt', 'QWH')
ot.add_language ('qxw', 'QWH')
bcp_47.macrolanguages['ro'].remove ('mo')
bcp_47.macrolanguages['ro-MD'].add ('mo')
ot.remove_language_ot ('SYRE')
@ -987,6 +1013,8 @@ for initial, items in sorted (complex_tags.items ()):
if initial != 'und':
continue
for lt, tags in items:
if not tags:
continue
if lt.variant in bcp_47.prefixes:
expect (next (iter (bcp_47.prefixes[lt.variant])) == lt.language,
'%s is not a valid prefix of %s' % (lt.language, lt.variant))
@ -1021,6 +1049,8 @@ for initial, items in sorted (complex_tags.items ()):
continue
print (" case '%s':" % initial)
for lt, tags in items:
if not tags:
continue
print (' if (', end='')
script = lt.script
region = lt.region
@ -1121,9 +1151,13 @@ def verify_disambiguation_dict ():
elif len (primary_tags) == 0:
expect (ot_tag not in disambiguation, 'There is no possible valid disambiguation for %s' % ot_tag)
else:
macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]')
original_languages = [t for t in primary_tags if t in ot.from_bcp_47_uninherited and 'retired code' not in bcp_47.scopes.get (t, '')]
if len (original_languages) == 1:
macrolanguages = original_languages
else:
macrolanguages = [t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]']
if len (macrolanguages) != 1:
macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [family]')
macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [collection]')
if len (macrolanguages) != 1:
macrolanguages = list (t for t in primary_tags if 'retired code' not in bcp_47.scopes.get (t, ''))
if len (macrolanguages) != 1:

View File

@ -6,8 +6,8 @@
*
* on files with these headers:
*
- * <meta name="updated_at" content="2021-12-09 12:01 AM" />
- * File-Date: 2021-08-06
+ * <meta name="updated_at" content="2022-01-28 10:00 PM" />
+ * File-Date: 2021-12-29
*/
#ifndef HB_OT_TAG_TABLE_HH
@ -66,7 +66,7 @@ static const LangTag ot_languages[] = {
{"an", HB_TAG('A','R','G',' ')}, /* Aragonese */
/*{"ang", HB_TAG('A','N','G',' ')},*/ /* Old English (ca. 450-1100) -> Anglo-Saxon */
{"aoa", HB_TAG('C','P','P',' ')}, /* Angolar -> Creoles */
{"apa", HB_TAG('A','T','H',' ')}, /* Apache [family] -> Athapaskan */
{"apa", HB_TAG('A','T','H',' ')}, /* Apache [collection] -> Athapaskan */
{"apc", HB_TAG('A','R','A',' ')}, /* North Levantine Arabic -> Arabic */
{"apd", HB_TAG('A','R','A',' ')}, /* Sudanese Arabic -> Arabic */
{"apj", HB_TAG('A','T','H',' ')}, /* Jicarilla Apache -> Athapaskan */
@ -86,7 +86,7 @@ static const LangTag ot_languages[] = {
{"arz", HB_TAG('A','R','A',' ')}, /* Egyptian Arabic -> Arabic */
{"as", HB_TAG('A','S','M',' ')}, /* Assamese */
/*{"ast", HB_TAG('A','S','T',' ')},*/ /* Asturian */
/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [family] -> Athapaskan */
/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [collection] -> Athapaskan */
{"atj", HB_TAG('R','C','R',' ')}, /* Atikamekw -> R-Cree */
{"atv", HB_TAG('A','L','T',' ')}, /* Northern Altai -> Altai */
{"auj", HB_TAG('B','B','R',' ')}, /* Awjilah -> Berber */
@ -110,10 +110,10 @@ static const LangTag ot_languages[] = {
{"azn", HB_TAG('N','A','H',' ')}, /* Western Durango Nahuatl -> Nahuatl */
{"azz", HB_TAG('N','A','H',' ')}, /* Highland Puebla Nahuatl -> Nahuatl */
{"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */
{"bad", HB_TAG('B','A','D','0')}, /* Banda [family] */
{"bad", HB_TAG('B','A','D','0')}, /* Banda [collection] */
{"bag", HB_TAG_NONE }, /* Tuki != Baghelkhandi */
{"bah", HB_TAG('C','P','P',' ')}, /* Bahamas Creole English -> Creoles */
{"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [family] */
{"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [collection] */
{"bal", HB_TAG('B','L','I',' ')}, /* Baluchi [macrolanguage] */
/*{"ban", HB_TAG('B','A','N',' ')},*/ /* Balinese */
/*{"bar", HB_TAG('B','A','R',' ')},*/ /* Bavarian */
@ -135,7 +135,7 @@ static const LangTag ot_languages[] = {
{"bea", HB_TAG('A','T','H',' ')}, /* Beaver -> Athapaskan */
{"beb", HB_TAG('B','T','I',' ')}, /* Bebele -> Beti */
/*{"bem", HB_TAG('B','E','M',' ')},*/ /* Bemba (Zambia) */
{"ber", HB_TAG('B','B','R',' ')}, /* Berber [family] */
{"ber", HB_TAG('B','B','R',' ')}, /* Berber [collection] */
{"bew", HB_TAG('C','P','P',' ')}, /* Betawi -> Creoles */
{"bfl", HB_TAG('B','A','D','0')}, /* Banda-Ndélé -> Banda */
{"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */
@ -203,7 +203,7 @@ static const LangTag ot_languages[] = {
{"btd", HB_TAG('B','T','K',' ')}, /* Batak Dairi -> Batak */
{"bti", HB_TAG_NONE }, /* Burate != Beti */
{"btj", HB_TAG('M','L','Y',' ')}, /* Bacanese Malay -> Malay */
/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [family] */
/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [collection] */
{"btm", HB_TAG('B','T','M',' ')}, /* Batak Mandailing */
{"btm", HB_TAG('B','T','K',' ')}, /* Batak Mandailing -> Batak */
{"bto", HB_TAG('B','I','K',' ')}, /* Rinconada Bikol -> Bikol */
@ -256,6 +256,8 @@ static const LangTag ot_languages[] = {
{"chh", HB_TAG_NONE }, /* Chinook != Chattisgarhi */
{"chj", HB_TAG('C','C','H','N')}, /* Ojitlán Chinantec -> Chinantec */
{"chk", HB_TAG('C','H','K','0')}, /* Chuukese */
{"chm", HB_TAG('H','M','A',' ')}, /* Mari (Russia) [macrolanguage] -> High Mari */
{"chm", HB_TAG('L','M','A',' ')}, /* Mari (Russia) [macrolanguage] -> Low Mari */
{"chn", HB_TAG('C','P','P',' ')}, /* Chinook jargon -> Creoles */
/*{"cho", HB_TAG('C','H','O',' ')},*/ /* Choctaw */
{"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */
@ -297,10 +299,10 @@ static const LangTag ot_languages[] = {
/*{"cop", HB_TAG('C','O','P',' ')},*/ /* Coptic */
{"coq", HB_TAG('A','T','H',' ')}, /* Coquille -> Athapaskan */
{"cpa", HB_TAG('C','C','H','N')}, /* Palantla Chinantec -> Chinantec */
{"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [family] -> Creoles */
{"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [family] -> Creoles */
{"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [collection] -> Creoles */
{"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [collection] -> Creoles */
{"cpi", HB_TAG('C','P','P',' ')}, /* Chinese Pidgin English -> Creoles */
/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [family] -> Creoles */
/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [collection] -> Creoles */
{"cpx", HB_TAG('Z','H','S',' ')}, /* Pu-Xian Chinese -> Chinese, Simplified */
{"cqd", HB_TAG('H','M','N',' ')}, /* Chuanqiandian Cluster Miao -> Hmong */
{"cqu", HB_TAG('Q','U','H',' ')}, /* Chilean Quechua (retired code) -> Quechua (Bolivia) */
@ -320,7 +322,7 @@ static const LangTag ot_languages[] = {
{"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */
{"crm", HB_TAG('L','C','R',' ')}, /* Moose Cree -> L-Cree */
{"crm", HB_TAG('C','R','E',' ')}, /* Moose Cree -> Cree */
{"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [family] -> Creoles */
{"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [collection] -> Creoles */
{"crr", HB_TAG_NONE }, /* Carolina Algonquian != Carrier */
{"crs", HB_TAG('C','P','P',' ')}, /* Seselwa Creole French -> Creoles */
{"crt", HB_TAG_NONE }, /* Iyojwa'ja Chorote != Crimean Tatar */
@ -431,7 +433,7 @@ static const LangTag ot_languages[] = {
{"et", HB_TAG('E','T','I',' ')}, /* Estonian [macrolanguage] */
{"eto", HB_TAG('B','T','I',' ')}, /* Eton (Cameroon) -> Beti */
{"eu", HB_TAG('E','U','Q',' ')}, /* Basque */
{"euq", HB_TAG_NONE }, /* Basque [family] != Basque */
{"euq", HB_TAG_NONE }, /* Basque [collection] != Basque */
{"eve", HB_TAG('E','V','N',' ')}, /* Even */
{"evn", HB_TAG('E','V','K',' ')}, /* Evenki */
{"ewo", HB_TAG('B','T','I',' ')}, /* Ewondo -> Beti */
@ -620,10 +622,11 @@ static const LangTag ot_languages[] = {
{"ijc", HB_TAG('I','J','O',' ')}, /* Izon -> Ijo */
{"ije", HB_TAG('I','J','O',' ')}, /* Biseni -> Ijo */
{"ijn", HB_TAG('I','J','O',' ')}, /* Kalabari -> Ijo */
/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [family] */
/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [collection] */
{"ijs", HB_TAG('I','J','O',' ')}, /* Southeast Ijo -> Ijo */
{"ik", HB_TAG('I','P','K',' ')}, /* Inupiaq [macrolanguage] -> Inupiat */
{"ike", HB_TAG('I','N','U',' ')}, /* Eastern Canadian Inuktitut -> Inuktitut */
{"ike", HB_TAG('I','N','U','K')}, /* Eastern Canadian Inuktitut -> Nunavik Inuktitut */
{"ikt", HB_TAG('I','N','U',' ')}, /* Inuinnaqtun -> Inuktitut */
/*{"ilo", HB_TAG('I','L','O',' ')},*/ /* Iloko -> Ilokano */
{"in", HB_TAG('I','N','D',' ')}, /* Indonesian (retired code) */
@ -638,6 +641,7 @@ static const LangTag ot_languages[] = {
{"it", HB_TAG('I','T','A',' ')}, /* Italian */
{"itz", HB_TAG('M','Y','N',' ')}, /* Itzá -> Mayan */
{"iu", HB_TAG('I','N','U',' ')}, /* Inuktitut [macrolanguage] */
{"iu", HB_TAG('I','N','U','K')}, /* Inuktitut [macrolanguage] -> Nunavik Inuktitut */
{"iw", HB_TAG('I','W','R',' ')}, /* Hebrew (retired code) */
{"ixl", HB_TAG('M','Y','N',' ')}, /* Ixil -> Mayan */
{"ja", HB_TAG('J','A','N',' ')}, /* Japanese */
@ -667,7 +671,7 @@ static const LangTag ot_languages[] = {
{"kab", HB_TAG('B','B','R',' ')}, /* Kabyle -> Berber */
{"kac", HB_TAG_NONE }, /* Kachin != Kachchi */
{"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */
{"kar", HB_TAG('K','R','N',' ')}, /* Karen [family] */
{"kar", HB_TAG('K','R','N',' ')}, /* Karen [collection] */
/*{"kaw", HB_TAG('K','A','W',' ')},*/ /* Kawi (Old Javanese) */
{"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */
{"kby", HB_TAG('K','N','R',' ')}, /* Manga Kanuri -> Kanuri */
@ -876,7 +880,7 @@ static const LangTag ot_languages[] = {
{"mam", HB_TAG('M','A','M',' ')}, /* Mam */
{"mam", HB_TAG('M','Y','N',' ')}, /* Mam -> Mayan */
{"man", HB_TAG('M','N','K',' ')}, /* Mandingo [macrolanguage] -> Maninka */
{"map", HB_TAG_NONE }, /* Austronesian [family] != Mapudungun */
{"map", HB_TAG_NONE }, /* Austronesian [collection] != Mapudungun */
{"maw", HB_TAG_NONE }, /* Mampruli != Marwari */
{"max", HB_TAG('M','L','Y',' ')}, /* North Moluccan Malay -> Malay */
{"max", HB_TAG('C','P','P',' ')}, /* North Moluccan Malay -> Creoles */
@ -936,6 +940,7 @@ static const LangTag ot_languages[] = {
{"mnw", HB_TAG('M','O','N','T')}, /* Mon -> Thailand Mon */
{"mnx", HB_TAG_NONE }, /* Manikion != Manx */
{"mo", HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) */
{"mo", HB_TAG('R','O','M',' ')}, /* Moldavian (retired code) -> Romanian */
{"mod", HB_TAG('C','P','P',' ')}, /* Mobilian -> Creoles */
/*{"moh", HB_TAG('M','O','H',' ')},*/ /* Mohawk */
{"mok", HB_TAG_NONE }, /* Morori != Moksha */
@ -958,7 +963,7 @@ static const LangTag ot_languages[] = {
{"mts", HB_TAG_NONE }, /* Yora != Maltese */
{"mud", HB_TAG('C','P','P',' ')}, /* Mednyj Aleut -> Creoles */
{"mui", HB_TAG('M','L','Y',' ')}, /* Musi -> Malay */
{"mun", HB_TAG_NONE }, /* Munda [family] != Mundari */
{"mun", HB_TAG_NONE }, /* Munda [collection] != Mundari */
{"mup", HB_TAG('R','A','J',' ')}, /* Malvi -> Rajasthani */
{"muq", HB_TAG('H','M','N',' ')}, /* Eastern Xiangxi Miao -> Hmong */
/*{"mus", HB_TAG('M','U','S',' ')},*/ /* Creek -> Muscogee */
@ -973,7 +978,7 @@ static const LangTag ot_languages[] = {
{"mww", HB_TAG('H','M','N',' ')}, /* Hmong Daw -> Hmong */
{"my", HB_TAG('B','R','M',' ')}, /* Burmese */
{"mym", HB_TAG('M','E','N',' ')}, /* Meen */
/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [family] */
/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [collection] */
{"myq", HB_TAG('M','N','K',' ')}, /* Forest Maninka (retired code) -> Maninka */
{"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */
{"mzb", HB_TAG('B','B','R',' ')}, /* Tumzabt -> Berber */
@ -982,7 +987,7 @@ static const LangTag ot_languages[] = {
{"na", HB_TAG('N','A','U',' ')}, /* Nauru -> Nauruan */
{"nag", HB_TAG('N','A','G',' ')}, /* Naga Pidgin -> Naga-Assamese */
{"nag", HB_TAG('C','P','P',' ')}, /* Naga Pidgin -> Creoles */
/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [family] */
/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [collection] */
{"nan", HB_TAG('Z','H','S',' ')}, /* Min Nan Chinese -> Chinese, Simplified */
/*{"nap", HB_TAG('N','A','P',' ')},*/ /* Neapolitan */
{"nas", HB_TAG_NONE }, /* Naasioi != Naskapi */
@ -1039,7 +1044,6 @@ static const LangTag ot_languages[] = {
{"nln", HB_TAG('N','A','H',' ')}, /* Durango Nahuatl (retired code) -> Nahuatl */
{"nlv", HB_TAG('N','A','H',' ')}, /* Orizaba Nahuatl -> Nahuatl */
{"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk (Nynorsk, Norwegian) */
{"nn", HB_TAG('N','O','R',' ')}, /* Norwegian Nynorsk -> Norwegian */
{"nnh", HB_TAG('B','M','L',' ')}, /* Ngiemboon -> Bamileke */
{"nnz", HB_TAG('B','M','L',' ')}, /* Nda'nda' -> Bamileke */
{"no", HB_TAG('N','O','R',' ')}, /* Norwegian [macrolanguage] */
@ -1093,7 +1097,7 @@ static const LangTag ot_languages[] = {
{"otw", HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */
{"oua", HB_TAG('B','B','R',' ')}, /* Tagargrent -> Berber */
{"pa", HB_TAG('P','A','N',' ')}, /* Punjabi */
{"paa", HB_TAG_NONE }, /* Papuan [family] != Palestinian Aramaic */
{"paa", HB_TAG_NONE }, /* Papuan [collection] != Palestinian Aramaic */
/*{"pag", HB_TAG('P','A','G',' ')},*/ /* Pangasinan */
{"pal", HB_TAG_NONE }, /* Pahlavi != Pali */
/*{"pam", HB_TAG('P','A','M',' ')},*/ /* Pampanga -> Pampangan */
@ -1308,6 +1312,9 @@ static const LangTag ot_languages[] = {
{"sgo", HB_TAG_NONE }, /* Songa (retired code) != Sango */
/*{"sgs", HB_TAG('S','G','S',' ')},*/ /* Samogitian */
{"sgw", HB_TAG('C','H','G',' ')}, /* Sebat Bet Gurage -> Chaha Gurage */
{"sh", HB_TAG('B','O','S',' ')}, /* Serbo-Croatian [macrolanguage] -> Bosnian */
{"sh", HB_TAG('H','R','V',' ')}, /* Serbo-Croatian [macrolanguage] -> Croatian */
{"sh", HB_TAG('S','R','B',' ')}, /* Serbo-Croatian [macrolanguage] -> Serbian */
{"shi", HB_TAG('S','H','I',' ')}, /* Tachelhit */
{"shi", HB_TAG('B','B','R',' ')}, /* Tachelhit -> Berber */
{"shl", HB_TAG('Q','I','N',' ')}, /* Shendu -> Chin */
@ -1329,7 +1336,7 @@ static const LangTag ot_languages[] = {
{"skw", HB_TAG('C','P','P',' ')}, /* Skepi Creole Dutch -> Creoles */
{"sky", HB_TAG_NONE }, /* Sikaiana != Slovak */
{"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */
{"sla", HB_TAG_NONE }, /* Slavic [family] != Slavey */
{"sla", HB_TAG_NONE }, /* Slavic [collection] != Slavey */
{"sm", HB_TAG('S','M','O',' ')}, /* Samoan */
{"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */
{"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */
@ -1451,7 +1458,7 @@ static const LangTag ot_languages[] = {
{"tpi", HB_TAG('C','P','P',' ')}, /* Tok Pisin -> Creoles */
{"tr", HB_TAG('T','R','K',' ')}, /* Turkish */
{"trf", HB_TAG('C','P','P',' ')}, /* Trinidadian Creole English -> Creoles */
{"trk", HB_TAG_NONE }, /* Turkic [family] != Turkish */
{"trk", HB_TAG_NONE }, /* Turkic [collection] != Turkish */
{"tru", HB_TAG('T','U','A',' ')}, /* Turoyo -> Turoyo Aramaic */
{"tru", HB_TAG('S','Y','R',' ')}, /* Turoyo -> Syriac */
{"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */
@ -1593,7 +1600,7 @@ static const LangTag ot_languages[] = {
{"zlq", HB_TAG('Z','H','A',' ')}, /* Liuqian Zhuang -> Zhuang */
{"zmi", HB_TAG('M','L','Y',' ')}, /* Negeri Sembilan Malay -> Malay */
{"zmz", HB_TAG('B','A','D','0')}, /* Mbandja -> Banda */
{"znd", HB_TAG_NONE }, /* Zande [family] != Zande */
{"znd", HB_TAG_NONE }, /* Zande [collection] != Zande */
{"zne", HB_TAG('Z','N','D',' ')}, /* Zande */
{"zom", HB_TAG('Q','I','N',' ')}, /* Zou -> Chin */
{"zqe", HB_TAG('Z','H','A',' ')}, /* Qiubei Zhuang -> Zhuang */
@ -2607,14 +2614,8 @@ hb_ot_tags_from_complex_language (const char *lang_str,
if (0 == strcmp (&lang_str[1], "o-nyn"))
{
/* Norwegian Nynorsk (retired code) */
unsigned int i;
hb_tag_t possible_tags[] = {
HB_TAG('N','Y','N',' '), /* Norwegian Nynorsk (Nynorsk, Norwegian) */
HB_TAG('N','O','R',' '), /* Norwegian */
};
for (i = 0; i < 2 && i < *count; i++)
tags[i] = possible_tags[i];
*count = i;
tags[0] = HB_TAG('N','Y','N',' '); /* Norwegian Nynorsk (Nynorsk, Norwegian) */
*count = 1;
return true;
}
break;
@ -2623,8 +2624,14 @@ hb_ot_tags_from_complex_language (const char *lang_str,
&& subtag_matches (lang_str, limit, "-md"))
{
/* Romanian; Moldova */
tags[0] = HB_TAG('M','O','L',' '); /* Moldavian */
*count = 1;
unsigned int i;
hb_tag_t possible_tags[] = {
HB_TAG('M','O','L',' '), /* Moldavian */
HB_TAG('R','O','M',' '), /* Romanian */
};
for (i = 0; i < 2 && i < *count; i++)
tags[i] = possible_tags[i];
*count = i;
return true;
}
break;
@ -2813,15 +2820,15 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('A','R','K',' '): /* Rakhine */
return hb_language_from_string ("rki", -1); /* Rakhine */
case HB_TAG('A','T','H',' '): /* Athapaskan */
return hb_language_from_string ("ath", -1); /* Athapascan [family] */
return hb_language_from_string ("ath", -1); /* Athapascan [collection] */
case HB_TAG('B','B','R',' '): /* Berber */
return hb_language_from_string ("ber", -1); /* Berber [family] */
return hb_language_from_string ("ber", -1); /* Berber [collection] */
case HB_TAG('B','I','K',' '): /* Bikol */
return hb_language_from_string ("bik", -1); /* Bikol [macrolanguage] */
case HB_TAG('B','T','K',' '): /* Batak */
return hb_language_from_string ("btk", -1); /* Batak [family] */
return hb_language_from_string ("btk", -1); /* Batak [collection] */
case HB_TAG('C','P','P',' '): /* Creoles */
return hb_language_from_string ("crp", -1); /* Creoles and pidgins [family] */
return hb_language_from_string ("crp", -1); /* Creoles and pidgins [collection] */
case HB_TAG('C','R','R',' '): /* Carrier */
return hb_language_from_string ("crx", -1); /* Carrier */
case HB_TAG('D','G','R',' '): /* Dogri (macrolanguage) */
@ -2838,6 +2845,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
return hb_language_from_string ("fa", -1); /* Persian [macrolanguage] */
case HB_TAG('G','O','N',' '): /* Gondi */
return hb_language_from_string ("gon", -1); /* Gondi [macrolanguage] */
case HB_TAG('H','M','A',' '): /* High Mari */
return hb_language_from_string ("mrj", -1); /* Western Mari */
case HB_TAG('H','M','N',' '): /* Hmong */
return hb_language_from_string ("hmn", -1); /* Hmong [macrolanguage] */
case HB_TAG('H','N','D',' '): /* Hindko */
@ -2847,7 +2856,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('I','B','A',' '): /* Iban */
return hb_language_from_string ("iba", -1); /* Iban */
case HB_TAG('I','J','O',' '): /* Ijo */
return hb_language_from_string ("ijo", -1); /* Ijo [family] */
return hb_language_from_string ("ijo", -1); /* Ijo [collection] */
case HB_TAG('I','N','U',' '): /* Inuktitut */
return hb_language_from_string ("iu", -1); /* Inuktitut [macrolanguage] */
case HB_TAG('I','P','K',' '): /* Inupiat */
@ -2873,11 +2882,13 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('K','P','L',' '): /* Kpelle */
return hb_language_from_string ("kpe", -1); /* Kpelle [macrolanguage] */
case HB_TAG('K','R','N',' '): /* Karen */
return hb_language_from_string ("kar", -1); /* Karen [family] */
return hb_language_from_string ("kar", -1); /* Karen [collection] */
case HB_TAG('K','U','I',' '): /* Kui */
return hb_language_from_string ("uki", -1); /* Kui (India) */
case HB_TAG('K','U','R',' '): /* Kurdish */
return hb_language_from_string ("ku", -1); /* Kurdish [macrolanguage] */
case HB_TAG('L','M','A',' '): /* Low Mari */
return hb_language_from_string ("mhr", -1); /* Eastern Mari */
case HB_TAG('L','U','H',' '): /* Luyia */
return hb_language_from_string ("luy", -1); /* Luyia [macrolanguage] */
case HB_TAG('L','V','I',' '): /* Latvian */
@ -2897,9 +2908,9 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('M','O','N','T'): /* Thailand Mon */
return hb_language_from_string ("mnw-TH", -1); /* Mon; Thailand */
case HB_TAG('M','Y','N',' '): /* Mayan */
return hb_language_from_string ("myn", -1); /* Mayan [family] */
return hb_language_from_string ("myn", -1); /* Mayan [collection] */
case HB_TAG('N','A','H',' '): /* Nahuatl */
return hb_language_from_string ("nah", -1); /* Nahuatl [family] */
return hb_language_from_string ("nah", -1); /* Nahuatl [collection] */
case HB_TAG('N','E','P',' '): /* Nepali */
return hb_language_from_string ("ne", -1); /* Nepali [macrolanguage] */
case HB_TAG('N','I','S',' '): /* Nisi */
@ -2926,6 +2937,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
return hb_language_from_string ("qwh", -1); /* Huaylas Ancash Quechua */
case HB_TAG('R','A','J',' '): /* Rajasthani */
return hb_language_from_string ("raj", -1); /* Rajasthani [macrolanguage] */
case HB_TAG('R','O','M',' '): /* Romanian */
return hb_language_from_string ("ro", -1); /* Romanian */
case HB_TAG('R','O','Y',' '): /* Romany */
return hb_language_from_string ("rom", -1); /* Romany [macrolanguage] */
case HB_TAG('S','Q','I',' '): /* Albanian */