Replace “[family]” with “[collection]”

Not all language collections are language families.
This commit is contained in:
David Corbett 2022-01-28 22:27:51 -05:00
parent 0e31595e0d
commit 0b1bf89cc2
2 changed files with 31 additions and 31 deletions

View File

@ -570,7 +570,7 @@ class BCP47Parser (object):
if scope == 'macrolanguage':
scope = ' [macrolanguage]'
elif scope == 'collection':
scope = ' [family]'
scope = ' [collection]'
else:
continue
self.scopes[subtag] = scope
@ -1127,7 +1127,7 @@ def verify_disambiguation_dict ():
else:
macrolanguages = [t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]']
if len (macrolanguages) != 1:
macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [family]')
macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [collection]')
if len (macrolanguages) != 1:
macrolanguages = list (t for t in primary_tags if 'retired code' not in bcp_47.scopes.get (t, ''))
if len (macrolanguages) != 1:

View File

@ -66,7 +66,7 @@ static const LangTag ot_languages[] = {
{"an", HB_TAG('A','R','G',' ')}, /* Aragonese */
/*{"ang", HB_TAG('A','N','G',' ')},*/ /* Old English (ca. 450-1100) -> Anglo-Saxon */
{"aoa", HB_TAG('C','P','P',' ')}, /* Angolar -> Creoles */
{"apa", HB_TAG('A','T','H',' ')}, /* Apache [family] -> Athapaskan */
{"apa", HB_TAG('A','T','H',' ')}, /* Apache [collection] -> Athapaskan */
{"apc", HB_TAG('A','R','A',' ')}, /* North Levantine Arabic -> Arabic */
{"apd", HB_TAG('A','R','A',' ')}, /* Sudanese Arabic -> Arabic */
{"apj", HB_TAG('A','T','H',' ')}, /* Jicarilla Apache -> Athapaskan */
@ -86,7 +86,7 @@ static const LangTag ot_languages[] = {
{"arz", HB_TAG('A','R','A',' ')}, /* Egyptian Arabic -> Arabic */
{"as", HB_TAG('A','S','M',' ')}, /* Assamese */
/*{"ast", HB_TAG('A','S','T',' ')},*/ /* Asturian */
/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [family] -> Athapaskan */
/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [collection] -> Athapaskan */
{"atj", HB_TAG('R','C','R',' ')}, /* Atikamekw -> R-Cree */
{"atv", HB_TAG('A','L','T',' ')}, /* Northern Altai -> Altai */
{"auj", HB_TAG('B','B','R',' ')}, /* Awjilah -> Berber */
@ -110,10 +110,10 @@ static const LangTag ot_languages[] = {
{"azn", HB_TAG('N','A','H',' ')}, /* Western Durango Nahuatl -> Nahuatl */
{"azz", HB_TAG('N','A','H',' ')}, /* Highland Puebla Nahuatl -> Nahuatl */
{"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */
{"bad", HB_TAG('B','A','D','0')}, /* Banda [family] */
{"bad", HB_TAG('B','A','D','0')}, /* Banda [collection] */
{"bag", HB_TAG_NONE }, /* Tuki != Baghelkhandi */
{"bah", HB_TAG('C','P','P',' ')}, /* Bahamas Creole English -> Creoles */
{"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [family] */
{"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [collection] */
{"bal", HB_TAG('B','L','I',' ')}, /* Baluchi [macrolanguage] */
/*{"ban", HB_TAG('B','A','N',' ')},*/ /* Balinese */
/*{"bar", HB_TAG('B','A','R',' ')},*/ /* Bavarian */
@ -135,7 +135,7 @@ static const LangTag ot_languages[] = {
{"bea", HB_TAG('A','T','H',' ')}, /* Beaver -> Athapaskan */
{"beb", HB_TAG('B','T','I',' ')}, /* Bebele -> Beti */
/*{"bem", HB_TAG('B','E','M',' ')},*/ /* Bemba (Zambia) */
{"ber", HB_TAG('B','B','R',' ')}, /* Berber [family] */
{"ber", HB_TAG('B','B','R',' ')}, /* Berber [collection] */
{"bew", HB_TAG('C','P','P',' ')}, /* Betawi -> Creoles */
{"bfl", HB_TAG('B','A','D','0')}, /* Banda-Ndélé -> Banda */
{"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */
@ -203,7 +203,7 @@ static const LangTag ot_languages[] = {
{"btd", HB_TAG('B','T','K',' ')}, /* Batak Dairi -> Batak */
{"bti", HB_TAG_NONE }, /* Burate != Beti */
{"btj", HB_TAG('M','L','Y',' ')}, /* Bacanese Malay -> Malay */
/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [family] */
/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [collection] */
{"btm", HB_TAG('B','T','M',' ')}, /* Batak Mandailing */
{"btm", HB_TAG('B','T','K',' ')}, /* Batak Mandailing -> Batak */
{"bto", HB_TAG('B','I','K',' ')}, /* Rinconada Bikol -> Bikol */
@ -299,10 +299,10 @@ static const LangTag ot_languages[] = {
/*{"cop", HB_TAG('C','O','P',' ')},*/ /* Coptic */
{"coq", HB_TAG('A','T','H',' ')}, /* Coquille -> Athapaskan */
{"cpa", HB_TAG('C','C','H','N')}, /* Palantla Chinantec -> Chinantec */
{"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [family] -> Creoles */
{"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [family] -> Creoles */
{"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [collection] -> Creoles */
{"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [collection] -> Creoles */
{"cpi", HB_TAG('C','P','P',' ')}, /* Chinese Pidgin English -> Creoles */
/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [family] -> Creoles */
/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [collection] -> Creoles */
{"cpx", HB_TAG('Z','H','S',' ')}, /* Pu-Xian Chinese -> Chinese, Simplified */
{"cqd", HB_TAG('H','M','N',' ')}, /* Chuanqiandian Cluster Miao -> Hmong */
{"cqu", HB_TAG('Q','U','H',' ')}, /* Chilean Quechua (retired code) -> Quechua (Bolivia) */
@ -322,7 +322,7 @@ static const LangTag ot_languages[] = {
{"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */
{"crm", HB_TAG('L','C','R',' ')}, /* Moose Cree -> L-Cree */
{"crm", HB_TAG('C','R','E',' ')}, /* Moose Cree -> Cree */
{"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [family] -> Creoles */
{"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [collection] -> Creoles */
{"crr", HB_TAG_NONE }, /* Carolina Algonquian != Carrier */
{"crs", HB_TAG('C','P','P',' ')}, /* Seselwa Creole French -> Creoles */
{"crt", HB_TAG_NONE }, /* Iyojwa'ja Chorote != Crimean Tatar */
@ -433,7 +433,7 @@ static const LangTag ot_languages[] = {
{"et", HB_TAG('E','T','I',' ')}, /* Estonian [macrolanguage] */
{"eto", HB_TAG('B','T','I',' ')}, /* Eton (Cameroon) -> Beti */
{"eu", HB_TAG('E','U','Q',' ')}, /* Basque */
{"euq", HB_TAG_NONE }, /* Basque [family] != Basque */
{"euq", HB_TAG_NONE }, /* Basque [collection] != Basque */
{"eve", HB_TAG('E','V','N',' ')}, /* Even */
{"evn", HB_TAG('E','V','K',' ')}, /* Evenki */
{"ewo", HB_TAG('B','T','I',' ')}, /* Ewondo -> Beti */
@ -622,7 +622,7 @@ static const LangTag ot_languages[] = {
{"ijc", HB_TAG('I','J','O',' ')}, /* Izon -> Ijo */
{"ije", HB_TAG('I','J','O',' ')}, /* Biseni -> Ijo */
{"ijn", HB_TAG('I','J','O',' ')}, /* Kalabari -> Ijo */
/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [family] */
/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [collection] */
{"ijs", HB_TAG('I','J','O',' ')}, /* Southeast Ijo -> Ijo */
{"ik", HB_TAG('I','P','K',' ')}, /* Inupiaq [macrolanguage] -> Inupiat */
{"ike", HB_TAG('I','N','U',' ')}, /* Eastern Canadian Inuktitut -> Inuktitut */
@ -672,7 +672,7 @@ static const LangTag ot_languages[] = {
{"kab", HB_TAG('B','B','R',' ')}, /* Kabyle -> Berber */
{"kac", HB_TAG_NONE }, /* Kachin != Kachchi */
{"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */
{"kar", HB_TAG('K','R','N',' ')}, /* Karen [family] */
{"kar", HB_TAG('K','R','N',' ')}, /* Karen [collection] */
/*{"kaw", HB_TAG('K','A','W',' ')},*/ /* Kawi (Old Javanese) */
{"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */
{"kby", HB_TAG('K','N','R',' ')}, /* Manga Kanuri -> Kanuri */
@ -881,7 +881,7 @@ static const LangTag ot_languages[] = {
{"mam", HB_TAG('M','A','M',' ')}, /* Mam */
{"mam", HB_TAG('M','Y','N',' ')}, /* Mam -> Mayan */
{"man", HB_TAG('M','N','K',' ')}, /* Mandingo [macrolanguage] -> Maninka */
{"map", HB_TAG_NONE }, /* Austronesian [family] != Mapudungun */
{"map", HB_TAG_NONE }, /* Austronesian [collection] != Mapudungun */
{"maw", HB_TAG_NONE }, /* Mampruli != Marwari */
{"max", HB_TAG('M','L','Y',' ')}, /* North Moluccan Malay -> Malay */
{"max", HB_TAG('C','P','P',' ')}, /* North Moluccan Malay -> Creoles */
@ -963,7 +963,7 @@ static const LangTag ot_languages[] = {
{"mts", HB_TAG_NONE }, /* Yora != Maltese */
{"mud", HB_TAG('C','P','P',' ')}, /* Mednyj Aleut -> Creoles */
{"mui", HB_TAG('M','L','Y',' ')}, /* Musi -> Malay */
{"mun", HB_TAG_NONE }, /* Munda [family] != Mundari */
{"mun", HB_TAG_NONE }, /* Munda [collection] != Mundari */
{"mup", HB_TAG('R','A','J',' ')}, /* Malvi -> Rajasthani */
{"muq", HB_TAG('H','M','N',' ')}, /* Eastern Xiangxi Miao -> Hmong */
/*{"mus", HB_TAG('M','U','S',' ')},*/ /* Creek -> Muscogee */
@ -978,7 +978,7 @@ static const LangTag ot_languages[] = {
{"mww", HB_TAG('H','M','N',' ')}, /* Hmong Daw -> Hmong */
{"my", HB_TAG('B','R','M',' ')}, /* Burmese */
{"mym", HB_TAG('M','E','N',' ')}, /* Meen */
/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [family] */
/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [collection] */
{"myq", HB_TAG('M','N','K',' ')}, /* Forest Maninka (retired code) -> Maninka */
{"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */
{"mzb", HB_TAG('B','B','R',' ')}, /* Tumzabt -> Berber */
@ -987,7 +987,7 @@ static const LangTag ot_languages[] = {
{"na", HB_TAG('N','A','U',' ')}, /* Nauru -> Nauruan */
{"nag", HB_TAG('N','A','G',' ')}, /* Naga Pidgin -> Naga-Assamese */
{"nag", HB_TAG('C','P','P',' ')}, /* Naga Pidgin -> Creoles */
/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [family] */
/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [collection] */
{"nan", HB_TAG('Z','H','S',' ')}, /* Min Nan Chinese -> Chinese, Simplified */
/*{"nap", HB_TAG('N','A','P',' ')},*/ /* Neapolitan */
{"nas", HB_TAG_NONE }, /* Naasioi != Naskapi */
@ -1098,7 +1098,7 @@ static const LangTag ot_languages[] = {
{"otw", HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */
{"oua", HB_TAG('B','B','R',' ')}, /* Tagargrent -> Berber */
{"pa", HB_TAG('P','A','N',' ')}, /* Punjabi */
{"paa", HB_TAG_NONE }, /* Papuan [family] != Palestinian Aramaic */
{"paa", HB_TAG_NONE }, /* Papuan [collection] != Palestinian Aramaic */
/*{"pag", HB_TAG('P','A','G',' ')},*/ /* Pangasinan */
{"pal", HB_TAG_NONE }, /* Pahlavi != Pali */
/*{"pam", HB_TAG('P','A','M',' ')},*/ /* Pampanga -> Pampangan */
@ -1337,7 +1337,7 @@ static const LangTag ot_languages[] = {
{"skw", HB_TAG('C','P','P',' ')}, /* Skepi Creole Dutch -> Creoles */
{"sky", HB_TAG_NONE }, /* Sikaiana != Slovak */
{"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */
{"sla", HB_TAG_NONE }, /* Slavic [family] != Slavey */
{"sla", HB_TAG_NONE }, /* Slavic [collection] != Slavey */
{"sm", HB_TAG('S','M','O',' ')}, /* Samoan */
{"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */
{"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */
@ -1459,7 +1459,7 @@ static const LangTag ot_languages[] = {
{"tpi", HB_TAG('C','P','P',' ')}, /* Tok Pisin -> Creoles */
{"tr", HB_TAG('T','R','K',' ')}, /* Turkish */
{"trf", HB_TAG('C','P','P',' ')}, /* Trinidadian Creole English -> Creoles */
{"trk", HB_TAG_NONE }, /* Turkic [family] != Turkish */
{"trk", HB_TAG_NONE }, /* Turkic [collection] != Turkish */
{"tru", HB_TAG('T','U','A',' ')}, /* Turoyo -> Turoyo Aramaic */
{"tru", HB_TAG('S','Y','R',' ')}, /* Turoyo -> Syriac */
{"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */
@ -1601,7 +1601,7 @@ static const LangTag ot_languages[] = {
{"zlq", HB_TAG('Z','H','A',' ')}, /* Liuqian Zhuang -> Zhuang */
{"zmi", HB_TAG('M','L','Y',' ')}, /* Negeri Sembilan Malay -> Malay */
{"zmz", HB_TAG('B','A','D','0')}, /* Mbandja -> Banda */
{"znd", HB_TAG_NONE }, /* Zande [family] != Zande */
{"znd", HB_TAG_NONE }, /* Zande [collection] != Zande */
{"zne", HB_TAG('Z','N','D',' ')}, /* Zande */
{"zom", HB_TAG('Q','I','N',' ')}, /* Zou -> Chin */
{"zqe", HB_TAG('Z','H','A',' ')}, /* Qiubei Zhuang -> Zhuang */
@ -2821,15 +2821,15 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('A','R','K',' '): /* Rakhine */
return hb_language_from_string ("rki", -1); /* Rakhine */
case HB_TAG('A','T','H',' '): /* Athapaskan */
return hb_language_from_string ("ath", -1); /* Athapascan [family] */
return hb_language_from_string ("ath", -1); /* Athapascan [collection] */
case HB_TAG('B','B','R',' '): /* Berber */
return hb_language_from_string ("ber", -1); /* Berber [family] */
return hb_language_from_string ("ber", -1); /* Berber [collection] */
case HB_TAG('B','I','K',' '): /* Bikol */
return hb_language_from_string ("bik", -1); /* Bikol [macrolanguage] */
case HB_TAG('B','T','K',' '): /* Batak */
return hb_language_from_string ("btk", -1); /* Batak [family] */
return hb_language_from_string ("btk", -1); /* Batak [collection] */
case HB_TAG('C','P','P',' '): /* Creoles */
return hb_language_from_string ("crp", -1); /* Creoles and pidgins [family] */
return hb_language_from_string ("crp", -1); /* Creoles and pidgins [collection] */
case HB_TAG('C','R','R',' '): /* Carrier */
return hb_language_from_string ("crx", -1); /* Carrier */
case HB_TAG('D','G','R',' '): /* Dogri (macrolanguage) */
@ -2857,7 +2857,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('I','B','A',' '): /* Iban */
return hb_language_from_string ("iba", -1); /* Iban */
case HB_TAG('I','J','O',' '): /* Ijo */
return hb_language_from_string ("ijo", -1); /* Ijo [family] */
return hb_language_from_string ("ijo", -1); /* Ijo [collection] */
case HB_TAG('I','N','U',' '): /* Inuktitut */
return hb_language_from_string ("iu", -1); /* Inuktitut [macrolanguage] */
case HB_TAG('I','P','K',' '): /* Inupiat */
@ -2883,7 +2883,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('K','P','L',' '): /* Kpelle */
return hb_language_from_string ("kpe", -1); /* Kpelle [macrolanguage] */
case HB_TAG('K','R','N',' '): /* Karen */
return hb_language_from_string ("kar", -1); /* Karen [family] */
return hb_language_from_string ("kar", -1); /* Karen [collection] */
case HB_TAG('K','U','I',' '): /* Kui */
return hb_language_from_string ("uki", -1); /* Kui (India) */
case HB_TAG('K','U','R',' '): /* Kurdish */
@ -2909,9 +2909,9 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
case HB_TAG('M','O','N','T'): /* Thailand Mon */
return hb_language_from_string ("mnw-TH", -1); /* Mon; Thailand */
case HB_TAG('M','Y','N',' '): /* Mayan */
return hb_language_from_string ("myn", -1); /* Mayan [family] */
return hb_language_from_string ("myn", -1); /* Mayan [collection] */
case HB_TAG('N','A','H',' '): /* Nahuatl */
return hb_language_from_string ("nah", -1); /* Nahuatl [family] */
return hb_language_from_string ("nah", -1); /* Nahuatl [collection] */
case HB_TAG('N','E','P',' '): /* Nepali */
return hb_language_from_string ("ne", -1); /* Nepali [macrolanguage] */
case HB_TAG('N','I','S',' '): /* Nisi */