diff --git a/src/Makefile.am b/src/Makefile.am index c4ae2bcb2..c74bab5d9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -291,11 +291,12 @@ GENERATORS = \ gen-emoji-table.py \ gen-indic-table.py \ gen-os2-unicode-ranges.py \ + gen-tag-table.py \ gen-use-table.py \ $(NULL) EXTRA_DIST += $(GENERATORS) -unicode-tables: arabic-table indic-table use-table emoji-table +unicode-tables: arabic-table indic-table tag-table use-table emoji-table arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-arabic-table.hh \ @@ -305,6 +306,10 @@ indic-table: gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategor $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-indic-table.cc \ || ($(RM) $(srcdir)/hb-ot-shape-complex-indic-table.cc; false) +tag-table: gen-tag-table.py languagetags language-subtag-registry + $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-tag-table.hh \ + || ($(RM) $(srcdir)/hb-ot-tag-table.hh; false) + use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \ || ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false) @@ -315,7 +320,7 @@ emoji-table: gen-emoji-table.py emoji-data.txt built-sources: $(BUILT_SOURCES) -.PHONY: unicode-tables arabic-table indic-table use-table emoji-table built-sources +.PHONY: unicode-tables arabic-table indic-table tag-table use-table emoji-table built-sources RAGEL_GENERATED = \ $(patsubst %,$(srcdir)/%,$(HB_BASE_RAGEL_GENERATED_sources)) \ diff --git a/src/Makefile.sources b/src/Makefile.sources index 8b70381bd..db0a4138d 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -38,6 +38,7 @@ HB_BASE_sources = \ hb-ot-os2-unicode-ranges.hh \ hb-ot-post-macroman.hh \ hb-ot-post-table.hh \ + hb-ot-tag-table.hh \ hb-ot-tag.cc \ hb.hh \ hb-set-digest.hh \ diff --git a/src/gen-tag-table.py 
b/src/gen-tag-table.py new file mode 100755 index 000000000..925ffb439 --- /dev/null +++ b/src/gen-tag-table.py @@ -0,0 +1,1013 @@ +#!/usr/bin/python + +"""Generator of the mapping from OpenType tags to BCP 47 tags and vice +versa. + +It creates a ``const LangTag[]``, matching the tags from the OpenType +languages system tag list to the language subtags of the BCP 47 language +subtag registry, with some manual adjustments. The mappings are +supplemented with macrolanguages' sublanguages and retired codes' +replacements, according to BCP 47 and some manual additions where BCP 47 +omits a retired code entirely. + +Also generated is a function, ``hb_ot_ambiguous_tag_to_language``, +intended for use by ``hb_ot_tag_to_language``. It maps OpenType tags +back to BCP 47 tags. Ambiguous OpenType tags (those that correspond to +multiple BCP 47 tags) are listed here, except when the alphabetically +first BCP 47 tag happens to be the chosen disambiguated tag. In that +case, the fallback behavior will choose the right tag anyway. 
+""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import collections +try: + from HTMLParser import HTMLParser + def write (s): + print (s.encode ('utf-8'), end='') +except ImportError: + from html.parser import HTMLParser + def write (s): + sys.stdout.flush () + sys.stdout.buffer.write (s.encode ('utf-8')) +import io +import itertools +import re +import sys +import unicodedata + +if len (sys.argv) != 3: + print ('usage: ./gen-tag-table.py languagetags language-subtag-registry', file=sys.stderr) + sys.exit (1) + +try: + from html import unescape + def html_unescape (parser, entity): + return unescape (entity) +except ImportError: + def html_unescape (parser, entity): + return parser.unescape (entity) + +def expect (condition, message=None): + if not condition: + if message is None: + raise AssertionError + raise AssertionError (message) + +# from http://www-01.sil.org/iso639-3/iso-639-3.tab +ISO_639_3_TO_1 = { + 'aar': 'aa', + 'abk': 'ab', + 'afr': 'af', + 'aka': 'ak', + 'amh': 'am', + 'ara': 'ar', + 'arg': 'an', + 'asm': 'as', + 'ava': 'av', + 'ave': 'ae', + 'aym': 'ay', + 'aze': 'az', + 'bak': 'ba', + 'bam': 'bm', + 'bel': 'be', + 'ben': 'bn', + 'bis': 'bi', + 'bod': 'bo', + 'bos': 'bs', + 'bre': 'br', + 'bul': 'bg', + 'cat': 'ca', + 'ces': 'cs', + 'cha': 'ch', + 'che': 'ce', + 'chu': 'cu', + 'chv': 'cv', + 'cor': 'kw', + 'cos': 'co', + 'cre': 'cr', + 'cym': 'cy', + 'dan': 'da', + 'deu': 'de', + 'div': 'dv', + 'dzo': 'dz', + 'ell': 'el', + 'eng': 'en', + 'epo': 'eo', + 'est': 'et', + 'eus': 'eu', + 'ewe': 'ee', + 'fao': 'fo', + 'fas': 'fa', + 'fij': 'fj', + 'fin': 'fi', + 'fra': 'fr', + 'fry': 'fy', + 'ful': 'ff', + 'gla': 'gd', + 'gle': 'ga', + 'glg': 'gl', + 'glv': 'gv', + 'grn': 'gn', + 'guj': 'gu', + 'hat': 'ht', + 'hau': 'ha', + 'hbs': 'sh', + 'heb': 'he', + 'her': 'hz', + 'hin': 'hi', + 'hmo': 'ho', + 'hrv': 'hr', + 'hun': 'hu', + 'hye': 'hy', + 'ibo': 'ig', + 'ido': 'io', + 'iii': 'ii', + 'iku': 'iu', + 'ile': 
'ie', + 'ina': 'ia', + 'ind': 'id', + 'ipk': 'ik', + 'isl': 'is', + 'ita': 'it', + 'jav': 'jv', + 'jpn': 'ja', + 'kal': 'kl', + 'kan': 'kn', + 'kas': 'ks', + 'kat': 'ka', + 'kau': 'kr', + 'kaz': 'kk', + 'khm': 'km', + 'kik': 'ki', + 'kin': 'rw', + 'kir': 'ky', + 'kom': 'kv', + 'kon': 'kg', + 'kor': 'ko', + 'kua': 'kj', + 'kur': 'ku', + 'lao': 'lo', + 'lat': 'la', + 'lav': 'lv', + 'lim': 'li', + 'lin': 'ln', + 'lit': 'lt', + 'ltz': 'lb', + 'lub': 'lu', + 'lug': 'lg', + 'mah': 'mh', + 'mal': 'ml', + 'mar': 'mr', + 'mkd': 'mk', + 'mlg': 'mg', + 'mlt': 'mt', + 'mol': 'mo', + 'mon': 'mn', + 'mri': 'mi', + 'msa': 'ms', + 'mya': 'my', + 'nau': 'na', + 'nav': 'nv', + 'nbl': 'nr', + 'nde': 'nd', + 'ndo': 'ng', + 'nep': 'ne', + 'nld': 'nl', + 'nno': 'nn', + 'nob': 'nb', + 'nor': 'no', + 'nya': 'ny', + 'oci': 'oc', + 'oji': 'oj', + 'ori': 'or', + 'orm': 'om', + 'oss': 'os', + 'pan': 'pa', + 'pli': 'pi', + 'pol': 'pl', + 'por': 'pt', + 'pus': 'ps', + 'que': 'qu', + 'roh': 'rm', + 'ron': 'ro', + 'run': 'rn', + 'rus': 'ru', + 'sag': 'sg', + 'san': 'sa', + 'sin': 'si', + 'slk': 'sk', + 'slv': 'sl', + 'sme': 'se', + 'smo': 'sm', + 'sna': 'sn', + 'snd': 'sd', + 'som': 'so', + 'sot': 'st', + 'spa': 'es', + 'sqi': 'sq', + 'srd': 'sc', + 'srp': 'sr', + 'ssw': 'ss', + 'sun': 'su', + 'swa': 'sw', + 'swe': 'sv', + 'tah': 'ty', + 'tam': 'ta', + 'tat': 'tt', + 'tel': 'te', + 'tgk': 'tg', + 'tgl': 'tl', + 'tha': 'th', + 'tir': 'ti', + 'ton': 'to', + 'tsn': 'tn', + 'tso': 'ts', + 'tuk': 'tk', + 'tur': 'tr', + 'twi': 'tw', + 'uig': 'ug', + 'ukr': 'uk', + 'urd': 'ur', + 'uzb': 'uz', + 'ven': 've', + 'vie': 'vi', + 'vol': 'vo', + 'wln': 'wa', + 'wol': 'wo', + 'xho': 'xh', + 'yid': 'yi', + 'yor': 'yo', + 'zha': 'za', + 'zho': 'zh', + 'zul': 'zu', +} + +class LanguageTag (object): + """A BCP 47 language tag. + + Attributes: + subtags (List[str]): The list of subtags in this tag. + grandfathered (bool): Whether this tag is grandfathered. 
If + ``true``, the entire lowercased tag is the ``language`` + and the other subtag fields are empty. + language (str): The language subtag. + script (str): The script subtag. + region (str): The region subtag. + variant (str): The variant subtag. + + Args: + tag (str): A BCP 47 language tag. + + """ + def __init__ (self, tag): + global bcp_47 + self.subtags = tag.lower ().split ('-') + self.grandfathered = tag.lower () in bcp_47.grandfathered + if self.grandfathered: + self.language = tag.lower () + self.script = '' + self.region = '' + self.variant = '' + else: + self.language = self.subtags[0] + self.script = self._find_first (lambda s: len (s) == 4 and s[0] > '9', self.subtags) + self.region = self._find_first (lambda s: len (s) == 2 and s[0] > '9' or len (s) == 3 and s[0] <= '9', self.subtags[1:]) + self.variant = self._find_first (lambda s: len (s) > 4 or len (s) == 4 and s[0] <= '9', self.subtags) + + def __str__(self): + return '-'.join(self.subtags) + + def __repr__ (self): + return 'LanguageTag(%r)' % str(self) + + @staticmethod + def _find_first (function, sequence): + try: + return next (iter (filter (function, sequence))) + except StopIteration: + return None + +class OpenTypeRegistryParser (HTMLParser): + """A parser for the OpenType language system tag registry. + + Attributes: + header (str): The "last updated" line of the registry. + names (Mapping[str, str]): A map of language system tags to the + names they are given in the registry. + ranks (DefaultDict[str, int]): A map of language system tags to + numbers. If a single BCP 47 tag corresponds to multiple + OpenType tags, the tags are ordered in increasing order by + rank. The rank is based on the number of BCP 47 tags + associated with a tag, though it may be manually modified. + to_bcp_47 (DefaultDict[str, AbstractSet[str]]): A map of + OpenType language system tags to sets of BCP 47 tags. + from_bcp_47 (DefaultDict[str, AbstractSet[str]]): ``to_bcp_47`` + inverted. 
Its values start as unsorted sets; + ``sort_languages`` converts them to sorted lists. + + """ + def __init__ (self): + HTMLParser.__init__ (self) + self.header = '' + self.names = {} + self.ranks = collections.defaultdict (int) + self.to_bcp_47 = collections.defaultdict (set) + self.from_bcp_47 = collections.defaultdict (set) + # Whether the parser is in a element + self._td = False + # The text of the elements of the current element. + self._current_tr = [] + + def handle_starttag (self, tag, attrs): + if tag == 'meta': + for attr, value in attrs: + if attr == 'name' and value == 'updated_at': + self.header = self.get_starttag_text () + break + elif tag == 'td': + self._td = True + self._current_tr.append ('') + elif tag == 'tr': + self._current_tr = [] + + def handle_endtag (self, tag): + if tag == 'td': + self._td = False + elif tag == 'tr' and self._current_tr: + expect (2 <= len (self._current_tr) <= 3) + name = self._current_tr[0].strip () + tag = self._current_tr[1].strip () + rank = 0 + if len (tag) > 4: + expect (tag.endswith (' (deprecated)'), 'ill-formed OpenType tag: %s' % tag) + name += ' (deprecated)' + tag = tag.split (' ')[0] + rank = 1 + self.names[tag] = re.sub (' languages$', '', name) + if not self._current_tr[2]: + return + iso_codes = self._current_tr[2].strip () + self.to_bcp_47[tag].update (ISO_639_3_TO_1.get (code, code) for code in iso_codes.replace (' ', '').split (',')) + rank += 2 * len (self.to_bcp_47[tag]) + self.ranks[tag] = rank + + def handle_data (self, data): + if self._td: + self._current_tr[-1] += data + + def handle_charref (self, name): + self.handle_data (html_unescape (self, '&#%s;' % name)) + + def handle_entityref (self, name): + self.handle_data (html_unescape (self, '&%s;' % name)) + + def parse (self, filename): + """Parse the OpenType language system tag registry. + + Args: + filename (str): The file name of the registry. 
+ """ + with io.open (filename, encoding='utf-8') as f: + self.feed (f.read ()) + expect (self.header) + for tag, iso_codes in self.to_bcp_47.items (): + for iso_code in iso_codes: + self.from_bcp_47[iso_code].add (tag) + + def add_language (self, bcp_47_tag, ot_tag): + """Add a language as if it were in the registry. + + Args: + bcp_47_tag (str): A BCP 47 tag. If the tag is more than just + a language subtag, and if the language subtag is a + macrolanguage, then new languages are added corresponding + to the macrolanguages' individual languages with the + remainder of the tag appended. + ot_tag (str): An OpenType language system tag. + """ + global bcp_47 + self.to_bcp_47[ot_tag].add (bcp_47_tag) + self.from_bcp_47[bcp_47_tag].add (ot_tag) + if bcp_47_tag.lower () not in bcp_47.grandfathered: + try: + [macrolanguage, suffix] = bcp_47_tag.split ('-', 1) + if macrolanguage in bcp_47.macrolanguages: + s = set () + for language in bcp_47.macrolanguages[macrolanguage]: + if language.lower () not in bcp_47.grandfathered: + s.add ('%s-%s' % (language, suffix)) + bcp_47.macrolanguages['%s-%s' % (macrolanguage, suffix)] = s + except ValueError: + pass + + @staticmethod + def _remove_language (tag_1, dict_1, dict_2): + for tag_2 in dict_1.pop (tag_1): + dict_2[tag_2].remove (tag_1) + if not dict_2[tag_2]: + del dict_2[tag_2] + + def remove_language_ot (self, ot_tag): + """Remove an OpenType tag from the registry. + + Args: + ot_tag (str): An OpenType tag. + """ + self._remove_language (ot_tag, self.to_bcp_47, self.from_bcp_47) + + def remove_language_bcp_47 (self, bcp_47_tag): + """Remove a BCP 47 tag from the registry. + + Args: + bcp_47_tag (str): A BCP 47 tag. + """ + self._remove_language (bcp_47_tag, self.from_bcp_47, self.to_bcp_47) + + def inherit_from_macrolanguages (self): + """Copy mappings from macrolanguages to individual languages. 
+ + If a BCP 47 tag for an individual mapping has no OpenType + mapping but its macrolanguage does, the mapping is copied to + the individual language. For example, als (Tosk Albanian) has no + explicit mapping, so it inherits from sq (Albanian) the mapping + to SQI. + + If a BCP 47 tag for a macrolanguage has no OpenType mapping but + all of its individual languages do and they all map to the same + tags, the mapping is copied to the macrolanguage. + """ + global bcp_47 + original_ot_from_bcp_47 = dict (self.from_bcp_47) + for macrolanguage, languages in dict (bcp_47.macrolanguages).items (): + ot_macrolanguages = set (original_ot_from_bcp_47.get (macrolanguage, set ())) + if ot_macrolanguages: + for ot_macrolanguage in ot_macrolanguages: + for language in languages: + # Remove the following condition if e.g. nn should map to NYN,NOR + # instead of just NYN. + if language not in original_ot_from_bcp_47: + self.add_language (language, ot_macrolanguage) + self.ranks[ot_macrolanguage] += 1 + else: + for language in languages: + if language in original_ot_from_bcp_47: + if ot_macrolanguages: + ml = original_ot_from_bcp_47[language] + if ml: + ot_macrolanguages &= ml + else: + pass + else: + ot_macrolanguages |= original_ot_from_bcp_47[language] + else: + ot_macrolanguages.clear () + if not ot_macrolanguages: + break + for ot_macrolanguage in ot_macrolanguages: + self.add_language (macrolanguage, ot_macrolanguage) + + def sort_languages (self): + """Sort the values of ``from_bcp_47`` in ascending rank order.""" + for language, tags in self.from_bcp_47.items (): + self.from_bcp_47[language] = sorted (tags, + key=lambda t: (self.ranks[t] + rank_delta (language, t), t)) + +ot = OpenTypeRegistryParser () + +class BCP47Parser (object): + """A parser for the BCP 47 subtag registry. + + Attributes: + header (str): The "File-Date" line of the registry. + names (Mapping[str, str]): A map of subtags to the names they + are given in the registry. 
Each value is a + ``'\\n'``-separated list of names. + scopes (Mapping[str, str]): A map of language subtags to strings + suffixed to language names, including suffixes to explain + language scopes. + macrolanguages (DefaultDict[str, AbstractSet[str]]): A map of + language subtags to the sets of language subtags which + inherit from them. See + ``OpenTypeRegistryParser.inherit_from_macrolanguages``. + prefixes (DefaultDict[str, AbstractSet[str]]): A map of variant + subtags to their prefixes. + grandfathered (AbstractSet[str]): The set of grandfathered tags, + normalized to lowercase. + + """ + def __init__ (self): + self.header = '' + self.names = {} + self.scopes = {} + self.macrolanguages = collections.defaultdict (set) + self.prefixes = collections.defaultdict (set) + self.grandfathered = set () + + def parse (self, filename): + """Parse the BCP 47 subtag registry. + + Args: + filename (str): The file name of the registry. + """ + with io.open (filename, encoding='utf-8') as f: + subtag_type = None + subtag = None + deprecated = False + has_preferred_value = False + line_buffer = '' + for line in itertools.chain (f, ['']): + line = line.rstrip () + if line.startswith (' '): + line_buffer += line[1:] + continue + line, line_buffer = line_buffer, line + if line.startswith ('Type: '): + subtag_type = line.split (' ')[1] + deprecated = False + has_preferred_value = False + elif line.startswith ('Subtag: ') or line.startswith ('Tag: '): + subtag = line.split (' ')[1] + if subtag_type == 'grandfathered': + self.grandfathered.add (subtag.lower ()) + elif line.startswith ('Description: '): + description = line.split (' ', 1)[1].replace (' (individual language)', '') + description = re.sub (' (\((individual |macro)language\)|languages)$', '', + description) + if subtag in self.names: + self.names[subtag] += '\n' + description + else: + self.names[subtag] = description + elif subtag_type == 'language' or subtag_type == 'grandfathered': + if line.startswith ('Scope: '): + 
scope = line.split (' ')[1] + if scope == 'macrolanguage': + scope = ' [macrolanguage]' + elif scope == 'collection': + scope = ' [family]' + else: + continue + self.scopes[subtag] = scope + elif line.startswith ('Deprecated: '): + self.scopes[subtag] = ' (retired code)' + self.scopes.get (subtag, '') + deprecated = True + elif deprecated and line.startswith ('Comments: see '): + # If a subtag is split into multiple replacement subtags, + # it essentially represents a macrolanguage. + for language in line.replace (',', '').split (' ')[2:]: + self._add_macrolanguage (subtag, language) + elif line.startswith ('Preferred-Value: '): + # If a subtag is deprecated in favor of a single replacement subtag, + # it is either a dialect or synonym of the preferred subtag. Either + # way, it is close enough to the truth to consider the replacement + # the macrolanguage of the deprecated language. + has_preferred_value = True + macrolanguage = line.split (' ')[1] + self._add_macrolanguage (macrolanguage, subtag) + elif not has_preferred_value and line.startswith ('Macrolanguage: '): + self._add_macrolanguage (line.split (' ')[1], subtag) + elif subtag_type == 'variant': + if line.startswith ('Prefix: '): + self.prefixes[subtag].add (line.split (' ')[1]) + elif line.startswith ('File-Date: '): + self.header = line + expect (self.header) + + def _add_macrolanguage (self, macrolanguage, language): + global ot + if language not in ot.from_bcp_47: + for l in self.macrolanguages.get (language, set ()): + self._add_macrolanguage (macrolanguage, l) + if macrolanguage not in ot.from_bcp_47: + for ls in list (self.macrolanguages.values ()): + if macrolanguage in ls: + ls.add (language) + return + self.macrolanguages[macrolanguage].add (language) + + def remove_extra_macrolanguages (self): + """Make every language have at most one macrolanguage.""" + inverted = collections.defaultdict (list) + for macrolanguage, languages in self.macrolanguages.items (): + for language in languages: + 
inverted[language].append (macrolanguage) + for language, macrolanguages in inverted.items (): + if len (macrolanguages) > 1: + macrolanguages.sort (key=lambda ml: len (self.macrolanguages[ml])) + biggest_macrolanguage = macrolanguages.pop () + for macrolanguage in macrolanguages: + self._add_macrolanguage (biggest_macrolanguage, macrolanguage) + + def get_name (self, tag): + """Return the names of the subtags in a language tag. + + Args: + tag (str): A BCP 47 language tag. + + Returns: + The name form of ``tag``. + """ + lt = LanguageTag (tag) + name = self.names[lt.language].split ('\n')[0] + if lt.script: + name += '; ' + self.names[lt.script.title ()].split ('\n')[0] + if lt.region: + name += '; ' + self.names[lt.region.upper ()].split ('\n')[0] + if lt.variant: + name += '; ' + self.names[lt.variant].split ('\n')[0] + return name + +bcp_47 = BCP47Parser () + +ot.parse (sys.argv[1]) +bcp_47.parse (sys.argv[2]) + +ot.add_language ('ary', 'MOR') + +ot.add_language ('ath', 'ATH') + +ot.add_language ('bai', 'BML') + +ot.ranks['BAL'] = ot.ranks['KAR'] + 1 + +ot.add_language ('ber', 'BBR') + +ot.remove_language_ot ('PGR') +ot.add_language ('el-polyton', 'PGR') + +bcp_47.macrolanguages['et'] = {'ekk'} + +bcp_47.names['flm'] = 'Falam Chin' +bcp_47.scopes['flm'] = ' (retired code)' +bcp_47.macrolanguages['flm'] = {'cfm'} + +ot.add_language ('und-fonipa', 'IPPH') + +ot.add_language ('und-fonnapa', 'APPH') + +ot.remove_language_ot ('IRT') +ot.add_language ('ga-Latg', 'IRT') + +ot.remove_language_ot ('KGE') +ot.add_language ('und-Geok', 'KGE') + +ot.add_language ('guk', 'GUK') +ot.names['GUK'] = 'Gumuz (SIL fonts)' +ot.ranks['GUK'] = ot.ranks['GMZ'] + 1 + +bcp_47.macrolanguages['id'] = {'in'} + +bcp_47.macrolanguages['ijo'] = {'ijc'} + +ot.add_language ('kht', 'KHN') +ot.names['KHN'] = ot.names['KHT'] + ' (Microsoft fonts)' +ot.names['KHT'] = ot.names['KHT'] + ' (OpenType spec and SIL fonts)' +ot.ranks['KHN'] = ot.ranks['KHT'] +ot.ranks['KHT'] += 1 + +ot.ranks['LCR'] = 
ot.ranks['MCR'] + 1 + +ot.names['MAL'] = 'Malayalam Traditional' +ot.ranks['MLR'] += 1 + +bcp_47.names['mhv'] = 'Arakanese' +bcp_47.scopes['mhv'] = ' (retired code)' + +ot.add_language ('no', 'NOR') + +ot.add_language ('qu', 'QUZ') + +bcp_47.macrolanguages['ro'].remove ('mo') +bcp_47.macrolanguages['ro-MD'].add ('mo') + +ot.add_language ('sgw', 'SGW') +ot.names['SGW'] = ot.names['CHG'] + ' (SIL fonts)' +ot.ranks['SGW'] = ot.ranks['CHG'] + 1 + +ot.remove_language_ot ('SYRE') +ot.remove_language_ot ('SYRJ') +ot.remove_language_ot ('SYRN') +ot.add_language ('und-Syre', 'SYRE') +ot.add_language ('und-Syrj', 'SYRJ') +ot.add_language ('und-Syrn', 'SYRN') + +bcp_47.names['xst'] = u"Silt'e" +bcp_47.scopes['xst'] = ' (retired code)' +bcp_47.macrolanguages['xst'] = {'stv', 'wle'} + +ot.add_language ('xwo', 'TOD') + +ot.remove_language_ot ('ZHH') +ot.remove_language_ot ('ZHP') +ot.remove_language_ot ('ZHT') +bcp_47.macrolanguages['zh'].remove ('lzh') +bcp_47.macrolanguages['zh'].remove ('yue') +ot.add_language ('zh-Hant-MO', 'ZHH') +ot.add_language ('zh-Hant-HK', 'ZHH') +ot.add_language ('zh-Hans', 'ZHS') +ot.add_language ('zh-Hant', 'ZHT') +ot.add_language ('zh-HK', 'ZHH') +ot.add_language ('zh-MO', 'ZHH') +ot.add_language ('zh-TW', 'ZHT') +ot.add_language ('lzh', 'ZHT') +ot.add_language ('lzh-Hans', 'ZHS') +ot.add_language ('yue', 'ZHH') +ot.add_language ('yue-Hans', 'ZHS') + +bcp_47.macrolanguages['zom'] = {'yos'} + +def rank_delta (bcp_47, ot): + """Return a delta to apply to a BCP 47 tag's rank. + + Most OpenType tags have a constant rank, but a few have ranks that + depend on the BCP 47 tag. + + Args: + bcp_47 (str): A BCP 47 tag. + ot (str): An OpenType tag to. + + Returns: + A number to add to ``ot``'s rank when sorting ``bcp_47``'s + OpenType equivalents. 
+ """ + if bcp_47 == 'ak' and ot == 'AKA': + return -1 + if bcp_47 == 'tw' and ot == 'TWI': + return -1 + return 0 + +disambiguation = { + 'ALT': 'alt', + 'ARK': 'rki', + 'BHI': 'bhb', + 'BLN': 'bjt', + 'BTI': 'beb', + 'CCHN': 'cco', + 'CMR': 'swb', + 'CRR': 'crx', + 'DUJ': 'dwu', + 'ECR': 'crj', + 'HND': 'hnd', + 'KIS': 'kqs', + 'LRC': 'bqi', + 'NDB': 'nd', + 'NIS': 'njz', + 'PLG': 'pce', + 'QIN': 'bgr', + 'SIG': 'stv', + 'ZHH': 'zh-HK', + 'ZHS': 'zh-Hans', + 'ZHT': 'zh-Hant', +} + +ot.inherit_from_macrolanguages () +bcp_47.remove_extra_macrolanguages () +ot.inherit_from_macrolanguages () +ot.sort_languages () + +print ('/* == Start of generated table == */') +print ('/*') +print (' * The following table is generated by running:') +print (' *') +print (' * %s languagetags language-subtag-registry' % sys.argv[0]) +print (' *') +print (' * on files with these headers:') +print (' *') +print (' * %s' % ot.header.strip ()) +print (' * %s' % bcp_47.header) +print (' */') +print () +print ('#ifndef HB_OT_TAG_TABLE_HH') +print ('#define HB_OT_TAG_TABLE_HH') +print () +print ('static const LangTag ot_languages[] = {') + +def hb_tag (tag): + """Convert a tag to ``HB_TAG`` form. + + Args: + tag (str): An OpenType tag. + + Returns: + A snippet of C++ representing ``tag``. + """ + return u"HB_TAG('%s','%s','%s','%s')" % tuple (('%-4s' % tag)[:4]) + +def get_variant_set (name): + """Return a set of variant language names from a name. + + Args: + name (str): A list of language names from the BCP 47 registry, + joined on ``'\\n'``. + + Returns: + A set of normalized language names. + """ + return set (unicodedata.normalize ('NFD', n.replace ('\u2019', u"'")) + .encode ('ASCII', 'ignore') + .strip () + for n in re.split ('[\n(),]', name) if n) + +def language_name_intersection (a, b): + """Return the names in common between two language names. + + Args: + a (str): A list of language names from the BCP 47 registry, + joined on ``'\\n'``. 
+ b (str): A list of language names from the BCP 47 registry, + joined on ``'\\n'``. + + Returns: + The normalized language names shared by ``a`` and ``b``. + """ + return get_variant_set (a).intersection (get_variant_set (b)) + +def get_matching_language_name (intersection, candidates): + return next (iter (c for c in candidates if not intersection.isdisjoint (get_variant_set (c)))) + +maximum_tags = 0 +for language, tags in sorted (ot.from_bcp_47.items ()): + if language == '' or '-' in language: + continue + print (' {\"%s\",\t{' % language, end='') + maximum_tags = max (maximum_tags, len (tags)) + tag_count = len (tags) + for i, tag in enumerate (tags, start=1): + if i > 1: + print ('\t\t ', end='') + print (hb_tag (tag), end='') + if i == tag_count: + print ('}}', end='') + print (',\t/* ', end='') + bcp_47_name = bcp_47.names.get (language, '') + bcp_47_name_candidates = bcp_47_name.split ('\n') + intersection = language_name_intersection (bcp_47_name, ot.names[tag]) + scope = bcp_47.scopes.get (language, '') + if not intersection: + write ('%s%s -> %s' % (bcp_47_name_candidates[0], scope, ot.names[tag])) + else: + name = get_matching_language_name (intersection, bcp_47_name_candidates) + bcp_47.names[language] = name + write ('%s%s' % (name if len (name) > len (ot.names[tag]) else ot.names[tag], scope)) + print (' */') + +print ('};') +print () +print ('static_assert (HB_OT_MAX_TAGS_PER_LANGUAGE == %iu, "");' % maximum_tags) +print () + +print ('/**') +print (' * hb_ot_tags_from_complex_language:') +print (' * @lang_str: a BCP 47 language tag to convert.') +print (' * @limit: a pointer to the end of the substring of @lang_str to consider for') +print (' * conversion.') +print (' * @count: maximum number of language tags to retrieve (IN) and actual number of') +print (' * language tags retrieved (OUT). 
If no tags are retrieved, it is not modified.') +print (' * @tags: array of size at least @language_count to store the language tag') +print (' * results') +print (' *') +print (' * Converts a multi-subtag BCP 47 language tag to language tags.') +print (' *') +print (' * Return value: Whether any language systems were retrieved.') +print (' **/') +print ('static bool') +print ('hb_ot_tags_from_complex_language (const char *lang_str,') +print ('\t\t\t\t const char *limit,') +print ('\t\t\t\t unsigned int *count /* IN/OUT */,') +print ('\t\t\t\t hb_tag_t *tags /* OUT */)') +print ('{') + +def print_subtag_matches (subtag): + if subtag: + print () + print (' && subtag_matches (lang_str, limit, "-%s")' % subtag, end='') + +for language, tags in sorted (ot.from_bcp_47.items (), key=lambda i: (-len (i[0]), i[0])): + lt = LanguageTag (language) + if len (lt.subtags) == 1 or lt.grandfathered and ot.from_bcp_47[lt.subtags[0]] == tags: + continue + print (' if (', end='') + if (lt.language == 'und' or + lt.variant in bcp_47.prefixes and + len (bcp_47.prefixes[lt.variant]) == 1): + if lt.variant in bcp_47.prefixes: + expect (next (iter (bcp_47.prefixes[lt.variant])) == lt.language, + '%s is not a valid prefix of %s' % (lt.language, lt.variant)) + print ('1', end='') + elif lt.grandfathered: + print ('0 == strcmp (lang_str, "%s")' % lt.language, end='') + else: + print ('lang_matches (lang_str, "%s' % lt.language, end='') + if lt.script: + print ('-%s' % lt.script, end='') + lt.script = None + if lt.region: + print ('-%s' % lt.region, end='') + lt.region = None + print ('")', end='') + print_subtag_matches (lt.script) + print_subtag_matches (lt.region) + print_subtag_matches (lt.variant) + print (')') + print (' {') + write (' /* %s */' % bcp_47.get_name (language)) + print () + if len (tags) == 1: + write (' tags[0] = %s; /* %s */' % (hb_tag (tags[0]), ot.names[tags[0]])) + print () + print (' *count = 1;') + else: + print (' unsigned int i;') + print (' hb_tag_t 
possible_tags[] = {') + for tag in tags: + write (' %s, /* %s */' % (hb_tag (tag), ot.names[tag])) + print () + print (' };') + print (' for (i = 0; i < %s && i < *count; i++)' % len (tags)) + print (' tags[i] = possible_tags[i];') + print (' *count = i;') + print (' return true;') + print (' }') + +print (' return false;') +print ('}') +print () +print ('/**') +print (' * hb_ot_ambiguous_tag_to_language') +print (' * @tag: A language tag.') +print (' *') +print (' * Converts @tag to a BCP 47 language tag if it is ambiguous (it corresponds to') +print (' * many language tags) and the best tag is not the alphabetically first, or if') +print (' * the best tag consists of multiple subtags.') +print (' *') +print (' * Return value: The #hb_language_t corresponding to the BCP 47 language tag,') +print (' * or #HB_LANGUAGE_INVALID if @tag is not ambiguous.') +print (' **/') +print ('static hb_language_t') +print ('hb_ot_ambiguous_tag_to_language (hb_tag_t tag)') +print ('{') +print (' switch (tag)') +print (' {') + +def verify_disambiguation_dict (): + """Verify and normalize ``disambiguation``. + + ``disambiguation`` is a map of ambiguous OpenType language system + tags to the particular BCP 47 tags they correspond to. This function + checks that all its keys really are ambiguous and that each key's + value is valid for that key. It checks that no ambiguous tag is + missing, except when it can figure out which BCP 47 tag is the best + by itself. + + It modifies ``disambiguation`` to remove keys whose values are the + same as those that the fallback would return anyway, and to add + ambiguous keys whose disambiguations it determined automatically. + + Raises: + AssertionError: Verification failed. 
+ """ + global bcp_47 + global disambiguation + global ot + for ot_tag, bcp_47_tags in ot.to_bcp_47.items (): + primary_tags = list (t for t in bcp_47_tags if t not in bcp_47.grandfathered and ot.from_bcp_47.get (t)[0] == ot_tag) + if len (primary_tags) == 1: + expect (ot_tag not in disambiguation, 'unnecessary disambiguation for OT tag: %s' % ot_tag) + if '-' in primary_tags[0]: + disambiguation[ot_tag] = primary_tags[0] + elif len (primary_tags) == 0: + expect (ot_tag not in disambiguation, 'There is no possible valid disambiguation for %s' % ot_tag) + else: + macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]') + if len (macrolanguages) != 1: + macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [family]') + if len (macrolanguages) != 1: + macrolanguages = list (t for t in primary_tags if 'retired code' not in bcp_47.scopes.get (t, '')) + if len (macrolanguages) != 1: + expect (ot_tag in disambiguation, 'ambiguous OT tag: %s %s' % (ot_tag, str (macrolanguages))) + expect (disambiguation[ot_tag] in bcp_47_tags, + '%s is not a valid disambiguation for %s' % (disambiguation[ot_tag], ot_tag)) + elif ot_tag not in disambiguation: + disambiguation[ot_tag] = macrolanguages[0] + if disambiguation[ot_tag] == sorted (primary_tags)[0] and '-' not in disambiguation[ot_tag]: + del disambiguation[ot_tag] + for ot_tag in disambiguation.keys (): + expect (ot_tag in ot.to_bcp_47, 'unknown OT tag: %s' % ot_tag) + +verify_disambiguation_dict () +for ot_tag, bcp_47_tag in sorted (disambiguation.items ()): + write (' case %s: /* %s */' % (hb_tag (ot_tag), ot.names[ot_tag])) + print () + write (' return hb_language_from_string (\"%s\", -1); /* %s */' % (bcp_47_tag, bcp_47.get_name (bcp_47_tag))) + print () + +print (' default:') +print (' return HB_LANGUAGE_INVALID;') +print (' }') +print ('}') + +print () +print ('#endif /* HB_OT_TAG_TABLE_HH */') +print () +print ('/* == End of generated table == */') + diff --git 
a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh new file mode 100644 index 000000000..0d06255a3 --- /dev/null +++ b/src/hb-ot-tag-table.hh @@ -0,0 +1,1997 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-tag-table.py languagetags language-subtag-registry + * + * on files with these headers: + * + * + * File-Date: 2018-04-23 + */ + +#ifndef HB_OT_TAG_TABLE_HH +#define HB_OT_TAG_TABLE_HH + +static const LangTag ot_languages[] = { + {"aa", {HB_TAG('A','F','R',' ')}}, /* Afar */ + {"aae", {HB_TAG('S','Q','I',' ')}}, /* Arbëreshë Albanian -> Albanian */ + {"aao", {HB_TAG('A','R','A',' ')}}, /* Algerian Saharan Arabic -> Arabic */ + {"aat", {HB_TAG('S','Q','I',' ')}}, /* Arvanitika Albanian -> Albanian */ + {"ab", {HB_TAG('A','B','K',' ')}}, /* Abkhazian */ + {"abh", {HB_TAG('A','R','A',' ')}}, /* Tajiki Arabic -> Arabic */ + {"abq", {HB_TAG('A','B','A',' ')}}, /* Abaza */ + {"abv", {HB_TAG('A','R','A',' ')}}, /* Baharna Arabic -> Arabic */ + {"acf", {HB_TAG('F','A','N',' ')}}, /* Saint Lucian Creole French -> French Antillean */ + {"ach", {HB_TAG('A','C','H',' ')}}, /* Acoli -> Acholi */ + {"acm", {HB_TAG('A','R','A',' ')}}, /* Mesopotamian Arabic -> Arabic */ + {"acq", {HB_TAG('A','R','A',' ')}}, /* Ta'izzi-Adeni Arabic -> Arabic */ + {"acr", {HB_TAG('A','C','R',' ')}}, /* Achi */ + {"acw", {HB_TAG('A','R','A',' ')}}, /* Hijazi Arabic -> Arabic */ + {"acx", {HB_TAG('A','R','A',' ')}}, /* Omani Arabic -> Arabic */ + {"acy", {HB_TAG('A','R','A',' ')}}, /* Cypriot Arabic -> Arabic */ + {"ada", {HB_TAG('D','N','G',' ')}}, /* Adangme -> Dangme */ + {"adf", {HB_TAG('A','R','A',' ')}}, /* Dhofari Arabic -> Arabic */ + {"adp", {HB_TAG('D','Z','N',' ')}}, /* Adap (retired code) -> Dzongkha */ + {"ady", {HB_TAG('A','D','Y',' ')}}, /* Adyghe */ + {"aeb", {HB_TAG('A','R','A',' ')}}, /* Tunisian Arabic -> Arabic */ + {"aec", {HB_TAG('A','R','A',' ')}}, /* Saidi Arabic -> Arabic */ + {"af", {HB_TAG('A','F','K',' ')}}, /* 
Afrikaans */ + {"afb", {HB_TAG('A','R','A',' ')}}, /* Gulf Arabic -> Arabic */ + {"ahg", {HB_TAG('A','G','W',' ')}}, /* Qimant -> Agaw */ + {"aht", {HB_TAG('A','T','H',' ')}}, /* Ahtena -> Athapaskan */ + {"aii", {HB_TAG('S','W','A',' '), /* Assyrian Neo-Aramaic -> Swadaya Aramaic */ + HB_TAG('S','Y','R',' ')}}, /* Assyrian Neo-Aramaic -> Syriac */ + {"aio", {HB_TAG('A','I','O',' ')}}, /* Aiton */ + {"aiw", {HB_TAG('A','R','I',' ')}}, /* Aari */ + {"ajp", {HB_TAG('A','R','A',' ')}}, /* South Levantine Arabic -> Arabic */ + {"ak", {HB_TAG('A','K','A',' '), /* Akan [macrolanguage] */ + HB_TAG('T','W','I',' ')}}, /* Akan [macrolanguage] -> Twi */ + {"aln", {HB_TAG('S','Q','I',' ')}}, /* Gheg Albanian -> Albanian */ + {"als", {HB_TAG('S','Q','I',' ')}}, /* Tosk Albanian -> Albanian */ + {"alt", {HB_TAG('A','L','T',' ')}}, /* Southern Altai -> Altai */ + {"am", {HB_TAG('A','M','H',' ')}}, /* Amharic */ + {"amf", {HB_TAG('H','B','N',' ')}}, /* Hamer-Banna -> Hammer-Banna */ + {"amw", {HB_TAG('S','Y','R',' ')}}, /* Western Neo-Aramaic -> Syriac */ + {"an", {HB_TAG('A','R','G',' ')}}, /* Aragonese */ + {"ang", {HB_TAG('A','N','G',' ')}}, /* Old English (ca. 
450-1100) -> Anglo-Saxon */ + {"apc", {HB_TAG('A','R','A',' ')}}, /* North Levantine Arabic -> Arabic */ + {"apd", {HB_TAG('A','R','A',' ')}}, /* Sudanese Arabic -> Arabic */ + {"apj", {HB_TAG('A','T','H',' ')}}, /* Jicarilla Apache -> Athapaskan */ + {"apk", {HB_TAG('A','T','H',' ')}}, /* Kiowa Apache -> Athapaskan */ + {"apl", {HB_TAG('A','T','H',' ')}}, /* Lipan Apache -> Athapaskan */ + {"apm", {HB_TAG('A','T','H',' ')}}, /* Mescalero-Chiricahua Apache -> Athapaskan */ + {"apw", {HB_TAG('A','T','H',' ')}}, /* Western Apache -> Athapaskan */ + {"ar", {HB_TAG('A','R','A',' ')}}, /* Arabic [macrolanguage] */ + {"arb", {HB_TAG('A','R','A',' ')}}, /* Standard Arabic -> Arabic */ + {"arn", {HB_TAG('M','A','P',' ')}}, /* Mapudungun */ + {"arq", {HB_TAG('A','R','A',' ')}}, /* Algerian Arabic -> Arabic */ + {"ars", {HB_TAG('A','R','A',' ')}}, /* Najdi Arabic -> Arabic */ + {"ary", {HB_TAG('M','O','R',' ')}}, /* Moroccan Arabic -> Moroccan */ + {"arz", {HB_TAG('A','R','A',' ')}}, /* Egyptian Arabic -> Arabic */ + {"as", {HB_TAG('A','S','M',' ')}}, /* Assamese */ + {"ast", {HB_TAG('A','S','T',' ')}}, /* Asturian */ + {"ath", {HB_TAG('A','T','H',' ')}}, /* Athapascan [family] -> Athapaskan */ + {"atj", {HB_TAG('R','C','R',' ')}}, /* Atikamekw -> R-Cree */ + {"atv", {HB_TAG('A','L','T',' ')}}, /* Northern Altai -> Altai */ + {"auz", {HB_TAG('A','R','A',' ')}}, /* Uzbeki Arabic -> Arabic */ + {"av", {HB_TAG('A','V','R',' ')}}, /* Avaric -> Avar */ + {"avl", {HB_TAG('A','R','A',' ')}}, /* Eastern Egyptian Bedawi Arabic -> Arabic */ + {"awa", {HB_TAG('A','W','A',' ')}}, /* Awadhi */ + {"ay", {HB_TAG('A','Y','M',' ')}}, /* Aymara [macrolanguage] */ + {"ayc", {HB_TAG('A','Y','M',' ')}}, /* Southern Aymara -> Aymara */ + {"ayh", {HB_TAG('A','R','A',' ')}}, /* Hadrami Arabic -> Arabic */ + {"ayl", {HB_TAG('A','R','A',' ')}}, /* Libyan Arabic -> Arabic */ + {"ayn", {HB_TAG('A','R','A',' ')}}, /* Sanaani Arabic -> Arabic */ + {"ayp", {HB_TAG('A','R','A',' ')}}, /* North Mesopotamian 
Arabic -> Arabic */ + {"ayr", {HB_TAG('A','Y','M',' ')}}, /* Central Aymara -> Aymara */ + {"az", {HB_TAG('A','Z','E',' ')}}, /* Azerbaijani [macrolanguage] */ + {"azb", {HB_TAG('A','Z','B',' ')}}, /* South Azerbaijani -> Torki */ + {"azj", {HB_TAG('A','Z','E',' ')}}, /* North Azerbaijani -> Azerbaijani */ + {"ba", {HB_TAG('B','S','H',' ')}}, /* Bashkir */ + {"bad", {HB_TAG('B','A','D','0')}}, /* Banda [family] */ + {"bai", {HB_TAG('B','M','L',' ')}}, /* Bamileke [family] */ + {"bal", {HB_TAG('B','L','I',' ')}}, /* Baluchi [macrolanguage] */ + {"ban", {HB_TAG('B','A','N',' ')}}, /* Balinese */ + {"bar", {HB_TAG('B','A','R',' ')}}, /* Bavarian */ + {"bbc", {HB_TAG('B','B','C',' ')}}, /* Batak Toba */ + {"bbz", {HB_TAG('A','R','A',' ')}}, /* Babalia Creole Arabic -> Arabic */ + {"bcc", {HB_TAG('B','L','I',' ')}}, /* Southern Balochi -> Baluchi */ + {"bci", {HB_TAG('B','A','U',' ')}}, /* Baoulé -> Baulé */ + {"bcl", {HB_TAG('B','I','K',' ')}}, /* Central Bikol -> Bikol */ + {"bcq", {HB_TAG('B','C','H',' ')}}, /* Bench */ + {"bcr", {HB_TAG('A','T','H',' ')}}, /* Babine -> Athapaskan */ + {"bdy", {HB_TAG('B','D','Y',' ')}}, /* Bandjalang */ + {"be", {HB_TAG('B','E','L',' ')}}, /* Belarusian -> Belarussian */ + {"bea", {HB_TAG('A','T','H',' ')}}, /* Beaver -> Athapaskan */ + {"beb", {HB_TAG('B','T','I',' ')}}, /* Bebele -> Beti */ + {"bem", {HB_TAG('B','E','M',' ')}}, /* Bemba (Zambia) */ + {"ber", {HB_TAG('B','B','R',' ')}}, /* Berber [family] */ + {"bfq", {HB_TAG('B','A','D',' ')}}, /* Badaga */ + {"bft", {HB_TAG('B','L','T',' ')}}, /* Balti */ + {"bfu", {HB_TAG('L','A','H',' ')}}, /* Gahri -> Lahuli */ + {"bfy", {HB_TAG('B','A','G',' ')}}, /* Bagheli -> Baghelkhandi */ + {"bg", {HB_TAG('B','G','R',' ')}}, /* Bulgarian */ + {"bgc", {HB_TAG('B','G','C',' ')}}, /* Haryanvi */ + {"bgn", {HB_TAG('B','L','I',' ')}}, /* Western Balochi -> Baluchi */ + {"bgp", {HB_TAG('B','L','I',' ')}}, /* Eastern Balochi -> Baluchi */ + {"bgq", {HB_TAG('B','G','Q',' ')}}, /* Bagri */ + 
{"bgr", {HB_TAG('Q','I','N',' ')}}, /* Bawm Chin -> Chin */ + {"bhb", {HB_TAG('B','H','I',' ')}}, /* Bhili */ + {"bhi", {HB_TAG('B','H','I',' ')}}, /* Bhilali -> Bhili */ + {"bhk", {HB_TAG('B','I','K',' ')}}, /* Albay Bicolano (retired code) -> Bikol */ + {"bho", {HB_TAG('B','H','O',' ')}}, /* Bhojpuri */ + {"bhr", {HB_TAG('M','L','G',' ')}}, /* Bara Malagasy -> Malagasy */ + {"bi", {HB_TAG('B','I','S',' ')}}, /* Bislama */ + {"bik", {HB_TAG('B','I','K',' ')}}, /* Bikol [macrolanguage] */ + {"bin", {HB_TAG('E','D','O',' ')}}, /* Edo */ + {"bjj", {HB_TAG('B','J','J',' ')}}, /* Kanauji */ + {"bjn", {HB_TAG('M','L','Y',' ')}}, /* Banjar -> Malay */ + {"bjq", {HB_TAG('M','L','G',' ')}}, /* Southern Betsimisaraka Malagasy (retired code) -> Malagasy */ + {"bjt", {HB_TAG('B','L','N',' ')}}, /* Balanta-Ganja -> Balante */ + {"bla", {HB_TAG('B','K','F',' ')}}, /* Siksika -> Blackfoot */ + {"ble", {HB_TAG('B','L','N',' ')}}, /* Balanta-Kentohe -> Balante */ + {"blk", {HB_TAG('B','L','K',' ')}}, /* Pa'o Karen */ + {"bln", {HB_TAG('B','I','K',' ')}}, /* Southern Catanduanes Bikol -> Bikol */ + {"bm", {HB_TAG('B','M','B',' ')}}, /* Bambara (Bamanankan) */ + {"bmm", {HB_TAG('M','L','G',' ')}}, /* Northern Betsimisaraka Malagasy -> Malagasy */ + {"bn", {HB_TAG('B','E','N',' ')}}, /* Bengali */ + {"bo", {HB_TAG('T','I','B',' ')}}, /* Tibetan */ + {"bpy", {HB_TAG('B','P','Y',' ')}}, /* Bishnupriya -> Bishnupriya Manipuri */ + {"bqi", {HB_TAG('L','R','C',' ')}}, /* Bakhtiari -> Luri */ + {"br", {HB_TAG('B','R','E',' ')}}, /* Breton */ + {"bra", {HB_TAG('B','R','I',' ')}}, /* Braj -> Braj Bhasha */ + {"brh", {HB_TAG('B','R','H',' ')}}, /* Brahui */ + {"brx", {HB_TAG('B','R','X',' ')}}, /* Bodo (India) */ + {"bs", {HB_TAG('B','O','S',' ')}}, /* Bosnian */ + {"bsk", {HB_TAG('B','S','K',' ')}}, /* Burushaski */ + {"btb", {HB_TAG('B','T','I',' ')}}, /* Beti (Cameroon) (retired code) */ + {"btj", {HB_TAG('M','L','Y',' ')}}, /* Bacanese Malay -> Malay */ + {"bto", {HB_TAG('B','I','K',' 
')}}, /* Rinconada Bikol -> Bikol */ + {"bts", {HB_TAG('B','T','S',' ')}}, /* Batak Simalungun */ + {"bug", {HB_TAG('B','U','G',' ')}}, /* Buginese -> Bugis */ + {"bum", {HB_TAG('B','T','I',' ')}}, /* Bulu (Cameroon) -> Beti */ + {"bve", {HB_TAG('M','L','Y',' ')}}, /* Berau Malay -> Malay */ + {"bvu", {HB_TAG('M','L','Y',' ')}}, /* Bukit Malay -> Malay */ + {"bxk", {HB_TAG('L','U','H',' ')}}, /* Bukusu -> Luyia */ + {"bxp", {HB_TAG('B','T','I',' ')}}, /* Bebil -> Beti */ + {"bxr", {HB_TAG('R','B','U',' ')}}, /* Russia Buriat -> Russian Buriat */ + {"byn", {HB_TAG('B','I','L',' ')}}, /* Bilin -> Bilen */ + {"byv", {HB_TAG('B','Y','V',' ')}}, /* Medumba */ + {"bzc", {HB_TAG('M','L','G',' ')}}, /* Southern Betsimisaraka Malagasy -> Malagasy */ + {"ca", {HB_TAG('C','A','T',' ')}}, /* Catalan */ + {"caf", {HB_TAG('C','R','R',' '), /* Southern Carrier -> Carrier */ + HB_TAG('A','T','H',' ')}}, /* Southern Carrier -> Athapaskan */ + {"cak", {HB_TAG('C','A','K',' ')}}, /* Kaqchikel */ + {"cbk", {HB_TAG('C','B','K',' ')}}, /* Chavacano -> Zamboanga Chavacano */ + {"cbl", {HB_TAG('Q','I','N',' ')}}, /* Bualkhaw Chin -> Chin */ + {"cco", {HB_TAG('C','C','H','N')}}, /* Comaltepec Chinantec -> Chinantec */ + {"ccq", {HB_TAG('A','R','K',' ')}}, /* Chaungtha (retired code) -> Rakhine */ + {"cdo", {HB_TAG('Z','H','S',' ')}}, /* Min Dong Chinese -> Chinese Simplified */ + {"ce", {HB_TAG('C','H','E',' ')}}, /* Chechen */ + {"ceb", {HB_TAG('C','E','B',' ')}}, /* Cebuano */ + {"cfm", {HB_TAG('H','A','L',' '), /* Falam Chin -> Halam */ + HB_TAG('Q','I','N',' ')}}, /* Falam Chin -> Chin */ + {"cgg", {HB_TAG('C','G','G',' ')}}, /* Chiga */ + {"ch", {HB_TAG('C','H','A',' ')}}, /* Chamorro */ + {"chj", {HB_TAG('C','C','H','N')}}, /* Ojitlán Chinantec -> Chinantec */ + {"chk", {HB_TAG('C','H','K','0')}}, /* Chuukese */ + {"cho", {HB_TAG('C','H','O',' ')}}, /* Choctaw */ + {"chp", {HB_TAG('C','H','P',' '), /* Chipewyan */ + HB_TAG('S','A','Y',' '), /* Chipewyan -> Sayisi */ + 
HB_TAG('A','T','H',' ')}}, /* Chipewyan -> Athapaskan */ + {"chq", {HB_TAG('C','C','H','N')}}, /* Quiotepec Chinantec -> Chinantec */ + {"chr", {HB_TAG('C','H','R',' ')}}, /* Cherokee */ + {"chy", {HB_TAG('C','H','Y',' ')}}, /* Cheyenne */ + {"chz", {HB_TAG('C','C','H','N')}}, /* Ozumacín Chinantec -> Chinantec */ + {"ciw", {HB_TAG('O','J','B',' ')}}, /* Chippewa -> Ojibway */ + {"cja", {HB_TAG('C','J','A',' ')}}, /* Western Cham */ + {"cjm", {HB_TAG('C','J','M',' ')}}, /* Eastern Cham */ + {"cjy", {HB_TAG('Z','H','S',' ')}}, /* Jinyu Chinese -> Chinese Simplified */ + {"cka", {HB_TAG('Q','I','N',' ')}}, /* Khumi Awa Chin (retired code) -> Chin */ + {"ckb", {HB_TAG('K','U','R',' ')}}, /* Central Kurdish -> Kurdish */ + {"ckt", {HB_TAG('C','H','K',' ')}}, /* Chukot -> Chukchi */ + {"clc", {HB_TAG('A','T','H',' ')}}, /* Chilcotin -> Athapaskan */ + {"cld", {HB_TAG('S','Y','R',' ')}}, /* Chaldean Neo-Aramaic -> Syriac */ + {"cle", {HB_TAG('C','C','H','N')}}, /* Lealao Chinantec -> Chinantec */ + {"cmn", {HB_TAG('Z','H','S',' ')}}, /* Mandarin Chinese -> Chinese Simplified */ + {"cmr", {HB_TAG('Q','I','N',' ')}}, /* Mro-Khimi Chin -> Chin */ + {"cnb", {HB_TAG('Q','I','N',' ')}}, /* Chinbon Chin -> Chin */ + {"cnh", {HB_TAG('Q','I','N',' ')}}, /* Hakha Chin -> Chin */ + {"cnk", {HB_TAG('Q','I','N',' ')}}, /* Khumi Chin -> Chin */ + {"cnl", {HB_TAG('C','C','H','N')}}, /* Lalana Chinantec -> Chinantec */ + {"cnt", {HB_TAG('C','C','H','N')}}, /* Tepetotutla Chinantec -> Chinantec */ + {"cnw", {HB_TAG('Q','I','N',' ')}}, /* Ngawn Chin -> Chin */ + {"co", {HB_TAG('C','O','S',' ')}}, /* Corsican */ + {"coa", {HB_TAG('M','L','Y',' ')}}, /* Cocos Islands Malay -> Malay */ + {"cop", {HB_TAG('C','O','P',' ')}}, /* Coptic */ + {"coq", {HB_TAG('A','T','H',' ')}}, /* Coquille -> Athapaskan */ + {"cpa", {HB_TAG('C','C','H','N')}}, /* Palantla Chinantec -> Chinantec */ + {"cpp", {HB_TAG('C','P','P',' ')}}, /* Portuguese-based creoles and pidgins [family] -> Creoles */ + {"cpx", 
{HB_TAG('Z','H','S',' ')}}, /* Pu-Xian Chinese -> Chinese Simplified */ + {"cqd", {HB_TAG('H','M','N',' ')}}, /* Chuanqiandian Cluster Miao -> Hmong */ + {"cqu", {HB_TAG('Q','U','H',' ')}}, /* Chilean Quechua (retired code) -> Quechua (Bolivia) */ + {"cr", {HB_TAG('C','R','E',' '), /* Cree [macrolanguage] */ + HB_TAG('Y','C','R',' ')}}, /* Cree [macrolanguage] -> Y-Cree */ + {"crh", {HB_TAG('C','R','T',' ')}}, /* Crimean Tatar */ + {"crj", {HB_TAG('E','C','R',' ')}}, /* Southern East Cree -> Eastern Cree */ + {"crk", {HB_TAG('W','C','R',' ')}}, /* Plains Cree -> West-Cree */ + {"crl", {HB_TAG('E','C','R',' ')}}, /* Northern East Cree -> Eastern Cree */ + {"crm", {HB_TAG('M','C','R',' '), /* Moose Cree */ + HB_TAG('L','C','R',' ')}}, /* Moose Cree -> L-Cree */ + {"crx", {HB_TAG('C','R','R',' '), /* Carrier */ + HB_TAG('A','T','H',' ')}}, /* Carrier -> Athapaskan */ + {"cs", {HB_TAG('C','S','Y',' ')}}, /* Czech */ + {"csa", {HB_TAG('C','C','H','N')}}, /* Chiltepec Chinantec -> Chinantec */ + {"csb", {HB_TAG('C','S','B',' ')}}, /* Kashubian */ + {"csh", {HB_TAG('Q','I','N',' ')}}, /* Asho Chin -> Chin */ + {"cso", {HB_TAG('C','C','H','N')}}, /* Sochiapam Chinantec -> Chinantec */ + {"csw", {HB_TAG('N','C','R',' '), /* Swampy Cree -> N-Cree */ + HB_TAG('N','H','C',' ')}}, /* Swampy Cree -> Norway House Cree */ + {"csy", {HB_TAG('Q','I','N',' ')}}, /* Siyin Chin -> Chin */ + {"ctc", {HB_TAG('A','T','H',' ')}}, /* Chetco -> Athapaskan */ + {"ctd", {HB_TAG('Q','I','N',' ')}}, /* Tedim Chin -> Chin */ + {"cte", {HB_TAG('C','C','H','N')}}, /* Tepinapa Chinantec -> Chinantec */ + {"ctg", {HB_TAG('C','T','G',' ')}}, /* Chittagonian */ + {"ctl", {HB_TAG('C','C','H','N')}}, /* Tlacoatzintepec Chinantec -> Chinantec */ + {"cts", {HB_TAG('B','I','K',' ')}}, /* Northern Catanduanes Bikol -> Bikol */ + {"cu", {HB_TAG('C','S','L',' ')}}, /* Church Slavonic */ + {"cuc", {HB_TAG('C','C','H','N')}}, /* Usila Chinantec -> Chinantec */ + {"cuk", {HB_TAG('C','U','K',' ')}}, /* San Blas 
Kuna */ + {"cv", {HB_TAG('C','H','U',' ')}}, /* Chuvash */ + {"cvn", {HB_TAG('C','C','H','N')}}, /* Valle Nacional Chinantec -> Chinantec */ + {"cwd", {HB_TAG('D','C','R',' '), /* Woods Cree */ + HB_TAG('T','C','R',' ')}}, /* Woods Cree -> TH-Cree */ + {"cy", {HB_TAG('W','E','L',' ')}}, /* Welsh */ + {"czh", {HB_TAG('Z','H','S',' ')}}, /* Huizhou Chinese -> Chinese Simplified */ + {"czo", {HB_TAG('Z','H','S',' ')}}, /* Min Zhong Chinese -> Chinese Simplified */ + {"czt", {HB_TAG('Q','I','N',' ')}}, /* Zotung Chin -> Chin */ + {"da", {HB_TAG('D','A','N',' ')}}, /* Danish */ + {"dao", {HB_TAG('Q','I','N',' ')}}, /* Daai Chin -> Chin */ + {"dap", {HB_TAG('N','I','S',' ')}}, /* Nisi (India) (retired code) */ + {"dar", {HB_TAG('D','A','R',' ')}}, /* Dargwa */ + {"dax", {HB_TAG('D','A','X',' ')}}, /* Dayi */ + {"de", {HB_TAG('D','E','U',' ')}}, /* German */ + {"den", {HB_TAG('S','L','A',' '), /* Slave (Athapascan) [macrolanguage] -> Slavey */ + HB_TAG('A','T','H',' ')}}, /* Slave (Athapascan) [macrolanguage] -> Athapaskan */ + {"dgo", {HB_TAG('D','G','O',' ')}}, /* Dogri */ + {"dgr", {HB_TAG('A','T','H',' ')}}, /* Dogrib -> Athapaskan */ + {"dhd", {HB_TAG('M','A','W',' ')}}, /* Dhundari -> Marwari */ + {"dhg", {HB_TAG('D','H','G',' ')}}, /* Dhangu */ + {"dib", {HB_TAG('D','N','K',' ')}}, /* South Central Dinka -> Dinka */ + {"dik", {HB_TAG('D','N','K',' ')}}, /* Southwestern Dinka -> Dinka */ + {"din", {HB_TAG('D','N','K',' ')}}, /* Dinka [macrolanguage] */ + {"dip", {HB_TAG('D','N','K',' ')}}, /* Northeastern Dinka -> Dinka */ + {"diq", {HB_TAG('D','I','Q',' ')}}, /* Dimli */ + {"diw", {HB_TAG('D','N','K',' ')}}, /* Northwestern Dinka -> Dinka */ + {"dje", {HB_TAG('D','J','R',' ')}}, /* Zarma */ + {"djr", {HB_TAG('D','J','R','0')}}, /* Djambarrpuyngu */ + {"dks", {HB_TAG('D','N','K',' ')}}, /* Southeastern Dinka -> Dinka */ + {"dng", {HB_TAG('D','U','N',' ')}}, /* Dungan */ + {"dnj", {HB_TAG('D','N','J',' ')}}, /* Dan */ + {"doi", {HB_TAG('D','G','R',' ')}}, /* Dogri 
[macrolanguage] */ + {"drh", {HB_TAG('M','N','G',' ')}}, /* Darkhat (retired code) -> Mongolian */ + {"drw", {HB_TAG('D','R','I',' ')}}, /* Darwazi (retired code) -> Dari */ + {"dsb", {HB_TAG('L','S','B',' ')}}, /* Lower Sorbian */ + {"dty", {HB_TAG('N','E','P',' ')}}, /* Dotyali -> Nepali */ + {"duj", {HB_TAG('D','U','J',' ')}}, /* Dhuwal (retired code) */ + {"dup", {HB_TAG('M','L','Y',' ')}}, /* Duano -> Malay */ + {"dv", {HB_TAG('D','I','V',' '), /* Divehi (Dhivehi, Maldivian) */ + HB_TAG('D','H','V',' ')}}, /* Divehi (Dhivehi, Maldivian) (deprecated) */ + {"dwu", {HB_TAG('D','U','J',' ')}}, /* Dhuwal */ + {"dwy", {HB_TAG('D','U','J',' ')}}, /* Dhuwaya -> Dhuwal */ + {"dyu", {HB_TAG('J','U','L',' ')}}, /* Dyula -> Jula */ + {"dz", {HB_TAG('D','Z','N',' ')}}, /* Dzongkha */ + {"ee", {HB_TAG('E','W','E',' ')}}, /* Ewe */ + {"efi", {HB_TAG('E','F','I',' ')}}, /* Efik */ + {"ekk", {HB_TAG('E','T','I',' ')}}, /* Standard Estonian -> Estonian */ + {"el", {HB_TAG('E','L','L',' ')}}, /* Modern Greek (1453-) -> Greek */ + {"emk", {HB_TAG('E','M','K',' '), /* Eastern Maninkakan */ + HB_TAG('M','N','K',' ')}}, /* Eastern Maninkakan -> Maninka */ + {"en", {HB_TAG('E','N','G',' ')}}, /* English */ + {"enb", {HB_TAG('K','A','L',' ')}}, /* Markweeta -> Kalenjin */ + {"enf", {HB_TAG('F','N','E',' ')}}, /* Forest Enets -> Forest Nenets */ + {"enh", {HB_TAG('T','N','E',' ')}}, /* Tundra Enets -> Tundra Nenets */ + {"eo", {HB_TAG('N','T','O',' ')}}, /* Esperanto */ + {"es", {HB_TAG('E','S','P',' ')}}, /* Spanish */ + {"esg", {HB_TAG('G','O','N',' ')}}, /* Aheri Gondi -> Gondi */ + {"esi", {HB_TAG('I','P','K',' ')}}, /* North Alaskan Inupiatun -> Inupiat */ + {"esk", {HB_TAG('I','P','K',' ')}}, /* Northwest Alaska Inupiatun -> Inupiat */ + {"esu", {HB_TAG('E','S','U',' ')}}, /* Central Yupik */ + {"et", {HB_TAG('E','T','I',' ')}}, /* Estonian [macrolanguage] */ + {"eto", {HB_TAG('B','T','I',' ')}}, /* Eton (Cameroon) -> Beti */ + {"eu", {HB_TAG('E','U','Q',' ')}}, /* Basque */ + 
{"eve", {HB_TAG('E','V','N',' ')}}, /* Even */ + {"evn", {HB_TAG('E','V','K',' ')}}, /* Evenki */ + {"ewo", {HB_TAG('B','T','I',' ')}}, /* Ewondo -> Beti */ + {"eyo", {HB_TAG('K','A','L',' ')}}, /* Keiyo -> Kalenjin */ + {"fa", {HB_TAG('F','A','R',' ')}}, /* Persian [macrolanguage] */ + {"fan", {HB_TAG('F','A','N','0')}}, /* Fang (Equatorial Guinea) */ + {"fat", {HB_TAG('F','A','T',' ')}}, /* Fanti */ + {"fbl", {HB_TAG('B','I','K',' ')}}, /* West Albay Bikol -> Bikol */ + {"ff", {HB_TAG('F','U','L',' ')}}, /* Fulah [macrolanguage] */ + {"ffm", {HB_TAG('F','U','L',' ')}}, /* Maasina Fulfulde -> Fulah */ + {"fi", {HB_TAG('F','I','N',' ')}}, /* Finnish */ + {"fil", {HB_TAG('P','I','L',' ')}}, /* Filipino */ + {"fj", {HB_TAG('F','J','I',' ')}}, /* Fijian */ + {"flm", {HB_TAG('H','A','L',' '), /* Falam Chin (retired code) -> Halam */ + HB_TAG('Q','I','N',' ')}}, /* Falam Chin (retired code) -> Chin */ + {"fmp", {HB_TAG('F','M','P',' ')}}, /* Fe'fe' */ + {"fo", {HB_TAG('F','O','S',' ')}}, /* Faroese */ + {"fon", {HB_TAG('F','O','N',' ')}}, /* Fon */ + {"fr", {HB_TAG('F','R','A',' ')}}, /* French */ + {"frc", {HB_TAG('F','R','C',' ')}}, /* Cajun French */ + {"frp", {HB_TAG('F','R','P',' ')}}, /* Arpitan */ + {"fub", {HB_TAG('F','U','L',' ')}}, /* Adamawa Fulfulde -> Fulah */ + {"fuc", {HB_TAG('F','U','L',' ')}}, /* Pulaar -> Fulah */ + {"fue", {HB_TAG('F','U','L',' ')}}, /* Borgu Fulfulde -> Fulah */ + {"fuf", {HB_TAG('F','T','A',' ')}}, /* Pular -> Futa */ + {"fuh", {HB_TAG('F','U','L',' ')}}, /* Western Niger Fulfulde -> Fulah */ + {"fui", {HB_TAG('F','U','L',' ')}}, /* Bagirmi Fulfulde -> Fulah */ + {"fuq", {HB_TAG('F','U','L',' ')}}, /* Central-Eastern Niger Fulfulde -> Fulah */ + {"fur", {HB_TAG('F','R','L',' ')}}, /* Friulian */ + {"fuv", {HB_TAG('F','U','V',' ')}}, /* Nigerian Fulfulde */ + {"fy", {HB_TAG('F','R','I',' ')}}, /* Western Frisian -> Frisian */ + {"ga", {HB_TAG('I','R','I',' ')}}, /* Irish */ + {"gaa", {HB_TAG('G','A','D',' ')}}, /* Ga */ + {"gag", 
{HB_TAG('G','A','G',' ')}}, /* Gagauz */ + {"gan", {HB_TAG('Z','H','S',' ')}}, /* Gan Chinese -> Chinese Simplified */ + {"gax", {HB_TAG('O','R','O',' ')}}, /* Borana-Arsi-Guji Oromo -> Oromo */ + {"gaz", {HB_TAG('O','R','O',' ')}}, /* West Central Oromo -> Oromo */ + {"gbm", {HB_TAG('G','A','W',' ')}}, /* Garhwali */ + {"gce", {HB_TAG('A','T','H',' ')}}, /* Galice -> Athapaskan */ + {"gd", {HB_TAG('G','A','E',' ')}}, /* Scottish Gaelic (Gaelic) */ + {"gda", {HB_TAG('R','A','J',' ')}}, /* Gade Lohar -> Rajasthani */ + {"gez", {HB_TAG('G','E','Z',' ')}}, /* Geez -> Ge'ez */ + {"ggo", {HB_TAG('G','O','N',' ')}}, /* Southern Gondi (retired code) -> Gondi */ + {"gih", {HB_TAG('G','I','H',' ')}}, /* Githabul */ + {"gil", {HB_TAG('G','I','L','0')}}, /* Kiribati (Gilbertese) */ + {"gju", {HB_TAG('R','A','J',' ')}}, /* Gujari -> Rajasthani */ + {"gkp", {HB_TAG('G','K','P',' ')}}, /* Guinea Kpelle -> Kpelle (Guinea) */ + {"gl", {HB_TAG('G','A','L',' ')}}, /* Galician */ + {"gld", {HB_TAG('N','A','N',' ')}}, /* Nanai */ + {"glk", {HB_TAG('G','L','K',' ')}}, /* Gilaki */ + {"gn", {HB_TAG('G','U','A',' ')}}, /* Guarani [macrolanguage] */ + {"gnn", {HB_TAG('G','N','N',' ')}}, /* Gumatj */ + {"gno", {HB_TAG('G','O','N',' ')}}, /* Northern Gondi -> Gondi */ + {"gnw", {HB_TAG('G','U','A',' ')}}, /* Western Bolivian Guaraní -> Guarani */ + {"gog", {HB_TAG('G','O','G',' ')}}, /* Gogo */ + {"gom", {HB_TAG('K','O','K',' ')}}, /* Goan Konkani -> Konkani */ + {"gon", {HB_TAG('G','O','N',' ')}}, /* Gondi [macrolanguage] */ + {"grt", {HB_TAG('G','R','O',' ')}}, /* Garo */ + {"gru", {HB_TAG('S','O','G',' ')}}, /* Kistane -> Sodo Gurage */ + {"gsw", {HB_TAG('A','L','S',' ')}}, /* Alsatian */ + {"gu", {HB_TAG('G','U','J',' ')}}, /* Gujarati */ + {"guc", {HB_TAG('G','U','C',' ')}}, /* Wayuu */ + {"guf", {HB_TAG('G','U','F',' ')}}, /* Gupapuyngu */ + {"gug", {HB_TAG('G','U','A',' ')}}, /* Paraguayan Guaraní -> Guarani */ + {"gui", {HB_TAG('G','U','A',' ')}}, /* Eastern Bolivian Guaraní -> 
Guarani */ + {"guk", {HB_TAG('G','M','Z',' '), /* Gumuz */ + HB_TAG('G','U','K',' ')}}, /* Gumuz (SIL fonts) */ + {"gun", {HB_TAG('G','U','A',' ')}}, /* Mbyá Guaraní -> Guarani */ + {"guz", {HB_TAG('G','U','Z',' ')}}, /* Gusii */ + {"gv", {HB_TAG('M','N','X',' ')}}, /* Manx */ + {"gwi", {HB_TAG('A','T','H',' ')}}, /* Gwichʼin -> Athapaskan */ + {"ha", {HB_TAG('H','A','U',' ')}}, /* Hausa */ + {"haa", {HB_TAG('A','T','H',' ')}}, /* Han -> Athapaskan */ + {"hae", {HB_TAG('O','R','O',' ')}}, /* Eastern Oromo -> Oromo */ + {"hak", {HB_TAG('Z','H','S',' ')}}, /* Hakka Chinese -> Chinese Simplified */ + {"har", {HB_TAG('H','R','I',' ')}}, /* Harari */ + {"haw", {HB_TAG('H','A','W',' ')}}, /* Hawaiian */ + {"hay", {HB_TAG('H','A','Y',' ')}}, /* Haya */ + {"haz", {HB_TAG('H','A','Z',' ')}}, /* Hazaragi */ + {"he", {HB_TAG('I','W','R',' ')}}, /* Hebrew */ + {"hea", {HB_TAG('H','M','N',' ')}}, /* Northern Qiandong Miao -> Hmong */ + {"hi", {HB_TAG('H','I','N',' ')}}, /* Hindi */ + {"hil", {HB_TAG('H','I','L',' ')}}, /* Hiligaynon */ + {"hji", {HB_TAG('M','L','Y',' ')}}, /* Haji -> Malay */ + {"hlt", {HB_TAG('Q','I','N',' ')}}, /* Matu Chin -> Chin */ + {"hma", {HB_TAG('H','M','N',' ')}}, /* Southern Mashan Hmong -> Hmong */ + {"hmc", {HB_TAG('H','M','N',' ')}}, /* Central Huishui Hmong -> Hmong */ + {"hmd", {HB_TAG('H','M','N',' ')}}, /* Large Flowery Miao -> Hmong */ + {"hme", {HB_TAG('H','M','N',' ')}}, /* Eastern Huishui Hmong -> Hmong */ + {"hmg", {HB_TAG('H','M','N',' ')}}, /* Southwestern Guiyang Hmong -> Hmong */ + {"hmh", {HB_TAG('H','M','N',' ')}}, /* Southwestern Huishui Hmong -> Hmong */ + {"hmi", {HB_TAG('H','M','N',' ')}}, /* Northern Huishui Hmong -> Hmong */ + {"hmj", {HB_TAG('H','M','N',' ')}}, /* Ge -> Hmong */ + {"hml", {HB_TAG('H','M','N',' ')}}, /* Luopohe Hmong -> Hmong */ + {"hmm", {HB_TAG('H','M','N',' ')}}, /* Central Mashan Hmong -> Hmong */ + {"hmn", {HB_TAG('H','M','N',' ')}}, /* Hmong [macrolanguage] */ + {"hmp", {HB_TAG('H','M','N',' ')}}, /* 
Northern Mashan Hmong -> Hmong */ + {"hmq", {HB_TAG('H','M','N',' ')}}, /* Eastern Qiandong Miao -> Hmong */ + {"hms", {HB_TAG('H','M','N',' ')}}, /* Southern Qiandong Miao -> Hmong */ + {"hmw", {HB_TAG('H','M','N',' ')}}, /* Western Mashan Hmong -> Hmong */ + {"hmy", {HB_TAG('H','M','N',' ')}}, /* Southern Guiyang Hmong -> Hmong */ + {"hmz", {HB_TAG('H','M','N',' ')}}, /* Hmong Shua -> Hmong */ + {"hnd", {HB_TAG('H','N','D',' ')}}, /* Southern Hindko -> Hindko */ + {"hne", {HB_TAG('C','H','H',' ')}}, /* Chhattisgarhi -> Chattisgarhi */ + {"hnj", {HB_TAG('H','M','N',' ')}}, /* Hmong Njua -> Hmong */ + {"hno", {HB_TAG('H','N','D',' ')}}, /* Northern Hindko -> Hindko */ + {"ho", {HB_TAG('H','M','O',' ')}}, /* Hiri Motu */ + {"hoc", {HB_TAG('H','O',' ',' ')}}, /* Ho */ + {"hoi", {HB_TAG('A','T','H',' ')}}, /* Holikachuk -> Athapaskan */ + {"hoj", {HB_TAG('H','A','R',' ')}}, /* Hadothi -> Harauti */ + {"hr", {HB_TAG('H','R','V',' ')}}, /* Croatian */ + {"hrm", {HB_TAG('H','M','N',' ')}}, /* Horned Miao -> Hmong */ + {"hsb", {HB_TAG('U','S','B',' ')}}, /* Upper Sorbian */ + {"hsn", {HB_TAG('Z','H','S',' ')}}, /* Xiang Chinese -> Chinese Simplified */ + {"ht", {HB_TAG('H','A','I',' ')}}, /* Haitian (Haitian Creole) */ + {"hu", {HB_TAG('H','U','N',' ')}}, /* Hungarian */ + {"huj", {HB_TAG('H','M','N',' ')}}, /* Northern Guiyang Hmong -> Hmong */ + {"hup", {HB_TAG('A','T','H',' ')}}, /* Hupa -> Athapaskan */ + {"hy", {HB_TAG('H','Y','E',' '), /* Armenian */ + HB_TAG('H','Y','E','0')}}, /* Armenian -> Armenian East */ + {"hz", {HB_TAG('H','E','R',' ')}}, /* Herero */ + {"ia", {HB_TAG('I','N','A',' ')}}, /* Interlingua (International Auxiliary Language Association) */ + {"iba", {HB_TAG('I','B','A',' ')}}, /* Iban */ + {"ibb", {HB_TAG('I','B','B',' ')}}, /* Ibibio */ + {"id", {HB_TAG('I','N','D',' ')}}, /* Indonesian */ + {"ida", {HB_TAG('L','U','H',' ')}}, /* Idakho-Isukha-Tiriki -> Luyia */ + {"ie", {HB_TAG('I','L','E',' ')}}, /* Interlingue */ + {"ig", 
{HB_TAG('I','B','O',' ')}}, /* Igbo */ + {"igb", {HB_TAG('E','B','I',' ')}}, /* Ebira */ + {"ii", {HB_TAG('Y','I','M',' ')}}, /* Sichuan Yi -> Yi Modern */ + {"ijc", {HB_TAG('I','J','O',' ')}}, /* Izon -> Ijo */ + {"ijo", {HB_TAG('I','J','O',' ')}}, /* Ijo [family] */ + {"ik", {HB_TAG('I','P','K',' ')}}, /* Inupiaq [macrolanguage] -> Inupiat */ + {"ike", {HB_TAG('I','N','U',' ')}}, /* Eastern Canadian Inuktitut -> Inuktitut */ + {"ikt", {HB_TAG('I','N','U',' ')}}, /* Inuinnaqtun -> Inuktitut */ + {"ilo", {HB_TAG('I','L','O',' ')}}, /* Iloko -> Ilokano */ + {"in", {HB_TAG('I','N','D',' ')}}, /* Indonesian (retired code) */ + {"ing", {HB_TAG('A','T','H',' ')}}, /* Degexit'an -> Athapaskan */ + {"inh", {HB_TAG('I','N','G',' ')}}, /* Ingush */ + {"io", {HB_TAG('I','D','O',' ')}}, /* Ido */ + {"is", {HB_TAG('I','S','L',' ')}}, /* Icelandic */ + {"it", {HB_TAG('I','T','A',' ')}}, /* Italian */ + {"iu", {HB_TAG('I','N','U',' ')}}, /* Inuktitut [macrolanguage] */ + {"iw", {HB_TAG('I','W','R',' ')}}, /* Hebrew (retired code) */ + {"ja", {HB_TAG('J','A','N',' ')}}, /* Japanese */ + {"jak", {HB_TAG('M','L','Y',' ')}}, /* Jakun -> Malay */ + {"jam", {HB_TAG('J','A','M',' ')}}, /* Jamaican Creole English -> Jamaican Creole */ + {"jax", {HB_TAG('M','L','Y',' ')}}, /* Jambi Malay -> Malay */ + {"jbo", {HB_TAG('J','B','O',' ')}}, /* Lojban */ + {"jct", {HB_TAG('J','C','T',' ')}}, /* Krymchak */ + {"ji", {HB_TAG('J','I','I',' ')}}, /* Yiddish (retired code) */ + {"jv", {HB_TAG('J','A','V',' ')}}, /* Javanese */ + {"jw", {HB_TAG('J','A','V',' ')}}, /* Javanese (retired code) */ + {"ka", {HB_TAG('K','A','T',' ')}}, /* Georgian */ + {"kaa", {HB_TAG('K','R','K',' ')}}, /* Kara-Kalpak -> Karakalpak */ + {"kab", {HB_TAG('K','A','B','0')}}, /* Kabyle */ + {"kam", {HB_TAG('K','M','B',' ')}}, /* Kamba (Kenya) */ + {"kar", {HB_TAG('K','R','N',' ')}}, /* Karen [family] */ + {"kbd", {HB_TAG('K','A','B',' ')}}, /* Kabardian */ + {"kby", {HB_TAG('K','N','R',' ')}}, /* Manga Kanuri -> Kanuri */ + 
{"kca", {HB_TAG('K','H','K',' '), /* Khanty -> Khanty-Kazim */ + HB_TAG('K','H','S',' '), /* Khanty -> Khanty-Shurishkar */ + HB_TAG('K','H','V',' ')}}, /* Khanty -> Khanty-Vakhi */ + {"kde", {HB_TAG('K','D','E',' ')}}, /* Makonde */ + {"kdr", {HB_TAG('K','R','M',' ')}}, /* Karaim */ + {"kdt", {HB_TAG('K','U','Y',' ')}}, /* Kuy */ + {"kea", {HB_TAG('K','E','A',' ')}}, /* Kabuverdianu (Crioulo) */ + {"kek", {HB_TAG('K','E','K',' ')}}, /* Kekchi */ + {"kex", {HB_TAG('K','K','N',' ')}}, /* Kukna -> Kokni */ + {"kfa", {HB_TAG('K','O','D',' ')}}, /* Kodava -> Kodagu */ + {"kfr", {HB_TAG('K','A','C',' ')}}, /* Kachhi -> Kachchi */ + {"kfx", {HB_TAG('K','U','L',' ')}}, /* Kullu Pahari -> Kulvi */ + {"kfy", {HB_TAG('K','M','N',' ')}}, /* Kumaoni */ + {"kg", {HB_TAG('K','O','N','0')}}, /* Kongo [macrolanguage] */ + {"kha", {HB_TAG('K','S','I',' ')}}, /* Khasi */ + {"khb", {HB_TAG('X','B','D',' ')}}, /* Lü */ + {"khk", {HB_TAG('M','N','G',' ')}}, /* Halh Mongolian -> Mongolian */ + {"kht", {HB_TAG('K','H','N',' '), /* Khamti -> Khamti Shan (Microsoft fonts) */ + HB_TAG('K','H','T',' ')}}, /* Khamti -> Khamti Shan (OpenType spec and SIL fonts) */ + {"khw", {HB_TAG('K','H','W',' ')}}, /* Khowar */ + {"ki", {HB_TAG('K','I','K',' ')}}, /* Kikuyu (Gikuyu) */ + {"kiu", {HB_TAG('K','I','U',' ')}}, /* Kirmanjki */ + {"kj", {HB_TAG('K','U','A',' ')}}, /* Kuanyama */ + {"kjd", {HB_TAG('K','J','D',' ')}}, /* Southern Kiwai */ + {"kjh", {HB_TAG('K','H','A',' ')}}, /* Khakas -> Khakass */ + {"kjp", {HB_TAG('K','J','P',' ')}}, /* Pwo Eastern Karen -> Eastern Pwo Karen */ + {"kk", {HB_TAG('K','A','Z',' ')}}, /* Kazakh */ + {"kkz", {HB_TAG('A','T','H',' ')}}, /* Kaska -> Athapaskan */ + {"kl", {HB_TAG('G','R','N',' ')}}, /* Greenlandic */ + {"kln", {HB_TAG('K','A','L',' ')}}, /* Kalenjin [macrolanguage] */ + {"km", {HB_TAG('K','H','M',' ')}}, /* Khmer */ + {"kmb", {HB_TAG('M','B','N',' ')}}, /* Kimbundu -> Mbundu */ + {"kmr", {HB_TAG('K','U','R',' ')}}, /* Northern Kurdish -> Kurdish */ + 
{"kmw", {HB_TAG('K','M','O',' ')}}, /* Komo (Democratic Republic of Congo) */ + {"kmz", {HB_TAG('K','M','Z',' ')}}, /* Khorasani Turkish -> Khorasani Turkic */ + {"kn", {HB_TAG('K','A','N',' ')}}, /* Kannada */ + {"knc", {HB_TAG('K','N','R',' ')}}, /* Central Kanuri -> Kanuri */ + {"kng", {HB_TAG('K','O','N','0')}}, /* Koongo -> Kongo */ + {"knn", {HB_TAG('K','O','K',' ')}}, /* Konkani */ + {"ko", {HB_TAG('K','O','R',' ')}}, /* Korean */ + {"koi", {HB_TAG('K','O','P',' ')}}, /* Komi-Permyak */ + {"kok", {HB_TAG('K','O','K',' ')}}, /* Konkani [macrolanguage] */ + {"kos", {HB_TAG('K','O','S',' ')}}, /* Kosraean */ + {"koy", {HB_TAG('A','T','H',' ')}}, /* Koyukon -> Athapaskan */ + {"kpe", {HB_TAG('K','P','L',' ')}}, /* Kpelle [macrolanguage] */ + {"kpv", {HB_TAG('K','O','Z',' ')}}, /* Komi-Zyrian */ + {"kpy", {HB_TAG('K','Y','K',' ')}}, /* Koryak */ + {"kqs", {HB_TAG('K','I','S',' ')}}, /* Northern Kissi -> Kisii */ + {"kqy", {HB_TAG('K','R','T',' ')}}, /* Koorete */ + {"kr", {HB_TAG('K','N','R',' ')}}, /* Kanuri [macrolanguage] */ + {"krc", {HB_TAG('K','A','R',' '), /* Karachay-Balkar -> Karachay */ + HB_TAG('B','A','L',' ')}}, /* Karachay-Balkar -> Balkar */ + {"kri", {HB_TAG('K','R','I',' ')}}, /* Krio */ + {"krl", {HB_TAG('K','R','L',' ')}}, /* Karelian */ + {"krt", {HB_TAG('K','N','R',' ')}}, /* Tumari Kanuri -> Kanuri */ + {"kru", {HB_TAG('K','U','U',' ')}}, /* Kurukh */ + {"ks", {HB_TAG('K','S','H',' ')}}, /* Kashmiri */ + {"ksh", {HB_TAG('K','S','H','0')}}, /* Kölsch -> Ripuarian */ + {"kss", {HB_TAG('K','I','S',' ')}}, /* Southern Kisi -> Kisii */ + {"ksw", {HB_TAG('K','S','W',' ')}}, /* S’gaw Karen */ + {"ktb", {HB_TAG('K','E','B',' ')}}, /* Kambaata -> Kebena */ + {"ktu", {HB_TAG('K','O','N',' ')}}, /* Kituba (Democratic Republic of Congo) -> Kikongo */ + {"ktw", {HB_TAG('A','T','H',' ')}}, /* Kato -> Athapaskan */ + {"ku", {HB_TAG('K','U','R',' ')}}, /* Kurdish [macrolanguage] */ + {"kum", {HB_TAG('K','U','M',' ')}}, /* Kumyk */ + {"kuu", 
{HB_TAG('A','T','H',' ')}}, /* Upper Kuskokwim -> Athapaskan */ + {"kv", {HB_TAG('K','O','M',' ')}}, /* Komi [macrolanguage] */ + {"kvb", {HB_TAG('M','L','Y',' ')}}, /* Kubu -> Malay */ + {"kvr", {HB_TAG('M','L','Y',' ')}}, /* Kerinci -> Malay */ + {"kw", {HB_TAG('C','O','R',' ')}}, /* Cornish */ + {"kwy", {HB_TAG('K','O','N','0')}}, /* San Salvador Kongo -> Kongo */ + {"kxc", {HB_TAG('K','M','S',' ')}}, /* Konso -> Komso */ + {"kxd", {HB_TAG('M','L','Y',' ')}}, /* Brunei -> Malay */ + {"kxu", {HB_TAG('K','U','I',' ')}}, /* Kui (India) */ + {"ky", {HB_TAG('K','I','R',' ')}}, /* Kirghiz (Kyrgyz) */ + {"kyu", {HB_TAG('K','Y','U',' ')}}, /* Western Kayah */ + {"la", {HB_TAG('L','A','T',' ')}}, /* Latin */ + {"lad", {HB_TAG('J','U','D',' ')}}, /* Ladino */ + {"lb", {HB_TAG('L','T','Z',' ')}}, /* Luxembourgish */ + {"lbe", {HB_TAG('L','A','K',' ')}}, /* Lak */ + {"lbj", {HB_TAG('L','D','K',' ')}}, /* Ladakhi */ + {"lbl", {HB_TAG('B','I','K',' ')}}, /* Libon Bikol -> Bikol */ + {"lce", {HB_TAG('M','L','Y',' ')}}, /* Loncong -> Malay */ + {"lcf", {HB_TAG('M','L','Y',' ')}}, /* Lubu -> Malay */ + {"ldi", {HB_TAG('K','O','N','0')}}, /* Laari -> Kongo */ + {"lez", {HB_TAG('L','E','Z',' ')}}, /* Lezghian -> Lezgi */ + {"lg", {HB_TAG('L','U','G',' ')}}, /* Ganda */ + {"li", {HB_TAG('L','I','M',' ')}}, /* Limburgish */ + {"lif", {HB_TAG('L','M','B',' ')}}, /* Limbu */ + {"lij", {HB_TAG('L','I','J',' ')}}, /* Ligurian */ + {"lis", {HB_TAG('L','I','S',' ')}}, /* Lisu */ + {"liw", {HB_TAG('M','L','Y',' ')}}, /* Col -> Malay */ + {"ljp", {HB_TAG('L','J','P',' ')}}, /* Lampung Api -> Lampung */ + {"lkb", {HB_TAG('L','U','H',' ')}}, /* Kabras -> Luyia */ + {"lki", {HB_TAG('L','K','I',' ')}}, /* Laki */ + {"lko", {HB_TAG('L','U','H',' ')}}, /* Khayo -> Luyia */ + {"lks", {HB_TAG('L','U','H',' ')}}, /* Kisa -> Luyia */ + {"lld", {HB_TAG('L','A','D',' ')}}, /* Ladin */ + {"lmn", {HB_TAG('L','A','M',' ')}}, /* Lambadi -> Lambani */ + {"lmo", {HB_TAG('L','M','O',' ')}}, /* Lombard */ + 
{"ln", {HB_TAG('L','I','N',' ')}}, /* Lingala */ + {"lo", {HB_TAG('L','A','O',' ')}}, /* Lao */ + {"lom", {HB_TAG('L','O','M',' ')}}, /* Loma (Liberia) */ + {"lrc", {HB_TAG('L','R','C',' ')}}, /* Northern Luri -> Luri */ + {"lri", {HB_TAG('L','U','H',' ')}}, /* Marachi -> Luyia */ + {"lrm", {HB_TAG('L','U','H',' ')}}, /* Marama -> Luyia */ + {"lsm", {HB_TAG('L','U','H',' ')}}, /* Saamia -> Luyia */ + {"lt", {HB_TAG('L','T','H',' ')}}, /* Lithuanian */ + {"ltg", {HB_TAG('L','V','I',' ')}}, /* Latgalian -> Latvian */ + {"lto", {HB_TAG('L','U','H',' ')}}, /* Tsotso -> Luyia */ + {"lts", {HB_TAG('L','U','H',' ')}}, /* Tachoni -> Luyia */ + {"lu", {HB_TAG('L','U','B',' ')}}, /* Luba-Katanga */ + {"lua", {HB_TAG('L','U','A',' ')}}, /* Luba-Lulua */ + {"luo", {HB_TAG('L','U','O',' ')}}, /* Luo (Kenya and Tanzania) */ + {"lus", {HB_TAG('M','I','Z',' ')}}, /* Lushai -> Mizo */ + {"luy", {HB_TAG('L','U','H',' ')}}, /* Luyia [macrolanguage] */ + {"luz", {HB_TAG('L','R','C',' ')}}, /* Southern Luri -> Luri */ + {"lv", {HB_TAG('L','V','I',' ')}}, /* Latvian [macrolanguage] */ + {"lvs", {HB_TAG('L','V','I',' ')}}, /* Standard Latvian -> Latvian */ + {"lwg", {HB_TAG('L','U','H',' ')}}, /* Wanga -> Luyia */ + {"lzh", {HB_TAG('Z','H','T',' ')}}, /* Literary Chinese -> Chinese Traditional */ + {"lzz", {HB_TAG('L','A','Z',' ')}}, /* Laz */ + {"mad", {HB_TAG('M','A','D',' ')}}, /* Madurese -> Madura */ + {"mag", {HB_TAG('M','A','G',' ')}}, /* Magahi */ + {"mai", {HB_TAG('M','T','H',' ')}}, /* Maithili */ + {"mak", {HB_TAG('M','K','R',' ')}}, /* Makasar */ + {"mam", {HB_TAG('M','A','M',' ')}}, /* Mam */ + {"man", {HB_TAG('M','N','K',' ')}}, /* Mandingo [macrolanguage] -> Maninka */ + {"max", {HB_TAG('M','L','Y',' ')}}, /* North Moluccan Malay -> Malay */ + {"mbo", {HB_TAG('M','B','O',' ')}}, /* Mbo (Cameroon) */ + {"mct", {HB_TAG('B','T','I',' ')}}, /* Mengisa -> Beti */ + {"mdf", {HB_TAG('M','O','K',' ')}}, /* Moksha */ + {"mdr", {HB_TAG('M','D','R',' ')}}, /* Mandar */ + {"mdy", 
{HB_TAG('M','L','E',' ')}}, /* Male (Ethiopia) */ + {"men", {HB_TAG('M','D','E',' ')}}, /* Mende (Sierra Leone) */ + {"meo", {HB_TAG('M','L','Y',' ')}}, /* Kedah Malay -> Malay */ + {"mer", {HB_TAG('M','E','R',' ')}}, /* Meru */ + {"mfa", {HB_TAG('M','L','Y',' ')}}, /* Pattani Malay -> Malay */ + {"mfb", {HB_TAG('M','L','Y',' ')}}, /* Bangka -> Malay */ + {"mfe", {HB_TAG('M','F','E',' ')}}, /* Morisyen */ + {"mg", {HB_TAG('M','L','G',' ')}}, /* Malagasy [macrolanguage] */ + {"mh", {HB_TAG('M','A','H',' ')}}, /* Marshallese */ + {"mhr", {HB_TAG('L','M','A',' ')}}, /* Eastern Mari -> Low Mari */ + {"mhv", {HB_TAG('A','R','K',' ')}}, /* Arakanese (retired code) -> Rakhine */ + {"mi", {HB_TAG('M','R','I',' ')}}, /* Maori */ + {"min", {HB_TAG('M','I','N',' ')}}, /* Minangkabau */ + {"mk", {HB_TAG('M','K','D',' ')}}, /* Macedonian */ + {"mku", {HB_TAG('M','N','K',' ')}}, /* Konyanka Maninka -> Maninka */ + {"mkw", {HB_TAG('M','K','W',' ')}}, /* Kituba (Congo) */ + {"ml", {HB_TAG('M','A','L',' '), /* Malayalam -> Malayalam Traditional */ + HB_TAG('M','L','R',' ')}}, /* Malayalam -> Malayalam Reformed */ + {"mlq", {HB_TAG('M','L','N',' '), /* Western Maninkakan -> Malinke */ + HB_TAG('M','N','K',' ')}}, /* Western Maninkakan -> Maninka */ + {"mmr", {HB_TAG('H','M','N',' ')}}, /* Western Xiangxi Miao -> Hmong */ + {"mn", {HB_TAG('M','N','G',' ')}}, /* Mongolian [macrolanguage] */ + {"mnc", {HB_TAG('M','C','H',' ')}}, /* Manchu */ + {"mni", {HB_TAG('M','N','I',' ')}}, /* Manipuri */ + {"mnk", {HB_TAG('M','N','D',' '), /* Mandinka */ + HB_TAG('M','N','K',' ')}}, /* Mandinka -> Maninka */ + {"mnp", {HB_TAG('Z','H','S',' ')}}, /* Min Bei Chinese -> Chinese Simplified */ + {"mns", {HB_TAG('M','A','N',' ')}}, /* Mansi */ + {"mnw", {HB_TAG('M','O','N',' ')}}, /* Mon */ + {"mo", {HB_TAG('M','O','L',' ')}}, /* Moldavian (retired code) */ + {"moh", {HB_TAG('M','O','H',' ')}}, /* Mohawk */ + {"mos", {HB_TAG('M','O','S',' ')}}, /* Mossi */ + {"mpe", {HB_TAG('M','A','J',' ')}}, /* 
Majang */ + {"mqg", {HB_TAG('M','L','Y',' ')}}, /* Kota Bangun Kutai Malay -> Malay */ + {"mr", {HB_TAG('M','A','R',' ')}}, /* Marathi */ + {"mrh", {HB_TAG('Q','I','N',' ')}}, /* Mara Chin -> Chin */ + {"mrj", {HB_TAG('H','M','A',' ')}}, /* Western Mari -> High Mari */ + {"ms", {HB_TAG('M','L','Y',' ')}}, /* Malay [macrolanguage] */ + {"msc", {HB_TAG('M','N','K',' ')}}, /* Sankaran Maninka -> Maninka */ + {"msh", {HB_TAG('M','L','G',' ')}}, /* Masikoro Malagasy -> Malagasy */ + {"msi", {HB_TAG('M','L','Y',' ')}}, /* Sabah Malay -> Malay */ + {"mt", {HB_TAG('M','T','S',' ')}}, /* Maltese */ + {"mtr", {HB_TAG('M','A','W',' ')}}, /* Mewari -> Marwari */ + {"mui", {HB_TAG('M','L','Y',' ')}}, /* Musi -> Malay */ + {"mup", {HB_TAG('R','A','J',' ')}}, /* Malvi -> Rajasthani */ + {"muq", {HB_TAG('H','M','N',' ')}}, /* Eastern Xiangxi Miao -> Hmong */ + {"mus", {HB_TAG('M','U','S',' ')}}, /* Creek -> Muscogee */ + {"mvb", {HB_TAG('A','T','H',' ')}}, /* Mattole -> Athapaskan */ + {"mve", {HB_TAG('M','A','W',' ')}}, /* Marwari (Pakistan) */ + {"mvf", {HB_TAG('M','N','G',' ')}}, /* Peripheral Mongolian -> Mongolian */ + {"mwk", {HB_TAG('M','N','K',' ')}}, /* Kita Maninkakan -> Maninka */ + {"mwl", {HB_TAG('M','W','L',' ')}}, /* Mirandese */ + {"mwr", {HB_TAG('M','A','W',' ')}}, /* Marwari [macrolanguage] */ + {"mww", {HB_TAG('M','W','W',' ')}}, /* Hmong Daw */ + {"my", {HB_TAG('B','R','M',' ')}}, /* Burmese */ + {"mym", {HB_TAG('M','E','N',' ')}}, /* Me'en */ + {"myn", {HB_TAG('M','Y','N',' ')}}, /* Mayan [family] */ + {"myq", {HB_TAG('M','N','K',' ')}}, /* Forest Maninka (retired code) -> Maninka */ + {"myv", {HB_TAG('E','R','Z',' ')}}, /* Erzya */ + {"mzn", {HB_TAG('M','Z','N',' ')}}, /* Mazanderani */ + {"na", {HB_TAG('N','A','U',' ')}}, /* Nauru -> Nauruan */ + {"nag", {HB_TAG('N','A','G',' ')}}, /* Naga Pidgin -> Naga-Assamese */ + {"nah", {HB_TAG('N','A','H',' ')}}, /* Nahuatl [family] */ + {"nan", {HB_TAG('Z','H','S',' ')}}, /* Min Nan Chinese -> Chinese Simplified */ + 
{"nap", {HB_TAG('N','A','P',' ')}}, /* Neapolitan */ + {"nb", {HB_TAG('N','O','R',' ')}}, /* Norwegian Bokmål -> Norwegian */ + {"nd", {HB_TAG('N','D','B',' ')}}, /* North Ndebele -> Ndebele */ + {"ndc", {HB_TAG('N','D','C',' ')}}, /* Ndau */ + {"nds", {HB_TAG('N','D','S',' ')}}, /* Low Saxon */ + {"ne", {HB_TAG('N','E','P',' ')}}, /* Nepali [macrolanguage] */ + {"new", {HB_TAG('N','E','W',' ')}}, /* Newari */ + {"ng", {HB_TAG('N','D','G',' ')}}, /* Ndonga */ + {"nga", {HB_TAG('N','G','A',' ')}}, /* Ngbaka */ + {"ngl", {HB_TAG('L','M','W',' ')}}, /* Lomwe */ + {"ngo", {HB_TAG('S','X','T',' ')}}, /* Ngoni -> Sutu */ + {"nhd", {HB_TAG('G','U','A',' ')}}, /* Chiripá -> Guarani */ + {"niq", {HB_TAG('K','A','L',' ')}}, /* Nandi -> Kalenjin */ + {"niu", {HB_TAG('N','I','U',' ')}}, /* Niuean */ + {"niv", {HB_TAG('G','I','L',' ')}}, /* Gilyak */ + {"njz", {HB_TAG('N','I','S',' ')}}, /* Nyishi -> Nisi */ + {"nl", {HB_TAG('N','L','D',' ')}}, /* Dutch */ + {"nle", {HB_TAG('L','U','H',' ')}}, /* East Nyala -> Luyia */ + {"nn", {HB_TAG('N','Y','N',' ')}}, /* Norwegian Nynorsk (Nynorsk, Norwegian) */ + {"no", {HB_TAG('N','O','R',' ')}}, /* Norwegian [macrolanguage] */ + {"nod", {HB_TAG('N','T','A',' ')}}, /* Northern Thai -> Northern Tai */ + {"noe", {HB_TAG('N','O','E',' ')}}, /* Nimadi */ + {"nog", {HB_TAG('N','O','G',' ')}}, /* Nogai */ + {"nov", {HB_TAG('N','O','V',' ')}}, /* Novial */ + {"npi", {HB_TAG('N','E','P',' ')}}, /* Nepali */ + {"nqo", {HB_TAG('N','K','O',' ')}}, /* N'Ko */ + {"nr", {HB_TAG('N','D','B',' ')}}, /* South Ndebele -> Ndebele */ + {"nsk", {HB_TAG('N','A','S',' ')}}, /* Naskapi */ + {"nso", {HB_TAG('N','S','O',' ')}}, /* Pedi -> Sotho, Northern */ + {"nv", {HB_TAG('N','A','V',' '), /* Navajo */ + HB_TAG('A','T','H',' ')}}, /* Navajo -> Athapaskan */ + {"ny", {HB_TAG('C','H','I',' ')}}, /* Chichewa (Chewa, Nyanja) */ + {"nyd", {HB_TAG('L','U','H',' ')}}, /* Nyore -> Luyia */ + {"nym", {HB_TAG('N','Y','M',' ')}}, /* Nyamwezi */ + {"nyn", 
{HB_TAG('N','K','L',' ')}}, /* Nyankole */ + {"nza", {HB_TAG('N','Z','A',' ')}}, /* Tigon Mbembe -> Mbembe Tigon */ + {"oc", {HB_TAG('O','C','I',' ')}}, /* Occitan (post 1500) */ + {"oj", {HB_TAG('O','J','B',' ')}}, /* Ojibwa [macrolanguage] -> Ojibway */ + {"ojb", {HB_TAG('O','J','B',' ')}}, /* Northwestern Ojibwa -> Ojibway */ + {"ojc", {HB_TAG('O','J','B',' ')}}, /* Central Ojibwa -> Ojibway */ + {"ojg", {HB_TAG('O','J','B',' ')}}, /* Eastern Ojibwa -> Ojibway */ + {"ojs", {HB_TAG('O','C','R',' ')}}, /* Severn Ojibwa -> Oji-Cree */ + {"ojw", {HB_TAG('O','J','B',' ')}}, /* Western Ojibwa -> Ojibway */ + {"oki", {HB_TAG('K','A','L',' ')}}, /* Okiek -> Kalenjin */ + {"okm", {HB_TAG('K','O','H',' ')}}, /* Middle Korean (10th-16th cent.) -> Korean Old Hangul */ + {"om", {HB_TAG('O','R','O',' ')}}, /* Oromo [macrolanguage] */ + {"or", {HB_TAG('O','R','I',' ')}}, /* Odia (formerly Oriya) [macrolanguage] */ + {"orc", {HB_TAG('O','R','O',' ')}}, /* Orma -> Oromo */ + {"orn", {HB_TAG('M','L','Y',' ')}}, /* Orang Kanaq -> Malay */ + {"ors", {HB_TAG('M','L','Y',' ')}}, /* Orang Seletar -> Malay */ + {"ory", {HB_TAG('O','R','I',' ')}}, /* Odia (formerly Oriya) */ + {"os", {HB_TAG('O','S','S',' ')}}, /* Ossetian */ + {"otw", {HB_TAG('O','J','B',' ')}}, /* Ottawa -> Ojibway */ + {"pa", {HB_TAG('P','A','N',' ')}}, /* Punjabi */ + {"pag", {HB_TAG('P','A','G',' ')}}, /* Pangasinan */ + {"pam", {HB_TAG('P','A','M',' ')}}, /* Pampanga -> Pampangan */ + {"pap", {HB_TAG('P','A','P','0')}}, /* Papiamento -> Papiamentu */ + {"pau", {HB_TAG('P','A','U',' ')}}, /* Palauan */ + {"pbt", {HB_TAG('P','A','S',' ')}}, /* Southern Pashto -> Pashto */ + {"pbu", {HB_TAG('P','A','S',' ')}}, /* Northern Pashto -> Pashto */ + {"pcc", {HB_TAG('P','C','C',' ')}}, /* Bouyei */ + {"pcd", {HB_TAG('P','C','D',' ')}}, /* Picard */ + {"pce", {HB_TAG('P','L','G',' ')}}, /* Ruching Palaung -> Palaung */ + {"pck", {HB_TAG('Q','I','N',' ')}}, /* Paite Chin -> Chin */ + {"pdc", {HB_TAG('P','D','C',' ')}}, /* 
Pennsylvania German */ + {"pel", {HB_TAG('M','L','Y',' ')}}, /* Pekal -> Malay */ + {"pes", {HB_TAG('F','A','R',' ')}}, /* Iranian Persian -> Persian */ + {"pga", {HB_TAG('A','R','A',' ')}}, /* Sudanese Creole Arabic -> Arabic */ + {"phk", {HB_TAG('P','H','K',' ')}}, /* Phake */ + {"pi", {HB_TAG('P','A','L',' ')}}, /* Pali */ + {"pih", {HB_TAG('P','I','H',' ')}}, /* Pitcairn-Norfolk -> Norfolk */ + {"pko", {HB_TAG('K','A','L',' ')}}, /* Pökoot -> Kalenjin */ + {"pl", {HB_TAG('P','L','K',' ')}}, /* Polish */ + {"pll", {HB_TAG('P','L','G',' ')}}, /* Shwe Palaung -> Palaung */ + {"plp", {HB_TAG('P','A','P',' ')}}, /* Palpa */ + {"plt", {HB_TAG('M','L','G',' ')}}, /* Plateau Malagasy -> Malagasy */ + {"pms", {HB_TAG('P','M','S',' ')}}, /* Piemontese */ + {"pnb", {HB_TAG('P','N','B',' ')}}, /* Western Panjabi */ + {"poh", {HB_TAG('P','O','H',' ')}}, /* Poqomchi' -> Pocomchi */ + {"pon", {HB_TAG('P','O','N',' ')}}, /* Pohnpeian */ + {"ppa", {HB_TAG('B','A','G',' ')}}, /* Pao (retired code) -> Baghelkhandi */ + {"pro", {HB_TAG('P','R','O',' ')}}, /* Old Provençal (to 1500) -> Provencal */ + {"prs", {HB_TAG('D','R','I',' ')}}, /* Dari */ + {"ps", {HB_TAG('P','A','S',' ')}}, /* Pashto [macrolanguage] */ + {"pse", {HB_TAG('M','L','Y',' ')}}, /* Central Malay -> Malay */ + {"pst", {HB_TAG('P','A','S',' ')}}, /* Central Pashto -> Pashto */ + {"pt", {HB_TAG('P','T','G',' ')}}, /* Portuguese */ + {"pwo", {HB_TAG('P','W','O',' ')}}, /* Pwo Western Karen -> Western Pwo Karen */ + {"qu", {HB_TAG('Q','U','Z',' ')}}, /* Quechua [macrolanguage] */ + {"qub", {HB_TAG('Q','U','Z',' ')}}, /* Huallaga Huánuco Quechua -> Quechua */ + {"quc", {HB_TAG('Q','U','C',' ')}}, /* K’iche’ */ + {"qud", {HB_TAG('Q','U','Z',' ')}}, /* Calderón Highland Quichua -> Quechua */ + {"quf", {HB_TAG('Q','U','Z',' ')}}, /* Lambayeque Quechua -> Quechua */ + {"qug", {HB_TAG('Q','U','Z',' ')}}, /* Chimborazo Highland Quichua -> Quechua */ + {"quh", {HB_TAG('Q','U','H',' ')}}, /* South Bolivian Quechua -> Quechua 
(Bolivia) */ + {"quk", {HB_TAG('Q','U','Z',' ')}}, /* Chachapoyas Quechua -> Quechua */ + {"qul", {HB_TAG('Q','U','Z',' ')}}, /* North Bolivian Quechua -> Quechua */ + {"qup", {HB_TAG('Q','U','Z',' ')}}, /* Southern Pastaza Quechua -> Quechua */ + {"qur", {HB_TAG('Q','U','Z',' ')}}, /* Yanahuanca Pasco Quechua -> Quechua */ + {"qus", {HB_TAG('Q','U','Z',' ')}}, /* Santiago del Estero Quichua -> Quechua */ + {"quw", {HB_TAG('Q','U','Z',' ')}}, /* Tena Lowland Quichua -> Quechua */ + {"qux", {HB_TAG('Q','U','Z',' ')}}, /* Yauyos Quechua -> Quechua */ + {"quy", {HB_TAG('Q','U','Z',' ')}}, /* Ayacucho Quechua -> Quechua */ + {"quz", {HB_TAG('Q','U','Z',' ')}}, /* Cusco Quechua -> Quechua */ + {"qva", {HB_TAG('Q','U','Z',' ')}}, /* Ambo-Pasco Quechua -> Quechua */ + {"qvc", {HB_TAG('Q','U','Z',' ')}}, /* Cajamarca Quechua -> Quechua */ + {"qve", {HB_TAG('Q','U','Z',' ')}}, /* Eastern Apurímac Quechua -> Quechua */ + {"qvh", {HB_TAG('Q','U','Z',' ')}}, /* Huamalíes-Dos de Mayo Huánuco Quechua -> Quechua */ + {"qvi", {HB_TAG('Q','V','I',' ')}}, /* Imbabura Highland Quichua -> Quechua (Ecuador) */ + {"qvj", {HB_TAG('Q','U','Z',' ')}}, /* Loja Highland Quichua -> Quechua */ + {"qvl", {HB_TAG('Q','U','Z',' ')}}, /* Cajatambo North Lima Quechua -> Quechua */ + {"qvm", {HB_TAG('Q','U','Z',' ')}}, /* Margos-Yarowilca-Lauricocha Quechua -> Quechua */ + {"qvn", {HB_TAG('Q','U','Z',' ')}}, /* North Junín Quechua -> Quechua */ + {"qvo", {HB_TAG('Q','U','Z',' ')}}, /* Napo Lowland Quechua -> Quechua */ + {"qvp", {HB_TAG('Q','U','Z',' ')}}, /* Pacaraos Quechua -> Quechua */ + {"qvs", {HB_TAG('Q','U','Z',' ')}}, /* San Martín Quechua -> Quechua */ + {"qvw", {HB_TAG('Q','U','Z',' ')}}, /* Huaylla Wanca Quechua -> Quechua */ + {"qvz", {HB_TAG('Q','U','Z',' ')}}, /* Northern Pastaza Quichua -> Quechua */ + {"qwa", {HB_TAG('Q','U','Z',' ')}}, /* Corongo Ancash Quechua -> Quechua */ + {"qwc", {HB_TAG('Q','U','Z',' ')}}, /* Classical Quechua -> Quechua */ + {"qwh", {HB_TAG('Q','W','H',' 
')}}, /* Huaylas Ancash Quechua -> Quechua (Peru) */ + {"qws", {HB_TAG('Q','U','Z',' ')}}, /* Sihuas Ancash Quechua -> Quechua */ + {"qxa", {HB_TAG('Q','U','Z',' ')}}, /* Chiquián Ancash Quechua -> Quechua */ + {"qxc", {HB_TAG('Q','U','Z',' ')}}, /* Chincha Quechua -> Quechua */ + {"qxh", {HB_TAG('Q','U','Z',' ')}}, /* Panao Huánuco Quechua -> Quechua */ + {"qxl", {HB_TAG('Q','U','Z',' ')}}, /* Salasaca Highland Quichua -> Quechua */ + {"qxn", {HB_TAG('Q','U','Z',' ')}}, /* Northern Conchucos Ancash Quechua -> Quechua */ + {"qxo", {HB_TAG('Q','U','Z',' ')}}, /* Southern Conchucos Ancash Quechua -> Quechua */ + {"qxp", {HB_TAG('Q','U','Z',' ')}}, /* Puno Quechua -> Quechua */ + {"qxr", {HB_TAG('Q','U','Z',' ')}}, /* Cañar Highland Quichua -> Quechua */ + {"qxt", {HB_TAG('Q','U','Z',' ')}}, /* Santa Ana de Tusi Pasco Quechua -> Quechua */ + {"qxu", {HB_TAG('Q','U','Z',' ')}}, /* Arequipa-La Unión Quechua -> Quechua */ + {"qxw", {HB_TAG('Q','U','Z',' ')}}, /* Jauja Wanca Quechua -> Quechua */ + {"rag", {HB_TAG('L','U','H',' ')}}, /* Logooli -> Luyia */ + {"raj", {HB_TAG('R','A','J',' ')}}, /* Rajasthani [macrolanguage] */ + {"rar", {HB_TAG('R','A','R',' ')}}, /* Rarotongan */ + {"rbb", {HB_TAG('P','L','G',' ')}}, /* Rumai Palaung -> Palaung */ + {"rbl", {HB_TAG('B','I','K',' ')}}, /* Miraya Bikol -> Bikol */ + {"rej", {HB_TAG('R','E','J',' ')}}, /* Rejang */ + {"ria", {HB_TAG('R','I','A',' ')}}, /* Riang (India) */ + {"rif", {HB_TAG('R','I','F',' ')}}, /* Tarifit */ + {"rit", {HB_TAG('R','I','T',' ')}}, /* Ritarungo */ + {"rki", {HB_TAG('A','R','K',' ')}}, /* Rakhine */ + {"rkw", {HB_TAG('R','K','W',' ')}}, /* Arakwal */ + {"rm", {HB_TAG('R','M','S',' ')}}, /* Romansh */ + {"rmc", {HB_TAG('R','O','Y',' ')}}, /* Carpathian Romani -> Romany */ + {"rmf", {HB_TAG('R','O','Y',' ')}}, /* Kalo Finnish Romani -> Romany */ + {"rml", {HB_TAG('R','O','Y',' ')}}, /* Baltic Romani -> Romany */ + {"rmn", {HB_TAG('R','O','Y',' ')}}, /* Balkan Romani -> Romany */ + {"rmo", 
{HB_TAG('R','O','Y',' ')}}, /* Sinte Romani -> Romany */ + {"rmw", {HB_TAG('R','O','Y',' ')}}, /* Welsh Romani -> Romany */ + {"rmy", {HB_TAG('R','M','Y',' ')}}, /* Vlax Romani */ + {"rmz", {HB_TAG('A','R','K',' ')}}, /* Marma -> Rakhine */ + {"rn", {HB_TAG('R','U','N',' ')}}, /* Rundi */ + {"ro", {HB_TAG('R','O','M',' ')}}, /* Romanian */ + {"rom", {HB_TAG('R','O','Y',' ')}}, /* Romany [macrolanguage] */ + {"rtm", {HB_TAG('R','T','M',' ')}}, /* Rotuman */ + {"ru", {HB_TAG('R','U','S',' ')}}, /* Russian */ + {"rue", {HB_TAG('R','S','Y',' ')}}, /* Rusyn */ + {"rup", {HB_TAG('R','U','P',' ')}}, /* Aromanian */ + {"rw", {HB_TAG('R','U','A',' ')}}, /* Kinyarwanda */ + {"rwr", {HB_TAG('M','A','W',' ')}}, /* Marwari (India) */ + {"sa", {HB_TAG('S','A','N',' ')}}, /* Sanskrit */ + {"sah", {HB_TAG('Y','A','K',' ')}}, /* Yakut -> Sakha */ + {"sam", {HB_TAG('P','A','A',' ')}}, /* Samaritan Aramaic -> Palestinian Aramaic */ + {"sas", {HB_TAG('S','A','S',' ')}}, /* Sasak */ + {"sat", {HB_TAG('S','A','T',' ')}}, /* Santali */ + {"sc", {HB_TAG('S','R','D',' ')}}, /* Sardinian [macrolanguage] */ + {"sck", {HB_TAG('S','A','D',' ')}}, /* Sadri */ + {"scn", {HB_TAG('S','C','N',' ')}}, /* Sicilian */ + {"sco", {HB_TAG('S','C','O',' ')}}, /* Scots */ + {"scs", {HB_TAG('S','C','S',' '), /* North Slavey */ + HB_TAG('S','L','A',' '), /* North Slavey -> Slavey */ + HB_TAG('A','T','H',' ')}}, /* North Slavey -> Athapaskan */ + {"sd", {HB_TAG('S','N','D',' ')}}, /* Sindhi */ + {"sdc", {HB_TAG('S','R','D',' ')}}, /* Sassarese Sardinian -> Sardinian */ + {"sdh", {HB_TAG('K','U','R',' ')}}, /* Southern Kurdish -> Kurdish */ + {"sdn", {HB_TAG('S','R','D',' ')}}, /* Gallurese Sardinian -> Sardinian */ + {"se", {HB_TAG('N','S','M',' ')}}, /* Northern Sami */ + {"seh", {HB_TAG('S','N','A',' ')}}, /* Sena */ + {"sek", {HB_TAG('A','T','H',' ')}}, /* Sekani -> Athapaskan */ + {"sel", {HB_TAG('S','E','L',' ')}}, /* Selkup */ + {"sez", {HB_TAG('Q','I','N',' ')}}, /* Senthang Chin -> Chin */ + {"sfm", 
{HB_TAG('H','M','N',' ')}}, /* Small Flowery Miao -> Hmong */ + {"sg", {HB_TAG('S','G','O',' ')}}, /* Sango */ + {"sga", {HB_TAG('S','G','A',' ')}}, /* Old Irish (to 900) */ + {"sgc", {HB_TAG('K','A','L',' ')}}, /* Kipsigis -> Kalenjin */ + {"sgs", {HB_TAG('S','G','S',' ')}}, /* Samogitian */ + {"sgw", {HB_TAG('C','H','G',' '), /* Sebat Bet Gurage -> Chaha Gurage */ + HB_TAG('S','G','W',' ')}}, /* Sebat Bet Gurage -> Chaha Gurage (SIL fonts) */ + {"shi", {HB_TAG('S','H','I',' ')}}, /* Tachelhit */ + {"shn", {HB_TAG('S','H','N',' ')}}, /* Shan */ + {"shu", {HB_TAG('A','R','A',' ')}}, /* Chadian Arabic -> Arabic */ + {"si", {HB_TAG('S','N','H',' ')}}, /* Sinhala (Sinhalese) */ + {"sid", {HB_TAG('S','I','D',' ')}}, /* Sidamo */ + {"sjd", {HB_TAG('K','S','M',' ')}}, /* Kildin Sami */ + {"sjo", {HB_TAG('S','I','B',' ')}}, /* Xibe -> Sibe */ + {"sk", {HB_TAG('S','K','Y',' ')}}, /* Slovak */ + {"skg", {HB_TAG('M','L','G',' ')}}, /* Sakalava Malagasy -> Malagasy */ + {"skr", {HB_TAG('S','R','K',' ')}}, /* Saraiki */ + {"sl", {HB_TAG('S','L','V',' ')}}, /* Slovenian */ + {"sm", {HB_TAG('S','M','O',' ')}}, /* Samoan */ + {"sma", {HB_TAG('S','S','M',' ')}}, /* Southern Sami */ + {"smj", {HB_TAG('L','S','M',' ')}}, /* Lule Sami */ + {"smn", {HB_TAG('I','S','M',' ')}}, /* Inari Sami */ + {"sms", {HB_TAG('S','K','S',' ')}}, /* Skolt Sami */ + {"sn", {HB_TAG('S','N','A','0')}}, /* Shona */ + {"snk", {HB_TAG('S','N','K',' ')}}, /* Soninke */ + {"so", {HB_TAG('S','M','L',' ')}}, /* Somali */ + {"sop", {HB_TAG('S','O','P',' ')}}, /* Songe */ + {"spv", {HB_TAG('O','R','I',' ')}}, /* Sambalpuri -> Odia (formerly Oriya) */ + {"spy", {HB_TAG('K','A','L',' ')}}, /* Sabaot -> Kalenjin */ + {"sq", {HB_TAG('S','Q','I',' ')}}, /* Albanian [macrolanguage] */ + {"sr", {HB_TAG('S','R','B',' ')}}, /* Serbian */ + {"src", {HB_TAG('S','R','D',' ')}}, /* Logudorese Sardinian -> Sardinian */ + {"sro", {HB_TAG('S','R','D',' ')}}, /* Campidanese Sardinian -> Sardinian */ + {"srr", 
{HB_TAG('S','R','R',' ')}}, /* Serer */ + {"srs", {HB_TAG('A','T','H',' ')}}, /* Sarsi -> Athapaskan */ + {"ss", {HB_TAG('S','W','Z',' ')}}, /* Swati */ + {"ssh", {HB_TAG('A','R','A',' ')}}, /* Shihhi Arabic -> Arabic */ + {"st", {HB_TAG('S','O','T',' ')}}, /* Southern Sotho -> Sotho, Southern */ + {"stq", {HB_TAG('S','T','Q',' ')}}, /* Saterfriesisch -> Saterland Frisian */ + {"stv", {HB_TAG('S','I','G',' ')}}, /* Silt'e -> Silte Gurage */ + {"su", {HB_TAG('S','U','N',' ')}}, /* Sundanese */ + {"suk", {HB_TAG('S','U','K',' ')}}, /* Sukuma */ + {"suq", {HB_TAG('S','U','R',' ')}}, /* Suri */ + {"sv", {HB_TAG('S','V','E',' ')}}, /* Swedish */ + {"sva", {HB_TAG('S','V','A',' ')}}, /* Svan */ + {"sw", {HB_TAG('S','W','K',' ')}}, /* Swahili [macrolanguage] */ + {"swb", {HB_TAG('C','M','R',' ')}}, /* Maore Comorian -> Comorian */ + {"swc", {HB_TAG('S','W','K',' ')}}, /* Congo Swahili -> Swahili */ + {"swh", {HB_TAG('S','W','K',' ')}}, /* Swahili */ + {"swv", {HB_TAG('M','A','W',' ')}}, /* Shekhawati -> Marwari */ + {"sxu", {HB_TAG('S','X','U',' ')}}, /* Upper Saxon */ + {"syc", {HB_TAG('S','Y','R',' ')}}, /* Classical Syriac -> Syriac */ + {"syl", {HB_TAG('S','Y','L',' ')}}, /* Sylheti */ + {"syr", {HB_TAG('S','Y','R',' ')}}, /* Syriac [macrolanguage] */ + {"szl", {HB_TAG('S','Z','L',' ')}}, /* Silesian */ + {"ta", {HB_TAG('T','A','M',' ')}}, /* Tamil */ + {"taa", {HB_TAG('A','T','H',' ')}}, /* Lower Tanana -> Athapaskan */ + {"tab", {HB_TAG('T','A','B',' ')}}, /* Tabassaran -> Tabasaran */ + {"taq", {HB_TAG('T','M','H',' ')}}, /* Tamasheq -> Tamashek */ + {"tau", {HB_TAG('A','T','H',' ')}}, /* Upper Tanana -> Athapaskan */ + {"tcb", {HB_TAG('A','T','H',' ')}}, /* Tanacross -> Athapaskan */ + {"tce", {HB_TAG('A','T','H',' ')}}, /* Southern Tutchone -> Athapaskan */ + {"tcp", {HB_TAG('Q','I','N',' ')}}, /* Tawr Chin -> Chin */ + {"tcy", {HB_TAG('T','U','L',' ')}}, /* Tulu -> Tumbuka */ + {"tcz", {HB_TAG('Q','I','N',' ')}}, /* Thado Chin -> Chin */ + {"tdd", 
{HB_TAG('T','D','D',' ')}}, /* Tai Nüa -> Dehong Dai */ + {"tdx", {HB_TAG('M','L','G',' ')}}, /* Tandroy-Mahafaly Malagasy -> Malagasy */ + {"te", {HB_TAG('T','E','L',' ')}}, /* Telugu */ + {"tec", {HB_TAG('K','A','L',' ')}}, /* Terik -> Kalenjin */ + {"tem", {HB_TAG('T','M','N',' ')}}, /* Timne -> Temne */ + {"tet", {HB_TAG('T','E','T',' ')}}, /* Tetum */ + {"tfn", {HB_TAG('A','T','H',' ')}}, /* Tanaina -> Athapaskan */ + {"tg", {HB_TAG('T','A','J',' ')}}, /* Tajik -> Tajiki */ + {"tgj", {HB_TAG('N','I','S',' ')}}, /* Tagin -> Nisi */ + {"tgx", {HB_TAG('A','T','H',' ')}}, /* Tagish -> Athapaskan */ + {"th", {HB_TAG('T','H','A',' ')}}, /* Thai */ + {"tht", {HB_TAG('A','T','H',' ')}}, /* Tahltan -> Athapaskan */ + {"thv", {HB_TAG('T','M','H',' ')}}, /* Tahaggart Tamahaq -> Tamashek */ + {"thz", {HB_TAG('T','M','H',' ')}}, /* Tayart Tamajeq -> Tamashek */ + {"ti", {HB_TAG('T','G','Y',' ')}}, /* Tigrinya */ + {"tig", {HB_TAG('T','G','R',' ')}}, /* Tigre */ + {"tiv", {HB_TAG('T','I','V',' ')}}, /* Tiv */ + {"tk", {HB_TAG('T','K','M',' ')}}, /* Turkmen */ + {"tkg", {HB_TAG('M','L','G',' ')}}, /* Tesaka Malagasy -> Malagasy */ + {"tl", {HB_TAG('T','G','L',' ')}}, /* Tagalog */ + {"tmh", {HB_TAG('T','M','H',' ')}}, /* Tamashek [macrolanguage] */ + {"tmw", {HB_TAG('M','L','Y',' ')}}, /* Temuan -> Malay */ + {"tn", {HB_TAG('T','N','A',' ')}}, /* Tswana */ + {"tnf", {HB_TAG('D','R','I',' ')}}, /* Tangshewi (retired code) -> Dari */ + {"to", {HB_TAG('T','G','N',' ')}}, /* Tonga (Tonga Islands) -> Tongan */ + {"tod", {HB_TAG('T','O','D','0')}}, /* Toma */ + {"toi", {HB_TAG('T','N','G',' ')}}, /* Tonga (Zambia) */ + {"tol", {HB_TAG('A','T','H',' ')}}, /* Tolowa -> Athapaskan */ + {"tpi", {HB_TAG('T','P','I',' ')}}, /* Tok Pisin */ + {"tr", {HB_TAG('T','R','K',' ')}}, /* Turkish */ + {"tru", {HB_TAG('T','U','A',' '), /* Turoyo -> Turoyo Aramaic */ + HB_TAG('S','Y','R',' ')}}, /* Turoyo -> Syriac */ + {"ts", {HB_TAG('T','S','G',' ')}}, /* Tsonga */ + {"tt", {HB_TAG('T','A','T',' 
')}}, /* Tatar */ + {"ttm", {HB_TAG('A','T','H',' ')}}, /* Northern Tutchone -> Athapaskan */ + {"ttq", {HB_TAG('T','M','H',' ')}}, /* Tawallammat Tamajaq -> Tamashek */ + {"tum", {HB_TAG('T','U','M',' ')}}, /* Tumbuka -> Tulu */ + {"tuu", {HB_TAG('A','T','H',' ')}}, /* Tututni -> Athapaskan */ + {"tuy", {HB_TAG('K','A','L',' ')}}, /* Tugen -> Kalenjin */ + {"tvl", {HB_TAG('T','V','L',' ')}}, /* Tuvalu */ + {"tw", {HB_TAG('T','W','I',' '), /* Twi */ + HB_TAG('A','K','A',' ')}}, /* Twi -> Akan */ + {"txc", {HB_TAG('A','T','H',' ')}}, /* Tsetsaut -> Athapaskan */ + {"txy", {HB_TAG('M','L','G',' ')}}, /* Tanosy Malagasy -> Malagasy */ + {"ty", {HB_TAG('T','H','T',' ')}}, /* Tahitian */ + {"tyv", {HB_TAG('T','U','V',' ')}}, /* Tuvinian -> Tuvin */ + {"tyz", {HB_TAG('T','Y','Z',' ')}}, /* Tày */ + {"tzm", {HB_TAG('T','Z','M',' ')}}, /* Central Atlas Tamazight -> Tamazight */ + {"tzo", {HB_TAG('T','Z','O',' ')}}, /* Tzotzil */ + {"ubl", {HB_TAG('B','I','K',' ')}}, /* Buhi'non Bikol -> Bikol */ + {"udm", {HB_TAG('U','D','M',' ')}}, /* Udmurt */ + {"ug", {HB_TAG('U','Y','G',' ')}}, /* Uyghur */ + {"uk", {HB_TAG('U','K','R',' ')}}, /* Ukrainian */ + {"umb", {HB_TAG('U','M','B',' ')}}, /* Umbundu */ + {"unr", {HB_TAG('M','U','N',' ')}}, /* Mundari */ + {"ur", {HB_TAG('U','R','D',' ')}}, /* Urdu */ + {"urk", {HB_TAG('M','L','Y',' ')}}, /* Urak Lawoi' -> Malay */ + {"uz", {HB_TAG('U','Z','B',' ')}}, /* Uzbek [macrolanguage] */ + {"uzn", {HB_TAG('U','Z','B',' ')}}, /* Northern Uzbek -> Uzbek */ + {"uzs", {HB_TAG('U','Z','B',' ')}}, /* Southern Uzbek -> Uzbek */ + {"ve", {HB_TAG('V','E','N',' ')}}, /* Venda */ + {"vec", {HB_TAG('V','E','C',' ')}}, /* Venetian */ + {"vi", {HB_TAG('V','I','T',' ')}}, /* Vietnamese */ + {"vkk", {HB_TAG('M','L','Y',' ')}}, /* Kaur -> Malay */ + {"vkt", {HB_TAG('M','L','Y',' ')}}, /* Tenggarong Kutai Malay -> Malay */ + {"vls", {HB_TAG('F','L','E',' ')}}, /* Vlaams -> Dutch (Flemish) */ + {"vmw", {HB_TAG('M','A','K',' ')}}, /* Makhuwa */ + {"vo", 
{HB_TAG('V','O','L',' ')}}, /* Volapük */ + {"vro", {HB_TAG('V','R','O',' ')}}, /* Võro */ + {"wa", {HB_TAG('W','L','N',' ')}}, /* Walloon */ + {"war", {HB_TAG('W','A','R',' ')}}, /* Waray (Philippines) -> Waray-Waray */ + {"wbm", {HB_TAG('W','A',' ',' ')}}, /* Wa */ + {"wbr", {HB_TAG('W','A','G',' ')}}, /* Wagdi */ + {"wlc", {HB_TAG('C','M','R',' ')}}, /* Mwali Comorian -> Comorian */ + {"wle", {HB_TAG('S','I','G',' ')}}, /* Wolane -> Silte Gurage */ + {"wlk", {HB_TAG('A','T','H',' ')}}, /* Wailaki -> Athapaskan */ + {"wni", {HB_TAG('C','M','R',' ')}}, /* Ndzwani Comorian -> Comorian */ + {"wo", {HB_TAG('W','L','F',' ')}}, /* Wolof */ + {"wry", {HB_TAG('M','A','W',' ')}}, /* Merwari -> Marwari */ + {"wsg", {HB_TAG('G','O','N',' ')}}, /* Adilabad Gondi -> Gondi */ + {"wtm", {HB_TAG('W','T','M',' ')}}, /* Mewati */ + {"wuu", {HB_TAG('Z','H','S',' ')}}, /* Wu Chinese -> Chinese Simplified */ + {"xal", {HB_TAG('K','L','M',' '), /* Kalmyk */ + HB_TAG('T','O','D',' ')}}, /* Kalmyk -> Todo */ + {"xan", {HB_TAG('S','E','K',' ')}}, /* Xamtanga -> Sekota */ + {"xh", {HB_TAG('X','H','S',' ')}}, /* Xhosa */ + {"xjb", {HB_TAG('X','J','B',' ')}}, /* Minjungbal -> Minjangbal */ + {"xmm", {HB_TAG('M','L','Y',' ')}}, /* Manado Malay -> Malay */ + {"xmv", {HB_TAG('M','L','G',' ')}}, /* Antankarana Malagasy -> Malagasy */ + {"xmw", {HB_TAG('M','L','G',' ')}}, /* Tsimihety Malagasy -> Malagasy */ + {"xnr", {HB_TAG('D','G','R',' ')}}, /* Kangri -> Dogri */ + {"xog", {HB_TAG('X','O','G',' ')}}, /* Soga */ + {"xpe", {HB_TAG('X','P','E',' ')}}, /* Liberia Kpelle -> Kpelle (Liberia) */ + {"xsl", {HB_TAG('S','S','L',' '), /* South Slavey */ + HB_TAG('S','L','A',' '), /* South Slavey -> Slavey */ + HB_TAG('A','T','H',' ')}}, /* South Slavey -> Athapaskan */ + {"xst", {HB_TAG('S','I','G',' ')}}, /* Silt'e (retired code) -> Silte Gurage */ + {"xwo", {HB_TAG('T','O','D',' ')}}, /* Written Oirat -> Todo */ + {"yao", {HB_TAG('Y','A','O',' ')}}, /* Yao */ + {"yap", {HB_TAG('Y','A','P',' ')}}, /* 
Yapese */ + {"ybd", {HB_TAG('A','R','K',' ')}}, /* Yangbye (retired code) -> Rakhine */ + {"ydd", {HB_TAG('J','I','I',' ')}}, /* Eastern Yiddish -> Yiddish */ + {"yi", {HB_TAG('J','I','I',' ')}}, /* Yiddish [macrolanguage] */ + {"yih", {HB_TAG('J','I','I',' ')}}, /* Western Yiddish -> Yiddish */ + {"yo", {HB_TAG('Y','B','A',' ')}}, /* Yoruba */ + {"yos", {HB_TAG('Q','I','N',' ')}}, /* Yos (retired code) -> Chin */ + {"yue", {HB_TAG('Z','H','H',' ')}}, /* Yue Chinese -> Chinese, Hong Kong SAR */ + {"za", {HB_TAG('Z','H','A',' ')}}, /* Zhuang [macrolanguage] */ + {"zch", {HB_TAG('Z','H','A',' ')}}, /* Central Hongshuihe Zhuang -> Zhuang */ + {"zdj", {HB_TAG('C','M','R',' ')}}, /* Ngazidja Comorian -> Comorian */ + {"zea", {HB_TAG('Z','E','A',' ')}}, /* Zeeuws -> Zealandic */ + {"zeh", {HB_TAG('Z','H','A',' ')}}, /* Eastern Hongshuihe Zhuang -> Zhuang */ + {"zgb", {HB_TAG('Z','H','A',' ')}}, /* Guibei Zhuang -> Zhuang */ + {"zgh", {HB_TAG('Z','G','H',' ')}}, /* Standard Moroccan Tamazight -> Standard Morrocan Tamazigh */ + {"zgm", {HB_TAG('Z','H','A',' ')}}, /* Minz Zhuang -> Zhuang */ + {"zgn", {HB_TAG('Z','H','A',' ')}}, /* Guibian Zhuang -> Zhuang */ + {"zh", {HB_TAG('Z','H','S',' ')}}, /* Chinese [macrolanguage] -> Chinese Simplified */ + {"zhd", {HB_TAG('Z','H','A',' ')}}, /* Dai Zhuang -> Zhuang */ + {"zhn", {HB_TAG('Z','H','A',' ')}}, /* Nong Zhuang -> Zhuang */ + {"zlj", {HB_TAG('Z','H','A',' ')}}, /* Liujiang Zhuang -> Zhuang */ + {"zlm", {HB_TAG('M','L','Y',' ')}}, /* Malay */ + {"zln", {HB_TAG('Z','H','A',' ')}}, /* Lianshan Zhuang -> Zhuang */ + {"zlq", {HB_TAG('Z','H','A',' ')}}, /* Liuqian Zhuang -> Zhuang */ + {"zmi", {HB_TAG('M','L','Y',' ')}}, /* Negeri Sembilan Malay -> Malay */ + {"zne", {HB_TAG('Z','N','D',' ')}}, /* Zande */ + {"zom", {HB_TAG('Q','I','N',' ')}}, /* Zou -> Chin */ + {"zqe", {HB_TAG('Z','H','A',' ')}}, /* Qiubei Zhuang -> Zhuang */ + {"zsm", {HB_TAG('M','L','Y',' ')}}, /* Standard Malay -> Malay */ + {"zu", {HB_TAG('Z','U','L',' 
')}}, /* Zulu */ + {"zum", {HB_TAG('L','R','C',' ')}}, /* Kumzari -> Luri */ + {"zyb", {HB_TAG('Z','H','A',' ')}}, /* Yongbei Zhuang -> Zhuang */ + {"zyg", {HB_TAG('Z','H','A',' ')}}, /* Yang Zhuang -> Zhuang */ + {"zyj", {HB_TAG('Z','H','A',' ')}}, /* Youjiang Zhuang -> Zhuang */ + {"zyn", {HB_TAG('Z','H','A',' ')}}, /* Yongnan Zhuang -> Zhuang */ + {"zza", {HB_TAG('Z','Z','A',' ')}}, /* Zazaki [macrolanguage] */ + {"zzj", {HB_TAG('Z','H','A',' ')}}, /* Zuojiang Zhuang -> Zhuang */ +}; + +static_assert (HB_OT_MAX_TAGS_PER_LANGUAGE == 3u, ""); + +/** + * hb_ot_tags_from_complex_language: + * @lang_str: a BCP 47 language tag to convert. + * @limit: a pointer to the end of the substring of @lang_str to consider for + * conversion. + * @count: maximum number of language tags to retrieve (IN) and actual number of + * language tags retrieved (OUT). If no tags are retrieved, it is not modified. + * @tags: array of size at least @count to store the language tag + * results + * + * Converts a multi-subtag BCP 47 language tag to OpenType language tags. + * + * Return value: Whether any language systems were retrieved. 
+ **/ +static bool +hb_ot_tags_from_complex_language (const char *lang_str, + const char *limit, + unsigned int *count /* IN/OUT */, + hb_tag_t *tags /* OUT */) +{ /* Checks run top to bottom and the first one that matches returns, so order matters: zh-hant-hk is tested before zh-hant, which is tested before zh combined with -hk. NOTE(review): every single-tag branch stores to tags[0] and sets *count = 1 without reading the incoming *count; presumably callers guarantee *count >= 1 -- confirm. */ + if (lang_matches (lang_str, "cdo-hant-hk")) + { + /* Min Dong Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cdo-hant-mo")) + { + /* Min Dong Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cjy-hant-hk")) + { + /* Jinyu Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cjy-hant-mo")) + { + /* Jinyu Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cmn-hant-hk")) + { + /* Mandarin Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cmn-hant-mo")) + { + /* Mandarin Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cpx-hant-hk")) + { + /* Pu-Xian Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cpx-hant-mo")) + { + /* Pu-Xian Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czh-hant-hk")) + { + /* Huizhou Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese,
Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czh-hant-mo")) + { + /* Huizhou Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czo-hant-hk")) + { + /* Min Zhong Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czo-hant-mo")) + { + /* Min Zhong Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "gan-hant-hk")) + { + /* Gan Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "gan-hant-mo")) + { + /* Gan Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hak-hant-hk")) + { + /* Hakka Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hak-hant-mo")) + { + /* Hakka Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hsn-hant-hk")) + { + /* Xiang Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hsn-hant-mo")) + { + /* Xiang Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "mnp-hant-hk")) + { + /* Min Bei Chinese; Han
(Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "mnp-hant-mo")) + { + /* Min Bei Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "nan-hant-hk")) + { + /* Min Nan Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "nan-hant-mo")) + { + /* Min Nan Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (1 /* constant true: only the subtag test on the next line decides, no primary-language check is made */ + && subtag_matches (lang_str, limit, "-fonnapa")) + { + /* Undetermined; North American Phonetic Alphabet */ + tags[0] = HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "wuu-hant-hk")) + { + /* Wu Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "wuu-hant-mo")) + { + /* Wu Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strcmp (lang_str, "art-lojban")) /* exact whole-string comparison; limit is not consulted in strcmp branches */ + { + /* Lojban */ + tags[0] = HB_TAG('J','B','O',' '); /* Lojban */ + *count = 1; + return true; + } + if (1 + && subtag_matches (lang_str, limit, "-polyton")) + { + /* Modern Greek (1453-); Polytonic Greek */ + tags[0] = HB_TAG('P','G','R',' '); /* Polytonic Greek */ + *count = 1; + return true; + } + if (1 + && subtag_matches (lang_str, limit, "-fonipa")) + { + /* Undetermined; International Phonetic Alphabet */ + tags[0] = HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */ + *count = 1; + return true; + } + if
(lang_matches (lang_str, "zh-hant-hk")) + { + /* Chinese; Han (Traditional variant); Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "zh-hant-mo")) + { + /* Chinese; Han (Traditional variant); Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cdo-hans")) + { + /* Min Dong Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cdo-hant")) + { + /* Min Dong Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cjy-hans")) + { + /* Jinyu Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cjy-hant")) + { + /* Jinyu Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cmn-hans")) + { + /* Mandarin Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cmn-hant")) + { + /* Mandarin Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cpx-hans")) + { + /* Pu-Xian Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cpx-hant")) + { + /* Pu-Xian Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czh-hans")) + { + /* Huizhou
Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czh-hant")) + { + /* Huizhou Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czo-hans")) + { + /* Min Zhong Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czo-hant")) + { + /* Min Zhong Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "gan-hans")) + { + /* Gan Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "gan-hant")) + { + /* Gan Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hak-hans")) + { + /* Hakka Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hak-hant")) + { + /* Hakka Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hsn-hans")) + { + /* Xiang Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hsn-hant")) + { + /* Xiang Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strcmp (lang_str, "i-navajo")) + { + /* Navajo */ + unsigned int i; + hb_tag_t possible_tags[] = { + HB_TAG('N','A','V',' '), /*
Navajo */ + HB_TAG('A','T','H',' '), /* Athapaskan */ + }; + for (i = 0; i < 2 && i < *count; i++) /* copy at most *count of the two candidates, in array order */ + tags[i] = possible_tags[i]; + *count = i; + return true; + } + if (lang_matches (lang_str, "lzh-hans")) + { + /* Literary Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "mnp-hans")) + { + /* Min Bei Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "mnp-hant")) + { + /* Min Bei Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "nan-hans")) + { + /* Min Nan Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "nan-hant")) + { + /* Min Nan Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (1 + && subtag_matches (lang_str, limit, "-geok")) + { + /* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */ + tags[0] = HB_TAG('K','G','E',' '); /* Khutsuri Georgian */ + *count = 1; + return true; + } + if (1 + && subtag_matches (lang_str, limit, "-syre")) + { + /* Undetermined; Syriac (Estrangelo variant) */ + tags[0] = HB_TAG('S','Y','R','E'); /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */ + *count = 1; + return true; + } + if (1 + && subtag_matches (lang_str, limit, "-syrj")) + { + /* Undetermined; Syriac (Western variant) */ + tags[0] = HB_TAG('S','Y','R','J'); /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */ + *count = 1; + return true; + } + if (1 + && subtag_matches (lang_str, limit, "-syrn")) + { + /* Undetermined; Syriac (Eastern variant) */ + tags[0] = HB_TAG('S','Y','R','N'); /* Syriac,
Eastern script-variant (equivalent to ISO 15924 'Syrn') */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "wuu-hans")) + { + /* Wu Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "wuu-hant")) + { + /* Wu Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "yue-hans")) + { + /* Yue Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "ga-latg")) + { + /* Irish; Latin (Gaelic variant) */ + tags[0] = HB_TAG('I','R','T',' '); /* Irish Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "zh-hans")) + { + /* Chinese; Han (Simplified variant) */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "zh-hant")) + { + /* Chinese; Han (Traditional variant) */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cdo") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Min Dong Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cdo") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Dong Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cdo") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Dong Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cjy") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Jinyu Chinese; Hong Kong */ +
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cjy") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Jinyu Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cjy") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Jinyu Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cmn") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Mandarin Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cmn") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Mandarin Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cmn") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Mandarin Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cpx") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Pu-Xian Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cpx") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Pu-Xian Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "cpx") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Pu-Xian Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czh") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Huizhou Chinese;
Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czh") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Huizhou Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czh") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Huizhou Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czo") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Min Zhong Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czo") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Zhong Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "czo") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Zhong Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "gan") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Gan Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "gan") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Gan Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "gan") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Gan Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hak") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Hakka
Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hak") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Hakka Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hak") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Hakka Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hsn") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Xiang Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hsn") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Xiang Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "hsn") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Xiang Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "mnp") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Min Bei Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "mnp") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Bei Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "mnp") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Bei Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "nan") + && subtag_matches (lang_str, limit, "-hk")) + { + /*
Min Nan Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "nan") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Nan Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "nan") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Nan Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strcmp (lang_str, "no-nyn")) + { + /* Norwegian Nynorsk */ + tags[0] = HB_TAG('N','Y','N',' '); /* Norwegian Nynorsk (Nynorsk, Norwegian) */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "wuu") + && subtag_matches (lang_str, limit, "-hk")) + { + /* Wu Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "wuu") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Wu Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "wuu") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Wu Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strcmp (lang_str, "i-hak")) + { + /* Hakka */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (0 == strcmp (lang_str, "i-lux")) + { + /* Luxembourgish */ + tags[0] = HB_TAG('L','T','Z',' '); /* Luxembourgish */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "ro") + && subtag_matches (lang_str, limit, "-md")) + { + /* Romanian; Moldova */ + tags[0] = HB_TAG('M','O','L',' '); /* Moldavian */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "zh") + && subtag_matches (lang_str,
limit, "-hk")) + { + /* Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "zh") + && subtag_matches (lang_str, limit, "-mo")) + { + /* Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (lang_str, "zh") + && subtag_matches (lang_str, limit, "-tw")) + { + /* Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + return false; /* nothing matched: *count and tags are left untouched */ +} + +/** + * hb_ot_ambiguous_tag_to_language: + * @tag: A language tag. + * + * Converts @tag to a BCP 47 language tag if it is ambiguous (it corresponds to + * many language tags) and the best tag is not the alphabetically first, or if + * the best tag consists of multiple subtags. + * + * Return value: The #hb_language_t corresponding to the BCP 47 language tag, + * or #HB_LANGUAGE_INVALID if @tag has no entry in this function.
+ **/ +static hb_language_t +hb_ot_ambiguous_tag_to_language (hb_tag_t tag) +{ + switch (tag) /* each case pairs one OpenType tag with the single BCP 47 tag returned for it */ + { + case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */ + return hb_language_from_string ("und-fonnapa", -1); /* Undetermined; North American Phonetic Alphabet */ /* NOTE(review): the -1 length presumably means NUL-terminated -- confirm against hb_language_from_string */ + case HB_TAG('A','R','A',' '): /* Arabic */ + return hb_language_from_string ("ar", -1); /* Arabic */ + case HB_TAG('A','R','K',' '): /* Rakhine */ + return hb_language_from_string ("rki", -1); /* Rakhine */ + case HB_TAG('A','T','H',' '): /* Athapaskan */ + return hb_language_from_string ("ath", -1); /* Athapascan */ + case HB_TAG('B','I','K',' '): /* Bikol */ + return hb_language_from_string ("bik", -1); /* Bikol */ + case HB_TAG('C','R','R',' '): /* Carrier */ + return hb_language_from_string ("crx", -1); /* Carrier */ + case HB_TAG('D','N','K',' '): /* Dinka */ + return hb_language_from_string ("din", -1); /* Dinka */ + case HB_TAG('D','R','I',' '): /* Dari */ + return hb_language_from_string ("prs", -1); /* Dari */ + case HB_TAG('D','U','J',' '): /* Dhuwal */ + return hb_language_from_string ("dwu", -1); /* Dhuwal */ + case HB_TAG('D','Z','N',' '): /* Dzongkha */ + return hb_language_from_string ("dz", -1); /* Dzongkha */ + case HB_TAG('E','T','I',' '): /* Estonian */ + return hb_language_from_string ("et", -1); /* Estonian */ + case HB_TAG('G','O','N',' '): /* Gondi */ + return hb_language_from_string ("gon", -1); /* Gondi */ + case HB_TAG('H','M','N',' '): /* Hmong */ + return hb_language_from_string ("hmn", -1); /* Hmong */ + case HB_TAG('I','J','O',' '): /* Ijo */ + return hb_language_from_string ("ijo", -1); /* Ijo */ + case HB_TAG('I','N','U',' '): /* Inuktitut */ + return hb_language_from_string ("iu", -1); /* Inuktitut */ + case HB_TAG('I','P','K',' '): /* Inupiat */ + return hb_language_from_string ("ik", -1); /* Inupiaq */ + case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */ + return hb_language_from_string ("und-fonipa", -1); /*
Undetermined; International Phonetic Alphabet */ + case HB_TAG('I','R','T',' '): /* Irish Traditional */ + return hb_language_from_string ("ga-Latg", -1); /* Irish; Latin (Gaelic variant) */ + case HB_TAG('J','I','I',' '): /* Yiddish */ + return hb_language_from_string ("yi", -1); /* Yiddish */ + case HB_TAG('K','A','L',' '): /* Kalenjin */ + return hb_language_from_string ("kln", -1); /* Kalenjin */ + case HB_TAG('K','G','E',' '): /* Khutsuri Georgian */ + return hb_language_from_string ("und-Geok", -1); /* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */ + case HB_TAG('K','N','R',' '): /* Kanuri */ + return hb_language_from_string ("kr", -1); /* Kanuri */ + case HB_TAG('K','O','K',' '): /* Konkani */ + return hb_language_from_string ("kok", -1); /* Konkani */ + case HB_TAG('K','U','R',' '): /* Kurdish */ + return hb_language_from_string ("ku", -1); /* Kurdish */ + case HB_TAG('L','U','H',' '): /* Luyia */ + return hb_language_from_string ("luy", -1); /* Luyia */ + case HB_TAG('L','V','I',' '): /* Latvian */ + return hb_language_from_string ("lv", -1); /* Latvian */ + case HB_TAG('M','A','W',' '): /* Marwari */ + return hb_language_from_string ("mwr", -1); /* Marwari */ + case HB_TAG('M','L','G',' '): /* Malagasy */ + return hb_language_from_string ("mg", -1); /* Malagasy */ + case HB_TAG('M','L','Y',' '): /* Malay */ + return hb_language_from_string ("ms", -1); /* Malay */ + case HB_TAG('M','N','G',' '): /* Mongolian */ + return hb_language_from_string ("mn", -1); /* Mongolian */ + case HB_TAG('M','O','L',' '): /* Moldavian */ + return hb_language_from_string ("ro-MD", -1); /* Romanian; Moldova */ + case HB_TAG('N','E','P',' '): /* Nepali */ + return hb_language_from_string ("ne", -1); /* Nepali */ + case HB_TAG('N','I','S',' '): /* Nisi */ + return hb_language_from_string ("njz", -1); /* Nyishi */ + case HB_TAG('N','O','R',' '): /* Norwegian */ + return hb_language_from_string ("no", -1); /* Norwegian */ + case HB_TAG('O','J','B',' '): /* Ojibway */ + return
hb_language_from_string ("oj", -1); /* Ojibwa */ + case HB_TAG('O','R','O',' '): /* Oromo */ + return hb_language_from_string ("om", -1); /* Oromo */ + case HB_TAG('P','A','S',' '): /* Pashto */ + return hb_language_from_string ("ps", -1); /* Pashto */ + case HB_TAG('P','G','R',' '): /* Polytonic Greek */ + return hb_language_from_string ("el-polyton", -1); /* Modern Greek (1453-); Polytonic Greek */ + case HB_TAG('Q','U','H',' '): /* Quechua (Bolivia) */ + return hb_language_from_string ("quh", -1); /* South Bolivian Quechua */ + case HB_TAG('R','A','J',' '): /* Rajasthani */ + return hb_language_from_string ("raj", -1); /* Rajasthani */ + case HB_TAG('R','O','Y',' '): /* Romany */ + return hb_language_from_string ("rom", -1); /* Romany */ + case HB_TAG('S','Q','I',' '): /* Albanian */ + return hb_language_from_string ("sq", -1); /* Albanian */ + case HB_TAG('S','Y','R',' '): /* Syriac */ + return hb_language_from_string ("syr", -1); /* Syriac */ + case HB_TAG('S','Y','R','E'): /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */ + return hb_language_from_string ("und-Syre", -1); /* Undetermined; Syriac (Estrangelo variant) */ + case HB_TAG('S','Y','R','J'): /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */ + return hb_language_from_string ("und-Syrj", -1); /* Undetermined; Syriac (Western variant) */ + case HB_TAG('S','Y','R','N'): /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */ + return hb_language_from_string ("und-Syrn", -1); /* Undetermined; Syriac (Eastern variant) */ + case HB_TAG('T','M','H',' '): /* Tamashek */ + return hb_language_from_string ("tmh", -1); /* Tamashek */ + case HB_TAG('Z','H','H',' '): /* Chinese, Hong Kong SAR */ + return hb_language_from_string ("zh-HK", -1); /* Chinese; Hong Kong */ + case HB_TAG('Z','H','S',' '): /* Chinese Simplified */ + return hb_language_from_string ("zh-Hans", -1); /* Chinese; Han (Simplified variant) */ + case HB_TAG('Z','H','T',' '): /* Chinese
Traditional */ + return hb_language_from_string ("zh-Hant", -1); /* Chinese; Han (Traditional variant) */ + default: /* tag has no entry above */ + return HB_LANGUAGE_INVALID; + } +} + +#endif /* HB_OT_TAG_TABLE_HH */ + +/* == End of generated table == */ diff --git a/src/hb-ot-tag.cc b/src/hb-ot-tag.cc index a9692409a..4d8cb5989 100644 --- a/src/hb-ot-tag.cc +++ b/src/hb-ot-tag.cc @@ -167,732 +167,6 @@ hb_ot_tag_to_script (hb_tag_t tag) /* hb_language_t */ -typedef struct { - char language[4]; - hb_tag_t tags[HB_OT_MAX_TAGS_PER_LANGUAGE]; -} LangTag; - -/* - * Complete list at: - * https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags - * - * Generated by intersecting the OpenType language tag list from - * Draft OpenType 1.5 spec, with with the ISO 639-3 codes from - * 2008-08-04, matching on name, and finally adjusted manually. - * - * Updated on 2012-12-07 with more research into remaining codes. - * - * Updated on 2013-11-23 based on usage in SIL and Microsoft fonts, - * the new proposal from Microsoft, and latest ISO 639-3 names. - * - * Some items still missing. Those are commented out at the end. - * Keep sorted for bsearch. - * - * Updated as of 2015-05-06: OT1.7 on MS website has some newer - * items that we don't have here, eg. Zazaki.
This is the new - * items in OpenType 1.7 (red items), most of which we have: - * https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags - */ - -static const LangTag ot_languages[] = { - {"aa", {HB_TAG('A','F','R',' ')}}, /* Afar */ - {"ab", {HB_TAG('A','B','K',' ')}}, /* Abkhazian */ - {"abq", {HB_TAG('A','B','A',' ')}}, /* Abaza */ - {"acf", {HB_TAG('F','A','N',' ')}}, /* French Antillean */ - {"ach", {HB_TAG('A','C','H',' ')}}, /* Acoli */ - {"acr", {HB_TAG('A','C','R',' ')}}, /* Achi */ - {"ada", {HB_TAG('D','N','G',' ')}}, /* Dangme */ - {"ady", {HB_TAG('A','D','Y',' ')}}, /* Adyghe */ - {"af", {HB_TAG('A','F','K',' ')}}, /* Afrikaans */ - {"ahg", {HB_TAG('A','G','W',' ')}}, /* Agaw */ - {"aii", {HB_TAG('S','W','A',' ')}}, /* Swadaya Aramaic */ - {"aio", {HB_TAG('A','I','O',' ')}}, /* Aiton */ - {"aiw", {HB_TAG('A','R','I',' ')}}, /* Aari */ - {"ak", {HB_TAG('T','W','I',' ')}}, /* Akan [macrolanguage] */ - {"aka", {HB_TAG('A','K','A',' ')}}, /* Akan */ - {"alt", {HB_TAG('A','L','T',' ')}}, /* [Southern] Altai */ - {"am", {HB_TAG('A','M','H',' ')}}, /* Amharic */ - {"amf", {HB_TAG('H','B','N',' ')}}, /* Hammer-Banna */ - {"amw", {HB_TAG('S','Y','R',' ')}}, /* Western Neo-Aramaic */ - {"an", {HB_TAG('A','R','G',' ')}}, /* Aragonese */ - {"ang", {HB_TAG('A','N','G',' ')}}, /* Old English (ca. 
450-1100) */ - {"ar", {HB_TAG('A','R','A',' ')}}, /* Arabic [macrolanguage] */ - {"arb", {HB_TAG('A','R','A',' ')}}, /* Standard Arabic */ - {"arn", {HB_TAG('M','A','P',' ')}}, /* Mapudungun */ - {"ary", {HB_TAG('M','O','R',' ')}}, /* Moroccan Arabic */ - {"as", {HB_TAG('A','S','M',' ')}}, /* Assamese */ - {"ast", {HB_TAG('A','S','T',' ')}}, /* Asturian/Asturleonese/Bable/Leonese */ - {"ath", {HB_TAG('A','T','H',' ')}}, /* Athapaskan [family] */ - {"atj", {HB_TAG('R','C','R',' ')}}, /* R-Cree */ - {"atv", {HB_TAG('A','L','T',' ')}}, /* [Northern] Altai */ - {"av", {HB_TAG('A','V','R',' ')}}, /* Avaric */ - {"awa", {HB_TAG('A','W','A',' ')}}, /* Awadhi */ - {"ay", {HB_TAG('A','Y','M',' ')}}, /* Aymara [macrolanguage] */ - {"az", {HB_TAG('A','Z','E',' ')}}, /* Azerbaijani [macrolanguage] */ - {"azb", {HB_TAG('A','Z','B',' ')}}, /* South Azerbaijani */ - {"azj", {HB_TAG('A','Z','E',' ')}}, /* North Azerbaijani */ - {"ba", {HB_TAG('B','S','H',' ')}}, /* Bashkir */ - {"bad", {HB_TAG('B','A','D','0')}}, /* Banda */ - {"bai", {HB_TAG('B','M','L',' ')}}, /* Bamileke [family] */ - {"bal", {HB_TAG('B','L','I',' ')}}, /* Baluchi [macrolangauge] */ - {"ban", {HB_TAG('B','A','N',' ')}}, /* Balinese */ - {"bar", {HB_TAG('B','A','R',' ')}}, /* Bavarian */ - {"bbc", {HB_TAG('B','B','C',' ')}}, /* Batak Toba */ - {"bci", {HB_TAG('B','A','U',' ')}}, /* Baoulé */ - {"bcl", {HB_TAG('B','I','K',' ')}}, /* Central Bikol */ - {"bcq", {HB_TAG('B','C','H',' ')}}, /* Bench */ - {"bdy", {HB_TAG('B','D','Y',' ')}}, /* Bandjalang */ - {"be", {HB_TAG('B','E','L',' ')}}, /* Belarusian */ - {"bem", {HB_TAG('B','E','M',' ')}}, /* Bemba (Zambia) */ - {"ber", {HB_TAG('B','E','R',' ')}}, /* Berber [family] */ - {"bfq", {HB_TAG('B','A','D',' ')}}, /* Badaga */ - {"bft", {HB_TAG('B','L','T',' ')}}, /* Balti */ - {"bfu", {HB_TAG('L','A','H',' ')}}, /* Lahuli */ - {"bfy", {HB_TAG('B','A','G',' ')}}, /* Baghelkhandi */ - {"bg", {HB_TAG('B','G','R',' ')}}, /* Bulgarian */ - {"bgc", {HB_TAG('B','G','C',' 
')}}, /* Haryanvi */ - {"bgq", {HB_TAG('B','G','Q',' ')}}, /* Bagri */ - {"bgr", {HB_TAG('Q','I','N',' ')}}, /* Bawm Chin */ - {"bhb", {HB_TAG('B','H','I',' ')}}, /* Bhili */ - {"bhk", {HB_TAG('B','I','K',' ')}}, /* Albay Bicolano (retired code) */ - {"bho", {HB_TAG('B','H','O',' ')}}, /* Bhojpuri */ - {"bi", {HB_TAG('B','I','S',' ')}}, /* Bislama */ - {"bik", {HB_TAG('B','I','K',' ')}}, /* Bikol [macrolanguage] */ - {"bin", {HB_TAG('E','D','O',' ')}}, /* Bini */ - {"bjj", {HB_TAG('B','J','J',' ')}}, /* Kanauji */ - {"bjt", {HB_TAG('B','L','N',' ')}}, /* Balanta-Ganja */ - {"bla", {HB_TAG('B','K','F',' ')}}, /* Blackfoot */ - {"ble", {HB_TAG('B','L','N',' ')}}, /* Balanta-Kentohe */ - {"blk", {HB_TAG('B','L','K',' ')}}, /* Pa'O/Pa'o Karen */ - {"bln", {HB_TAG('B','I','K',' ')}}, /* Southern Catanduanes Bikol */ - {"bm", {HB_TAG('B','M','B',' ')}}, /* Bambara */ - {"bn", {HB_TAG('B','E','N',' ')}}, /* Bengali */ - {"bo", {HB_TAG('T','I','B',' ')}}, /* Tibetan */ - {"bpy", {HB_TAG('B','P','Y',' ')}}, /* Bishnupriya */ - {"bqi", {HB_TAG('L','R','C',' ')}}, /* Bakhtiari */ - {"br", {HB_TAG('B','R','E',' ')}}, /* Breton */ - {"bra", {HB_TAG('B','R','I',' ')}}, /* Braj Bhasha */ - {"brh", {HB_TAG('B','R','H',' ')}}, /* Brahui */ - {"brx", {HB_TAG('B','R','X',' ')}}, /* Bodo (India) */ - {"bs", {HB_TAG('B','O','S',' ')}}, /* Bosnian */ - {"btb", {HB_TAG('B','T','I',' ')}}, /* Beti (Cameroon) */ - {"bto", {HB_TAG('B','I','K',' ')}}, /* Rinconada Bikol */ - {"bts", {HB_TAG('B','T','S',' ')}}, /* Batak Simalungun */ - {"bug", {HB_TAG('B','U','G',' ')}}, /* Buginese */ - {"bxr", {HB_TAG('R','B','U',' ')}}, /* Russian Buriat */ - {"byn", {HB_TAG('B','I','L',' ')}}, /* Bilen */ - {"ca", {HB_TAG('C','A','T',' ')}}, /* Catalan */ - {"cak", {HB_TAG('C','A','K',' ')}}, /* Kaqchikel */ - {"cbk", {HB_TAG('C','B','K',' ')}}, /* Chavacano */ - {"cbl", {HB_TAG('Q','I','N',' ')}}, /* Bualkhaw Chin */ - {"cco", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"ce", {HB_TAG('C','H','E',' 
')}}, /* Chechen */ - {"ceb", {HB_TAG('C','E','B',' ')}}, /* Cebuano */ - {"cfm", {HB_TAG('H','A','L',' ')}}, /* Halam/Falam Chin */ - {"cgg", {HB_TAG('C','G','G',' ')}}, /* Chiga */ - {"ch", {HB_TAG('C','H','A',' ')}}, /* Chamorro */ - {"chj", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"chk", {HB_TAG('C','H','K','0')}}, /* Chuukese */ - {"cho", {HB_TAG('C','H','O',' ')}}, /* Choctaw */ - {"chp", {HB_TAG('C','H','P',' ')}}, /* Chipewyan */ - {"chq", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"chr", {HB_TAG('C','H','R',' ')}}, /* Cherokee */ - {"chy", {HB_TAG('C','H','Y',' ')}}, /* Cheyenne */ - {"chz", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cja", {HB_TAG('C','J','A',' ')}}, /* Western Cham */ - {"cjm", {HB_TAG('C','J','M',' ')}}, /* Eastern Cham */ - {"cka", {HB_TAG('Q','I','N',' ')}}, /* Khumi Awa Chin */ - {"ckb", {HB_TAG('K','U','R',' ')}}, /* Central Kurdish (Sorani) */ - {"ckt", {HB_TAG('C','H','K',' ')}}, /* Chukchi */ - {"cld", {HB_TAG('S','Y','R',' ')}}, /* Chaldean Neo-Aramaic */ - {"cle", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cmr", {HB_TAG('Q','I','N',' ')}}, /* Mro-Khimi Chin */ - {"cnb", {HB_TAG('Q','I','N',' ')}}, /* Chinbon Chin */ - {"cnh", {HB_TAG('Q','I','N',' ')}}, /* Hakha Chin */ - {"cnk", {HB_TAG('Q','I','N',' ')}}, /* Khumi Chin */ - {"cnl", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cnt", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cnw", {HB_TAG('Q','I','N',' ')}}, /* Ngawn Chin */ - {"cop", {HB_TAG('C','O','P',' ')}}, /* Coptic */ - {"cpa", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cpp", {HB_TAG('C','P','P',' ')}}, /* Creoles */ - {"cr", {HB_TAG('C','R','E',' ')}}, /* Cree */ - {"cre", {HB_TAG('Y','C','R',' ')}}, /* Y-Cree */ - {"crh", {HB_TAG('C','R','T',' ')}}, /* Crimean Tatar */ - {"crj", {HB_TAG('E','C','R',' ')}}, /* [Southern] East Cree */ - {"crk", {HB_TAG('W','C','R',' ')}}, /* West-Cree */ - {"crl", {HB_TAG('E','C','R',' ')}}, /* [Northern] East Cree */ - {"crm", {HB_TAG('M','C','R',' ')}}, 
/* Moose Cree */ - {"crx", {HB_TAG('C','R','R',' ')}}, /* Carrier */ - {"cs", {HB_TAG('C','S','Y',' ')}}, /* Czech */ - {"csa", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"csb", {HB_TAG('C','S','B',' ')}}, /* Kashubian */ - {"csh", {HB_TAG('Q','I','N',' ')}}, /* Asho Chin */ - {"cso", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"csy", {HB_TAG('Q','I','N',' ')}}, /* Siyin Chin */ - {"ctd", {HB_TAG('Q','I','N',' ')}}, /* Tedim Chin */ - {"cte", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"ctg", {HB_TAG('C','T','G',' ')}}, /* Chittagonian */ - {"ctl", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cts", {HB_TAG('B','I','K',' ')}}, /* Northern Catanduanes Bikol */ - {"cu", {HB_TAG('C','S','L',' ')}}, /* Church Slavic */ - {"cuc", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cuk", {HB_TAG('C','U','K',' ')}}, /* San Blas Kuna */ - {"cv", {HB_TAG('C','H','U',' ')}}, /* Chuvash */ - {"cvn", {HB_TAG('C','C','H','N')}}, /* Chinantec */ - {"cwd", {HB_TAG('D','C','R',' ')}}, /* Woods Cree */ - {"cy", {HB_TAG('W','E','L',' ')}}, /* Welsh */ - {"czt", {HB_TAG('Q','I','N',' ')}}, /* Zotung Chin */ - {"da", {HB_TAG('D','A','N',' ')}}, /* Danish */ - {"dao", {HB_TAG('Q','I','N',' ')}}, /* Daai Chin */ - {"dap", {HB_TAG('N','I','S',' ')}}, /* Nisi (India) */ - {"dar", {HB_TAG('D','A','R',' ')}}, /* Dargwa */ - {"dax", {HB_TAG('D','A','X',' ')}}, /* Dayi */ - {"de", {HB_TAG('D','E','U',' ')}}, /* German */ - {"dgo", {HB_TAG('D','G','O',' ')}}, /* Dogri */ - {"dhd", {HB_TAG('M','A','W',' ')}}, /* Dhundari */ - {"dhg", {HB_TAG('D','H','G',' ')}}, /* Dhangu */ - {"din", {HB_TAG('D','N','K',' ')}}, /* Dinka [macrolanguage] */ - {"diq", {HB_TAG('D','I','Q',' ')}}, /* Dimli */ - {"dje", {HB_TAG('D','J','R',' ')}}, /* Zarma */ - {"djr", {HB_TAG('D','J','R','0')}}, /* Djambarrpuyngu */ - {"dng", {HB_TAG('D','U','N',' ')}}, /* Dungan */ - {"dnj", {HB_TAG('D','N','J',' ')}}, /* Dan */ - {"doi", {HB_TAG('D','G','R',' ')}}, /* Dogri [macrolanguage] */ - {"dsb", 
{HB_TAG('L','S','B',' ')}}, /* Lower Sorbian */ - {"duj", {HB_TAG('D','U','J',' ')}}, /* Dhuwal */ - {"dv", {HB_TAG('D','I','V',' ')}}, /* Dhivehi/Divehi/Maldivian */ - {"dyu", {HB_TAG('J','U','L',' ')}}, /* Jula */ - {"dz", {HB_TAG('D','Z','N',' ')}}, /* Dzongkha */ - {"ee", {HB_TAG('E','W','E',' ')}}, /* Ewe */ - {"efi", {HB_TAG('E','F','I',' ')}}, /* Efik */ - {"ekk", {HB_TAG('E','T','I',' ')}}, /* Standard Estonian */ - {"el", {HB_TAG('E','L','L',' ')}}, /* Modern Greek (1453-) */ - {"emk", {HB_TAG('M','N','K',' ')}}, /* Eastern Maninkakan */ - {"en", {HB_TAG('E','N','G',' ')}}, /* English */ - {"enf", {HB_TAG('F','N','E',' ')}}, /* Forest Nenets */ - {"enh", {HB_TAG('T','N','E',' ')}}, /* Tundra Nenets */ - {"eo", {HB_TAG('N','T','O',' ')}}, /* Esperanto */ - {"eot", {HB_TAG('B','T','I',' ')}}, /* Beti (Côte d'Ivoire) */ - {"es", {HB_TAG('E','S','P',' ')}}, /* Spanish */ - {"esu", {HB_TAG('E','S','U',' ')}}, /* Central Yupik */ - {"et", {HB_TAG('E','T','I',' ')}}, /* Estonian [macrolanguage] */ - {"eu", {HB_TAG('E','U','Q',' ')}}, /* Basque */ - {"eve", {HB_TAG('E','V','N',' ')}}, /* Even */ - {"evn", {HB_TAG('E','V','K',' ')}}, /* Evenki */ - {"fa", {HB_TAG('F','A','R',' ')}}, /* Persian [macrolanguage] */ - {"fan", {HB_TAG('F','A','N','0')}}, /* Fang */ - {"fat", {HB_TAG('F','A','T',' ')}}, /* Fanti */ - {"ff", {HB_TAG('F','U','L',' ')}}, /* Fulah [macrolanguage] */ - {"fi", {HB_TAG('F','I','N',' ')}}, /* Finnish */ - {"fil", {HB_TAG('P','I','L',' ')}}, /* Filipino */ - {"fj", {HB_TAG('F','J','I',' ')}}, /* Fijian */ - {"flm", {HB_TAG('H','A','L',' ')}}, /* Halam/Falam Chin [retired ISO639 code] */ - {"fo", {HB_TAG('F','O','S',' ')}}, /* Faroese */ - {"fon", {HB_TAG('F','O','N',' ')}}, /* Fon */ - {"fr", {HB_TAG('F','R','A',' ')}}, /* French */ - {"frc", {HB_TAG('F','R','C',' ')}}, /* Cajun French */ - {"frp", {HB_TAG('F','R','P',' ')}}, /* Arpitan/Francoprovençal */ - {"fuf", {HB_TAG('F','T','A',' ')}}, /* Futa */ - {"fur", {HB_TAG('F','R','L',' ')}}, /* 
Friulian */ - {"fuv", {HB_TAG('F','U','V',' ')}}, /* Nigerian Fulfulde */ - {"fy", {HB_TAG('F','R','I',' ')}}, /* Western Frisian */ - {"ga", {HB_TAG('I','R','I',' ')}}, /* Irish */ - {"gaa", {HB_TAG('G','A','D',' ')}}, /* Ga */ - {"gag", {HB_TAG('G','A','G',' ')}}, /* Gagauz */ - {"gbm", {HB_TAG('G','A','W',' ')}}, /* Garhwali */ - {"gd", {HB_TAG('G','A','E',' ')}}, /* Scottish Gaelic */ - {"gez", {HB_TAG('G','E','Z',' ')}}, /* Ge'ez */ - {"ggo", {HB_TAG('G','O','N',' ')}}, /* Southern Gondi */ - {"gih", {HB_TAG('G','I','H',' ')}}, /* Githabul */ - {"gil", {HB_TAG('G','I','L','0')}}, /* Kiribati (Gilbertese) */ - {"gkp", {HB_TAG('G','K','P',' ')}}, /* Kpelle (Guinea) */ - {"gl", {HB_TAG('G','A','L',' ')}}, /* Galician */ - {"gld", {HB_TAG('N','A','N',' ')}}, /* Nanai */ - {"glk", {HB_TAG('G','L','K',' ')}}, /* Gilaki */ - {"gn", {HB_TAG('G','U','A',' ')}}, /* Guarani [macrolanguage] */ - {"gnn", {HB_TAG('G','N','N',' ')}}, /* Gumatj */ - {"gno", {HB_TAG('G','O','N',' ')}}, /* Northern Gondi */ - {"gog", {HB_TAG('G','O','G',' ')}}, /* Gogo */ - {"gon", {HB_TAG('G','O','N',' ')}}, /* Gondi [macrolanguage] */ - {"grt", {HB_TAG('G','R','O',' ')}}, /* Garo */ - {"gru", {HB_TAG('S','O','G',' ')}}, /* Sodo Gurage */ - {"gsw", {HB_TAG('A','L','S',' ')}}, /* Alsatian */ - {"gu", {HB_TAG('G','U','J',' ')}}, /* Gujarati */ - {"guc", {HB_TAG('G','U','C',' ')}}, /* Wayuu */ - {"guf", {HB_TAG('G','U','F',' ')}}, /* Gupapuyngu */ - {"guk", {HB_TAG('G','M','Z',' ')}}, /* Gumuz */ -/*{"guk", {HB_TAG('G','U','K',' ')}},*/ /* Gumuz (in SIL fonts) */ - {"guz", {HB_TAG('G','U','Z',' ')}}, /* Ekegusii/Gusii */ - {"gv", {HB_TAG('M','N','X',' ')}}, /* Manx */ - {"ha", {HB_TAG('H','A','U',' ')}}, /* Hausa */ - {"har", {HB_TAG('H','R','I',' ')}}, /* Harari */ - {"haw", {HB_TAG('H','A','W',' ')}}, /* Hawaiian */ - {"hay", {HB_TAG('H','A','Y',' ')}}, /* Haya */ - {"haz", {HB_TAG('H','A','Z',' ')}}, /* Hazaragi */ - {"he", {HB_TAG('I','W','R',' ')}}, /* Hebrew */ - {"hi", 
{HB_TAG('H','I','N',' ')}}, /* Hindi */ - {"hil", {HB_TAG('H','I','L',' ')}}, /* Hiligaynon */ - {"hlt", {HB_TAG('Q','I','N',' ')}}, /* Matu Chin */ - {"hmn", {HB_TAG('H','M','N',' ')}}, /* Hmong */ - {"hnd", {HB_TAG('H','N','D',' ')}}, /* [Southern] Hindko */ - {"hne", {HB_TAG('C','H','H',' ')}}, /* Chattisgarhi */ - {"hno", {HB_TAG('H','N','D',' ')}}, /* [Northern] Hindko */ - {"ho", {HB_TAG('H','M','O',' ')}}, /* Hiri Motu */ - {"hoc", {HB_TAG('H','O',' ',' ')}}, /* Ho */ - {"hoj", {HB_TAG('H','A','R',' ')}}, /* Harauti */ - {"hr", {HB_TAG('H','R','V',' ')}}, /* Croatian */ - {"hsb", {HB_TAG('U','S','B',' ')}}, /* Upper Sorbian */ - {"ht", {HB_TAG('H','A','I',' ')}}, /* Haitian/Haitian Creole */ - {"hu", {HB_TAG('H','U','N',' ')}}, /* Hungarian */ - {"hy", {HB_TAG('H','Y','E',' ')}}, /* Armenian */ - {"hz", {HB_TAG('H','E','R',' ')}}, /* Herero */ - {"ia", {HB_TAG('I','N','A',' ')}}, /* Interlingua (International Auxiliary Language Association) */ - {"iba", {HB_TAG('I','B','A',' ')}}, /* Iban */ - {"ibb", {HB_TAG('I','B','B',' ')}}, /* Ibibio */ - {"id", {HB_TAG('I','N','D',' ')}}, /* Indonesian */ - {"ie", {HB_TAG('I','L','E',' ')}}, /* Interlingue/Occidental */ - {"ig", {HB_TAG('I','B','O',' ')}}, /* Igbo */ - {"igb", {HB_TAG('E','B','I',' ')}}, /* Ebira */ - {"ii", {HB_TAG('Y','I','M',' ')}}, /* Yi Modern */ - {"ijc", {HB_TAG('I','J','O',' ')}}, /* Izon */ - {"ijo", {HB_TAG('I','J','O',' ')}}, /* Ijo [family] */ - {"ik", {HB_TAG('I','P','K',' ')}}, /* Inupiaq [macrolanguage] */ - {"ilo", {HB_TAG('I','L','O',' ')}}, /* Ilokano */ - {"inh", {HB_TAG('I','N','G',' ')}}, /* Ingush */ - {"io", {HB_TAG('I','D','O',' ')}}, /* Ido */ - {"is", {HB_TAG('I','S','L',' ')}}, /* Icelandic */ - {"it", {HB_TAG('I','T','A',' ')}}, /* Italian */ - {"iu", {HB_TAG('I','N','U',' ')}}, /* Inuktitut [macrolanguage] */ - {"ja", {HB_TAG('J','A','N',' ')}}, /* Japanese */ - {"jam", {HB_TAG('J','A','M',' ')}}, /* Jamaican Creole English */ - {"jbo", {HB_TAG('J','B','O',' ')}}, /* Lojban 
*/ - {"jv", {HB_TAG('J','A','V',' ')}}, /* Javanese */ - {"ka", {HB_TAG('K','A','T',' ')}}, /* Georgian */ - {"kaa", {HB_TAG('K','R','K',' ')}}, /* Karakalpak */ - {"kab", {HB_TAG('K','A','B','0')}}, /* Kabyle */ - {"kam", {HB_TAG('K','M','B',' ')}}, /* Kamba (Kenya) */ - {"kar", {HB_TAG('K','R','N',' ')}}, /* Karen [family] */ - {"kat", {HB_TAG('K','G','E',' ')}}, /* Khutsuri Georgian */ - {"kbd", {HB_TAG('K','A','B',' ')}}, /* Kabardian */ - {"kde", {HB_TAG('K','D','E',' ')}}, /* Makonde */ - {"kdr", {HB_TAG('K','R','M',' ')}}, /* Karaim */ - {"kdt", {HB_TAG('K','U','Y',' ')}}, /* Kuy */ - {"kea", {HB_TAG('K','E','A',' ')}}, /* Kabuverdianu (Crioulo) */ - {"kek", {HB_TAG('K','E','K',' ')}}, /* Kekchi */ - {"kex", {HB_TAG('K','K','N',' ')}}, /* Kokni */ - {"kfa", {HB_TAG('K','O','D',' ')}}, /* Kodagu */ - {"kfr", {HB_TAG('K','A','C',' ')}}, /* Kachchi */ - {"kfx", {HB_TAG('K','U','L',' ')}}, /* Kulvi */ - {"kfy", {HB_TAG('K','M','N',' ')}}, /* Kumaoni */ - {"kg", {HB_TAG('K','O','N',' ')}}, /* Kongo [macrolanguage] */ - {"kha", {HB_TAG('K','S','I',' ')}}, /* Khasi */ - {"khb", {HB_TAG('X','B','D',' ')}}, /* Lü */ - {"kht", {HB_TAG('K','H','N',' ')}}, /* Khamti (Microsoft fonts) */ -/*{"kht", {HB_TAG('K','H','T',' ')}},*/ /* Khamti (OpenType spec and SIL fonts) */ - {"khw", {HB_TAG('K','H','W',' ')}}, /* Khowar */ - {"ki", {HB_TAG('K','I','K',' ')}}, /* Gikuyu/Kikuyu */ - {"kiu", {HB_TAG('K','I','U',' ')}}, /* Kirmanjki */ - {"kj", {HB_TAG('K','U','A',' ')}}, /* Kuanyama/Kwanyama */ - {"kjd", {HB_TAG('K','J','D',' ')}}, /* Southern Kiwai */ - {"kjh", {HB_TAG('K','H','A',' ')}}, /* Khakass */ - {"kjp", {HB_TAG('K','J','P',' ')}}, /* Pwo Eastern Karen */ - {"kk", {HB_TAG('K','A','Z',' ')}}, /* Kazakh */ - {"kl", {HB_TAG('G','R','N',' ')}}, /* Kalaallisut */ - {"kln", {HB_TAG('K','A','L',' ')}}, /* Kalenjin */ - {"km", {HB_TAG('K','H','M',' ')}}, /* Central Khmer */ - {"kmb", {HB_TAG('M','B','N',' ')}}, /* Kimbundu */ - {"kmw", {HB_TAG('K','M','O',' ')}}, /* Komo 
(Democratic Republic of Congo) */ - {"kn", {HB_TAG('K','A','N',' ')}}, /* Kannada */ - {"knn", {HB_TAG('K','O','K',' ')}}, /* Konkani */ - {"ko", {HB_TAG('K','O','R',' ')}}, /* Korean */ - {"koi", {HB_TAG('K','O','P',' ')}}, /* Komi-Permyak */ - {"kok", {HB_TAG('K','O','K',' ')}}, /* Konkani [macrolanguage] */ - {"kon", {HB_TAG('K','O','N','0')}}, /* Kongo */ - {"kos", {HB_TAG('K','O','S',' ')}}, /* Kosraean */ - {"kpe", {HB_TAG('K','P','L',' ')}}, /* Kpelle [macrolanguage] */ - {"kpv", {HB_TAG('K','O','Z',' ')}}, /* Komi-Zyrian */ - {"kpy", {HB_TAG('K','Y','K',' ')}}, /* Koryak */ - {"kqy", {HB_TAG('K','R','T',' ')}}, /* Koorete */ - {"kr", {HB_TAG('K','N','R',' ')}}, /* Kanuri [macrolanguage] */ - {"kri", {HB_TAG('K','R','I',' ')}}, /* Krio */ - {"krl", {HB_TAG('K','R','L',' ')}}, /* Karelian */ - {"kru", {HB_TAG('K','U','U',' ')}}, /* Kurukh */ - {"ks", {HB_TAG('K','S','H',' ')}}, /* Kashmiri */ - {"ksh", {HB_TAG('K','S','H','0')}}, /* Ripuarian, Kölsch */ -/*{"ksw", {HB_TAG('K','R','N',' ')}},*/ /* S'gaw Karen (Microsoft fonts?) 
*/ - {"ksw", {HB_TAG('K','S','W',' ')}}, /* S'gaw Karen (OpenType spec and SIL fonts) */ - {"ktb", {HB_TAG('K','E','B',' ')}}, /* Kebena */ - {"ktu", {HB_TAG('K','O','N',' ')}}, /* Kikongo */ - {"ku", {HB_TAG('K','U','R',' ')}}, /* Kurdish [macrolanguage] */ - {"kum", {HB_TAG('K','U','M',' ')}}, /* Kumyk */ - {"kv", {HB_TAG('K','O','M',' ')}}, /* Komi [macrolanguage] */ - {"kvd", {HB_TAG('K','U','I',' ')}}, /* Kui (Indonesia) */ - {"kw", {HB_TAG('C','O','R',' ')}}, /* Cornish */ - {"kxc", {HB_TAG('K','M','S',' ')}}, /* Komso */ - {"kxu", {HB_TAG('K','U','I',' ')}}, /* Kui (India) */ - {"ky", {HB_TAG('K','I','R',' ')}}, /* Kirghiz/Kyrgyz */ - {"kyu", {HB_TAG('K','Y','U',' ')}}, /* Western Kayah */ - {"la", {HB_TAG('L','A','T',' ')}}, /* Latin */ - {"lad", {HB_TAG('J','U','D',' ')}}, /* Ladino */ - {"lb", {HB_TAG('L','T','Z',' ')}}, /* Luxembourgish */ - {"lbe", {HB_TAG('L','A','K',' ')}}, /* Lak */ - {"lbj", {HB_TAG('L','D','K',' ')}}, /* Ladakhi */ - {"lez", {HB_TAG('L','E','Z',' ')}}, /* Lezgi */ - {"lg", {HB_TAG('L','U','G',' ')}}, /* Ganda */ - {"li", {HB_TAG('L','I','M',' ')}}, /* Limburgan/Limburger/Limburgish */ - {"lif", {HB_TAG('L','M','B',' ')}}, /* Limbu */ - {"lij", {HB_TAG('L','I','J',' ')}}, /* Ligurian */ - {"lis", {HB_TAG('L','I','S',' ')}}, /* Lisu */ - {"ljp", {HB_TAG('L','J','P',' ')}}, /* Lampung Api */ - {"lki", {HB_TAG('L','K','I',' ')}}, /* Laki */ - {"lld", {HB_TAG('L','A','D',' ')}}, /* Ladin */ - {"lmn", {HB_TAG('L','A','M',' ')}}, /* Lambani */ - {"lmo", {HB_TAG('L','M','O',' ')}}, /* Lombard */ - {"ln", {HB_TAG('L','I','N',' ')}}, /* Lingala */ - {"lo", {HB_TAG('L','A','O',' ')}}, /* Lao */ - {"lom", {HB_TAG('L','O','M',' ')}}, /* Loma */ - {"lrc", {HB_TAG('L','R','C',' ')}}, /* Northern Luri */ - {"lt", {HB_TAG('L','T','H',' ')}}, /* Lithuanian */ - {"lu", {HB_TAG('L','U','B',' ')}}, /* Luba-Katanga */ - {"lua", {HB_TAG('L','U','B',' ')}}, /* Luba-Kasai */ - {"luo", {HB_TAG('L','U','O',' ')}}, /* Luo (Kenya and Tanzania) */ - {"lus", 
{HB_TAG('M','I','Z',' ')}}, /* Mizo */ - {"luy", {HB_TAG('L','U','H',' ')}}, /* Luyia/Oluluyia [macrolanguage] */ - {"luz", {HB_TAG('L','R','C',' ')}}, /* Southern Luri */ - {"lv", {HB_TAG('L','V','I',' ')}}, /* Latvian */ - {"lzz", {HB_TAG('L','A','Z',' ')}}, /* Laz */ - {"mad", {HB_TAG('M','A','D',' ')}}, /* Madurese */ - {"mag", {HB_TAG('M','A','G',' ')}}, /* Magahi */ - {"mai", {HB_TAG('M','T','H',' ')}}, /* Maithili */ - {"mak", {HB_TAG('M','K','R',' ')}}, /* Makasar */ - {"mam", {HB_TAG('M','A','M',' ')}}, /* Mam */ - {"man", {HB_TAG('M','N','K',' ')}}, /* Manding/Mandingo [macrolanguage] */ - {"mdc", {HB_TAG('M','L','E',' ')}}, /* Male (Papua New Guinea) */ - {"mdf", {HB_TAG('M','O','K',' ')}}, /* Moksha */ - {"mdr", {HB_TAG('M','D','R',' ')}}, /* Mandar */ - {"mdy", {HB_TAG('M','L','E',' ')}}, /* Male (Ethiopia) */ - {"men", {HB_TAG('M','D','E',' ')}}, /* Mende (Sierra Leone) */ - {"mer", {HB_TAG('M','E','R',' ')}}, /* Meru */ - {"mfe", {HB_TAG('M','F','E',' ')}}, /* Morisyen */ - {"mg", {HB_TAG('M','L','G',' ')}}, /* Malagasy [macrolanguage] */ - {"mh", {HB_TAG('M','A','H',' ')}}, /* Marshallese */ - {"mhr", {HB_TAG('L','M','A',' ')}}, /* Low Mari */ - {"mi", {HB_TAG('M','R','I',' ')}}, /* Maori */ - {"min", {HB_TAG('M','I','N',' ')}}, /* Minangkabau */ - {"mk", {HB_TAG('M','K','D',' ')}}, /* Macedonian */ - {"mku", {HB_TAG('M','N','K',' ')}}, /* Konyanka Maninka */ - {"mkw", {HB_TAG('M','K','W',' ')}}, /* Kituba (Congo) */ - {"ml", {HB_TAG('M','L','R',' ')}}, /* Malayalam */ - {"mlq", {HB_TAG('M','N','K',' ')}}, /* Western Maninkakan */ - {"mn", {HB_TAG('M','N','G',' ')}}, /* Mongolian [macrolanguage] */ - {"mnc", {HB_TAG('M','C','H',' ')}}, /* Manchu */ - {"mni", {HB_TAG('M','N','I',' ')}}, /* Manipuri */ - {"mnk", {HB_TAG('M','N','D',' ')}}, /* Mandinka */ - {"mns", {HB_TAG('M','A','N',' ')}}, /* Mansi */ - {"mnw", {HB_TAG('M','O','N',' ')}}, /* Mon */ - {"mo", {HB_TAG('M','O','L',' ')}}, /* Moldavian */ - {"moh", {HB_TAG('M','O','H',' ')}}, /* Mohawk 
*/ - {"mos", {HB_TAG('M','O','S',' ')}}, /* Mossi */ - {"mpe", {HB_TAG('M','A','J',' ')}}, /* Majang */ - {"mr", {HB_TAG('M','A','R',' ')}}, /* Marathi */ - {"mrh", {HB_TAG('Q','I','N',' ')}}, /* Mara Chin */ - {"mrj", {HB_TAG('H','M','A',' ')}}, /* High Mari */ - {"ms", {HB_TAG('M','L','Y',' ')}}, /* Malay [macrolanguage] */ - {"msc", {HB_TAG('M','N','K',' ')}}, /* Sankaran Maninka */ - {"mt", {HB_TAG('M','T','S',' ')}}, /* Maltese */ - {"mtr", {HB_TAG('M','A','W',' ')}}, /* Mewari */ - {"mus", {HB_TAG('M','U','S',' ')}}, /* Creek */ - {"mve", {HB_TAG('M','A','W',' ')}}, /* Marwari (Pakistan) */ - {"mwk", {HB_TAG('M','N','K',' ')}}, /* Kita Maninkakan */ - {"mwl", {HB_TAG('M','W','L',' ')}}, /* Mirandese */ - {"mwr", {HB_TAG('M','A','W',' ')}}, /* Marwari [macrolanguage] */ - {"mww", {HB_TAG('M','W','W',' ')}}, /* Hmong Daw */ - {"my", {HB_TAG('B','R','M',' ')}}, /* Burmese */ - {"mym", {HB_TAG('M','E','N',' ')}}, /* Me'en */ - {"myn", {HB_TAG('M','Y','N',' ')}}, /* Mayan */ - {"myq", {HB_TAG('M','N','K',' ')}}, /* Forest Maninka (retired code) */ - {"myv", {HB_TAG('E','R','Z',' ')}}, /* Erzya */ - {"mzn", {HB_TAG('M','Z','N',' ')}}, /* Mazanderani */ - {"na", {HB_TAG('N','A','U',' ')}}, /* Nauru */ - {"nag", {HB_TAG('N','A','G',' ')}}, /* Naga-Assamese */ - {"nah", {HB_TAG('N','A','H',' ')}}, /* Nahuatl [family] */ - {"nap", {HB_TAG('N','A','P',' ')}}, /* Neapolitan */ - {"nb", {HB_TAG('N','O','R',' ')}}, /* Norwegian Bokmål */ - {"nco", {HB_TAG('S','I','B',' ')}}, /* Sibe */ - {"nd", {HB_TAG('N','D','B',' ')}}, /* [North] Ndebele */ - {"ndc", {HB_TAG('N','D','C',' ')}}, /* Ndau */ - {"nds", {HB_TAG('N','D','S',' ')}}, /* Low German/Low Saxon */ - {"ne", {HB_TAG('N','E','P',' ')}}, /* Nepali */ - {"new", {HB_TAG('N','E','W',' ')}}, /* Newari */ - {"ng", {HB_TAG('N','D','G',' ')}}, /* Ndonga */ - {"nga", {HB_TAG('N','G','A',' ')}}, /* Ngabaka */ - {"ngl", {HB_TAG('L','M','W',' ')}}, /* Lomwe */ - {"ngo", {HB_TAG('S','X','T',' ')}}, /* Sutu */ - {"niu", 
{HB_TAG('N','I','U',' ')}}, /* Niuean */ - {"niv", {HB_TAG('G','I','L',' ')}}, /* Gilyak */ - {"nl", {HB_TAG('N','L','D',' ')}}, /* Dutch */ - {"nn", {HB_TAG('N','Y','N',' ')}}, /* Norwegian Nynorsk */ - {"no", {HB_TAG('N','O','R',' ')}}, /* Norwegian [macrolanguage] */ - {"nod", {HB_TAG('N','T','A',' ')}}, /* Northern Thai */ - {"noe", {HB_TAG('N','O','E',' ')}}, /* Nimadi */ - {"nog", {HB_TAG('N','O','G',' ')}}, /* Nogai */ - {"nov", {HB_TAG('N','O','V',' ')}}, /* Novial */ - {"nqo", {HB_TAG('N','K','O',' ')}}, /* N'Ko */ - {"nr", {HB_TAG('N','D','B',' ')}}, /* [South] Ndebele */ - {"nsk", {HB_TAG('N','A','S',' ')}}, /* Naskapi */ - {"nso", {HB_TAG('S','O','T',' ')}}, /* [Northern] Sotho */ - {"nv", {HB_TAG('N','A','V',' ')}}, /* Navajo */ - {"ny", {HB_TAG('C','H','I',' ')}}, /* Chewa/Chichwa/Nyanja */ - {"nym", {HB_TAG('N','Y','M',' ')}}, /* Nyamwezi */ - {"nyn", {HB_TAG('N','K','L',' ')}}, /* Nyankole */ - {"oc", {HB_TAG('O','C','I',' ')}}, /* Occitan (post 1500) */ - {"oj", {HB_TAG('O','J','B',' ')}}, /* Ojibwa [macrolanguage] */ - {"ojs", {HB_TAG('O','C','R',' ')}}, /* Oji-Cree */ - {"okm", {HB_TAG('K','O','H',' ')}}, /* Korean Old Hangul */ - {"om", {HB_TAG('O','R','O',' ')}}, /* Oromo [macrolanguage] */ - {"or", {HB_TAG('O','R','I',' ')}}, /* Oriya */ - {"os", {HB_TAG('O','S','S',' ')}}, /* Ossetian */ - {"pa", {HB_TAG('P','A','N',' ')}}, /* Panjabi */ - {"pag", {HB_TAG('P','A','G',' ')}}, /* Pangasinan */ - {"pam", {HB_TAG('P','A','M',' ')}}, /* Kapampangan/Pampanga */ - {"pap", {HB_TAG('P','A','P','0')}}, /* Papiamento */ - {"pau", {HB_TAG('P','A','U',' ')}}, /* Palauan */ - {"pcc", {HB_TAG('P','C','C',' ')}}, /* Bouyei */ - {"pcd", {HB_TAG('P','C','D',' ')}}, /* Picard */ - {"pce", {HB_TAG('P','L','G',' ')}}, /* [Ruching] Palaung */ - {"pck", {HB_TAG('Q','I','N',' ')}}, /* Paite Chin */ - {"pdc", {HB_TAG('P','D','C',' ')}}, /* Pennsylvania German */ - {"pes", {HB_TAG('F','A','R',' ')}}, /* Iranian Persian */ - {"phk", {HB_TAG('P','H','K',' ')}}, /* Phake 
*/ - {"pi", {HB_TAG('P','A','L',' ')}}, /* Pali */ - {"pih", {HB_TAG('P','I','H',' ')}}, /* Pitcairn-Norfolk */ - {"pl", {HB_TAG('P','L','K',' ')}}, /* Polish */ - {"pll", {HB_TAG('P','L','G',' ')}}, /* [Shwe] Palaung */ - {"plp", {HB_TAG('P','A','P',' ')}}, /* Palpa */ - {"pms", {HB_TAG('P','M','S',' ')}}, /* Piemontese */ - {"pnb", {HB_TAG('P','N','B',' ')}}, /* Western Panjabi */ - {"poh", {HB_TAG('P','O','H',' ')}}, /* Pocomchi */ - {"pon", {HB_TAG('P','O','N',' ')}}, /* Pohnpeian */ - {"prs", {HB_TAG('D','R','I',' ')}}, /* Afghan Persian/Dari */ - {"ps", {HB_TAG('P','A','S',' ')}}, /* Pashto/Pushto [macrolanguage] */ - {"pt", {HB_TAG('P','T','G',' ')}}, /* Portuguese */ - {"pwo", {HB_TAG('P','W','O',' ')}}, /* Pwo Western Karen */ - {"qu", {HB_TAG('Q','U','Z',' ')}}, /* Quechua [macrolanguage] */ - {"quc", {HB_TAG('Q','U','C',' ')}}, /* K'iche'/Quiché */ - {"quh", {HB_TAG('Q','U','H',' ')}}, /* Quechua (Bolivia) */ - {"quz", {HB_TAG('Q','U','Z',' ')}}, /* Cusco Quechua */ - {"qvi", {HB_TAG('Q','V','I',' ')}}, /* Quechua (Ecuador) */ - {"qwh", {HB_TAG('Q','W','H',' ')}}, /* Quechua (Peru) */ - {"raj", {HB_TAG('R','A','J',' ')}}, /* Rajasthani [macrolanguage] */ - {"rar", {HB_TAG('R','A','R',' ')}}, /* Rarotongan */ - {"rbb", {HB_TAG('P','L','G',' ')}}, /* Rumai Palaung */ - {"rej", {HB_TAG('R','E','J',' ')}}, /* Rejang */ - {"ria", {HB_TAG('R','I','A',' ')}}, /* Riang (India) */ - {"rif", {HB_TAG('R','I','F',' ')}}, /* Tarifit */ - {"ril", {HB_TAG('R','I','A',' ')}}, /* Riang (Myanmar) */ - {"rit", {HB_TAG('R','I','T',' ')}}, /* Ritarungo */ - {"rki", {HB_TAG('A','R','K',' ')}}, /* Rakhine */ - {"rkw", {HB_TAG('R','K','W',' ')}}, /* Arakwal */ - {"rm", {HB_TAG('R','M','S',' ')}}, /* Romansh */ - {"rmy", {HB_TAG('R','M','Y',' ')}}, /* Vlax Romani */ - {"rn", {HB_TAG('R','U','N',' ')}}, /* Rundi */ - {"ro", {HB_TAG('R','O','M',' ')}}, /* Romanian */ - {"rom", {HB_TAG('R','O','Y',' ')}}, /* Romany [macrolanguage] */ - {"rtm", {HB_TAG('R','T','M',' ')}}, /* Rotuman 
*/ - {"ru", {HB_TAG('R','U','S',' ')}}, /* Russian */ - {"rue", {HB_TAG('R','S','Y',' ')}}, /* Rusyn */ - {"rup", {HB_TAG('R','U','P',' ')}}, /* Aromanian/Arumanian/Macedo-Romanian */ - {"rw", {HB_TAG('R','U','A',' ')}}, /* Kinyarwanda */ - {"rwr", {HB_TAG('M','A','W',' ')}}, /* Marwari (India) */ - {"sa", {HB_TAG('S','A','N',' ')}}, /* Sanskrit */ - {"sah", {HB_TAG('Y','A','K',' ')}}, /* Yakut */ - {"sam", {HB_TAG('P','A','A',' ')}}, /* Palestinian Aramaic */ - {"sas", {HB_TAG('S','A','S',' ')}}, /* Sasak */ - {"sat", {HB_TAG('S','A','T',' ')}}, /* Santali */ - {"sc", {HB_TAG('S','R','D',' ')}}, /* Sardinian [macrolanguage] */ - {"sck", {HB_TAG('S','A','D',' ')}}, /* Sadri */ - {"scn", {HB_TAG('S','C','N',' ')}}, /* Sicilian */ - {"sco", {HB_TAG('S','C','O',' ')}}, /* Scots */ - {"scs", {HB_TAG('S','L','A',' ')}}, /* [North] Slavey */ - {"sd", {HB_TAG('S','N','D',' ')}}, /* Sindhi */ - {"se", {HB_TAG('N','S','M',' ')}}, /* Northern Sami */ - {"seh", {HB_TAG('S','N','A',' ')}}, /* Sena */ - {"sel", {HB_TAG('S','E','L',' ')}}, /* Selkup */ - {"sez", {HB_TAG('Q','I','N',' ')}}, /* Senthang Chin */ - {"sg", {HB_TAG('S','G','O',' ')}}, /* Sango */ - {"sga", {HB_TAG('S','G','A',' ')}}, /* Old Irish (to 900) */ - {"sgs", {HB_TAG('S','G','S',' ')}}, /* Samogitian */ - {"sgw", {HB_TAG('C','H','G',' ')}}, /* Sebat Bet Gurage */ -/*{"sgw", {HB_TAG('S','G','W',' ')}},*/ /* Sebat Bet Gurage (in SIL fonts) */ - {"shi", {HB_TAG('S','H','I',' ')}}, /* Tachelhit */ - {"shn", {HB_TAG('S','H','N',' ')}}, /* Shan */ - {"si", {HB_TAG('S','N','H',' ')}}, /* Sinhala */ - {"sid", {HB_TAG('S','I','D',' ')}}, /* Sidamo */ - {"sjd", {HB_TAG('K','S','M',' ')}}, /* Kildin Sami */ - {"sk", {HB_TAG('S','K','Y',' ')}}, /* Slovak */ - {"skr", {HB_TAG('S','R','K',' ')}}, /* Seraiki */ - {"sl", {HB_TAG('S','L','V',' ')}}, /* Slovenian */ - {"sm", {HB_TAG('S','M','O',' ')}}, /* Samoan */ - {"sma", {HB_TAG('S','S','M',' ')}}, /* Southern Sami */ - {"smj", {HB_TAG('L','S','M',' ')}}, /* Lule Sami */ - 
{"smn", {HB_TAG('I','S','M',' ')}}, /* Inari Sami */ - {"sms", {HB_TAG('S','K','S',' ')}}, /* Skolt Sami */ - {"sn", {HB_TAG('S','N','A','0')}}, /* Shona */ - {"snk", {HB_TAG('S','N','K',' ')}}, /* Soninke */ - {"so", {HB_TAG('S','M','L',' ')}}, /* Somali */ - {"sop", {HB_TAG('S','O','P',' ')}}, /* Songe */ - {"sq", {HB_TAG('S','Q','I',' ')}}, /* Albanian [macrolanguage] */ - {"sr", {HB_TAG('S','R','B',' ')}}, /* Serbian */ - {"srr", {HB_TAG('S','R','R',' ')}}, /* Serer */ - {"ss", {HB_TAG('S','W','Z',' ')}}, /* Swati */ - {"st", {HB_TAG('S','O','T',' ')}}, /* [Southern] Sotho */ - {"stq", {HB_TAG('S','T','Q',' ')}}, /* Saterfriesisch */ - {"stv", {HB_TAG('S','I','G',' ')}}, /* Silt'e */ - {"su", {HB_TAG('S','U','N',' ')}}, /* Sundanese */ - {"suk", {HB_TAG('S','U','K',' ')}}, /* Sukama */ - {"suq", {HB_TAG('S','U','R',' ')}}, /* Suri */ - {"sv", {HB_TAG('S','V','E',' ')}}, /* Swedish */ - {"sva", {HB_TAG('S','V','A',' ')}}, /* Svan */ - {"sw", {HB_TAG('S','W','K',' ')}}, /* Swahili [macrolanguage] */ - {"swb", {HB_TAG('C','M','R',' ')}}, /* Comorian */ - {"swh", {HB_TAG('S','W','K',' ')}}, /* Kiswahili/Swahili */ - {"swv", {HB_TAG('M','A','W',' ')}}, /* Shekhawati */ - {"sxu", {HB_TAG('S','X','U',' ')}}, /* Upper Saxon */ - {"syc", {HB_TAG('S','Y','R',' ')}}, /* Classical Syriac */ - {"syl", {HB_TAG('S','Y','L',' ')}}, /* Sylheti */ - {"syr", {HB_TAG('S','Y','R',' ')}}, /* Syriac [macrolanguage] */ - {"szl", {HB_TAG('S','Z','L',' ')}}, /* Silesian */ - {"ta", {HB_TAG('T','A','M',' ')}}, /* Tamil */ - {"tab", {HB_TAG('T','A','B',' ')}}, /* Tabasaran */ - {"tcp", {HB_TAG('Q','I','N',' ')}}, /* Tawr Chin */ - {"tcy", {HB_TAG('T','U','L',' ')}}, /* Tulu */ - {"tcz", {HB_TAG('Q','I','N',' ')}}, /* Thado Chin */ - {"tdd", {HB_TAG('T','D','D',' ')}}, /* Tai Nüa */ - {"te", {HB_TAG('T','E','L',' ')}}, /* Telugu */ - {"tem", {HB_TAG('T','M','N',' ')}}, /* Temne */ - {"tet", {HB_TAG('T','E','T',' ')}}, /* Tetum */ - {"tg", {HB_TAG('T','A','J',' ')}}, /* Tajik */ - {"th", 
{HB_TAG('T','H','A',' ')}}, /* Thai */ - {"ti", {HB_TAG('T','G','Y',' ')}}, /* Tigrinya */ - {"tig", {HB_TAG('T','G','R',' ')}}, /* Tigre */ - {"tiv", {HB_TAG('T','I','V',' ')}}, /* Tiv */ - {"tk", {HB_TAG('T','K','M',' ')}}, /* Turkmen */ - {"tl", {HB_TAG('T','G','L',' ')}}, /* Tagalog */ - {"tmh", {HB_TAG('T','M','H',' ')}}, /* Tamashek */ - {"tn", {HB_TAG('T','N','A',' ')}}, /* Tswana */ - {"to", {HB_TAG('T','G','N',' ')}}, /* Tonga (Tonga Islands) */ - {"tod", {HB_TAG('T','O','D','0')}}, /* Toma */ - {"toi", {HB_TAG('T','N','G',' ')}}, /* Tonga */ - {"tpi", {HB_TAG('T','P','I',' ')}}, /* Tok Pisin */ - {"tr", {HB_TAG('T','R','K',' ')}}, /* Turkish */ - {"tru", {HB_TAG('T','U','A',' ')}}, /* Turoyo Aramaic */ - {"ts", {HB_TAG('T','S','G',' ')}}, /* Tsonga */ - {"tt", {HB_TAG('T','A','T',' ')}}, /* Tatar */ - {"tum", {HB_TAG('T','U','M',' ')}}, /* Tumbuka */ - {"tvl", {HB_TAG('T','V','L',' ')}}, /* Tuvalu */ - {"tw", {HB_TAG('T','W','I',' ')}}, /* Twi */ - {"ty", {HB_TAG('T','H','T',' ')}}, /* Tahitian */ - {"tyv", {HB_TAG('T','U','V',' ')}}, /* Tuvin */ - {"tyz", {HB_TAG('T','Y','Z',' ')}}, /* Tày */ - {"tzm", {HB_TAG('T','Z','M',' ')}}, /* Central Atlas Tamazight */ - {"tzo", {HB_TAG('T','Z','O',' ')}}, /* Tzotzil */ - {"udm", {HB_TAG('U','D','M',' ')}}, /* Udmurt */ - {"ug", {HB_TAG('U','Y','G',' ')}}, /* Uighur */ - {"uk", {HB_TAG('U','K','R',' ')}}, /* Ukrainian */ - {"umb", {HB_TAG('U','M','B',' ')}}, /* Umbundu */ - {"unr", {HB_TAG('M','U','N',' ')}}, /* Mundari */ - {"ur", {HB_TAG('U','R','D',' ')}}, /* Urdu */ - {"uz", {HB_TAG('U','Z','B',' ')}}, /* Uzbek [macrolanguage] */ - {"uzn", {HB_TAG('U','Z','B',' ')}}, /* Northern Uzbek */ - {"uzs", {HB_TAG('U','Z','B',' ')}}, /* Southern Uzbek */ - {"ve", {HB_TAG('V','E','N',' ')}}, /* Venda */ - {"vec", {HB_TAG('V','E','C',' ')}}, /* Venetian */ - {"vi", {HB_TAG('V','I','T',' ')}}, /* Vietnamese */ - {"vls", {HB_TAG('F','L','E',' ')}}, /* Vlaams */ - {"vmw", {HB_TAG('M','A','K',' ')}}, /* Makhuwa */ - {"vo", 
{HB_TAG('V','O','L',' ')}}, /* Volapük */ - {"vro", {HB_TAG('V','R','O',' ')}}, /* Võro */ - {"wa", {HB_TAG('W','L','N',' ')}}, /* Walloon */ - {"war", {HB_TAG('W','A','R',' ')}}, /* Waray (Philippines) */ - {"wbm", {HB_TAG('W','A',' ',' ')}}, /* Wa */ - {"wbr", {HB_TAG('W','A','G',' ')}}, /* Wagdi */ - {"wle", {HB_TAG('S','I','G',' ')}}, /* Wolane */ - {"wo", {HB_TAG('W','L','F',' ')}}, /* Wolof */ - {"wry", {HB_TAG('M','A','W',' ')}}, /* Merwari */ - {"wtm", {HB_TAG('W','T','M',' ')}}, /* Mewati */ - {"xal", {HB_TAG('K','L','M',' ')}}, /* Kalmyk */ - {"xan", {HB_TAG('S','E','K',' ')}}, /* Sekota */ - {"xh", {HB_TAG('X','H','S',' ')}}, /* Xhosa */ - {"xjb", {HB_TAG('X','J','B',' ')}}, /* Minjangbal */ - {"xog", {HB_TAG('X','O','G',' ')}}, /* Soga */ - {"xom", {HB_TAG('K','M','O',' ')}}, /* Komo (Sudan) */ - {"xpe", {HB_TAG('X','P','E',' ')}}, /* Kpelle (Liberia) */ - {"xsl", {HB_TAG('S','S','L',' ')}}, /* South Slavey */ - {"xst", {HB_TAG('S','I','G',' ')}}, /* Silt'e (retired code) */ - {"xwo", {HB_TAG('T','O','D',' ')}}, /* Written Oirat (Todo) */ - {"yao", {HB_TAG('Y','A','O',' ')}}, /* Yao */ - {"yap", {HB_TAG('Y','A','P',' ')}}, /* Yapese */ - {"yi", {HB_TAG('J','I','I',' ')}}, /* Yiddish [macrolanguage] */ - {"yo", {HB_TAG('Y','B','A',' ')}}, /* Yoruba */ - {"yos", {HB_TAG('Q','I','N',' ')}}, /* Yos, deprecated by IANA in favor of Zou [zom] */ - {"yso", {HB_TAG('N','I','S',' ')}}, /* Nisi (China) */ - {"za", {HB_TAG('Z','H','A',' ')}}, /* Chuang/Zhuang [macrolanguage] */ - {"zea", {HB_TAG('Z','E','A',' ')}}, /* Zeeuws */ - {"zgh", {HB_TAG('Z','G','H',' ')}}, /* Standard Morrocan Tamazigh */ - {"zne", {HB_TAG('Z','N','D',' ')}}, /* Zande */ - {"zom", {HB_TAG('Q','I','N',' ')}}, /* Zou */ - {"zu", {HB_TAG('Z','U','L',' ')}}, /* Zulu */ - {"zum", {HB_TAG('L','R','C',' ')}}, /* Kumzari */ - {"zza", {HB_TAG('Z','Z','A',' ')}}, /* Zazaki */ - - /* The corresponding languages IDs for the following IDs are unclear, - * overlap, or are architecturally weird. 
Needs more research. */ - -/*{"chp", {HB_TAG('S','A','Y',' ')}},*/ /* Sayisi */ -/*{"cwd", {HB_TAG('T','C','R',' ')}},*/ /* TH-Cree */ -/*{"emk", {HB_TAG('E','M','K',' ')}},*/ /* Eastern Maninkakan */ -/*{"krc", {HB_TAG('B','A','L',' ')}},*/ /* Balkar */ -/*{"??", {HB_TAG('B','C','R',' ')}},*/ /* Bible Cree */ -/*{"zh?", {HB_TAG('C','H','N',' ')}},*/ /* Chinese (seen in Microsoft fonts) */ -/*{"ar-Syrc?", {HB_TAG('G','A','R',' ')}},*/ /* Garshuni */ -/*{"hy?", {HB_TAG('H','Y','E','0')}},*/ /* Armenian East (ISO 639-3 hye according to Microsoft, but that’s equivalent to ISO 639-1 hy) */ -/*{"ga-Latg?/" {HB_TAG('I','R','T',' ')}},*/ /* Irish Traditional */ -/*{"krc", {HB_TAG('K','A','R',' ')}},*/ /* Karachay */ -/*{"ka-Geok?", {HB_TAG('K','G','E',' ')}},*/ /* Khutsuri Georgian */ -/*{"kca", {HB_TAG('K','H','K',' ')}},*/ /* Khanty-Kazim */ -/*{"kca", {HB_TAG('K','H','S',' ')}},*/ /* Khanty-Shurishkar */ -/*{"kca", {HB_TAG('K','H','V',' ')}},*/ /* Khanty-Vakhi */ -/*{"kqs, kss", {HB_TAG('K','I','S',' ')}},*/ /* Kisii */ -/*{"lua", {HB_TAG('L','U','A',' ')}},*/ /* Luba-Lulua */ -/*{"mlq", {HB_TAG('M','L','N',' ')}},*/ /* Malinke */ -/*{"nso", {HB_TAG('N','S','O',' ')}},*/ /* Sotho, Northern */ -/*{"??", {HB_TAG('M','A','L',' ')}},*/ /* Malayalam Traditional */ -/*{"csw", {HB_TAG('N','C','R',' ')}},*/ /* N-Cree */ -/*{"csw", {HB_TAG('N','H','C',' ')}},*/ /* Norway House Cree */ -/*{"el-polyton", {HB_TAG('P','G','R',' ')}},*/ /* Polytonic Greek */ -/*{"bgr, cnh, cnw, czt, sez, tcp, csy, ctd, flm, pck, tcz, zom, cmr, dao, hlt, cka, cnk, mrh, mwg, cbl, cnb, csh", {HB_TAG('Q','I','N',' ')}},*/ /* Chin */ -/*{"??", {HB_TAG('Y','I','C',' ')}},*/ /* Yi Classic */ -/*{"zh-Latn-pinyin", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */ -}; - -typedef struct { - char language[11]; - hb_tag_t tag; -} LangTagLong; -static const LangTagLong ot_languages_zh[] = { - /* Store longest-first, if one is a prefix of another. 
*/ - {"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */ - {"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */ - {"zh-mo", HB_TAG('Z','H','H',' ')}, /* Chinese (Macao) */ - {"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */ - {"zh-tw", HB_TAG('Z','H','T',' ')}, /* Chinese (Taiwan) */ - {"zh-hans", HB_TAG('Z','H','S',' ')}, /* Chinese (Simplified) */ - {"zh-hant-hk",HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */ - {"zh-hant-mo",HB_TAG('Z','H','H',' ')}, /* Chinese (Macao) */ - {"zh-hant", HB_TAG('Z','H','T',' ')}, /* Chinese (Traditional) */ -}; - static int lang_compare_first_component (const void *pa, const void *pb) @@ -912,23 +186,16 @@ lang_compare_first_component (const void *pa, } static bool -match_subtag (const char *lang_str, - const char *limit, - unsigned int *count, - hb_tag_t *tags, - const char *subtag, - hb_tag_t tag) +subtag_matches (const char *lang_str, + const char *limit, + const char *subtag) { do { const char *s = strstr (lang_str, subtag); - if (!s || s >= limit || s == lang_str) + if (!s || s >= limit) return false; if (!ISALNUM (s[strlen (subtag)])) - { - tags[0] = tag; - *count = 1; return true; - } lang_str = s + strlen (subtag); } while (1); } @@ -942,6 +209,23 @@ lang_matches (const char *lang_str, const char *spec) (lang_str[len] == '\0' || lang_str[len] == '-'); } +typedef struct { + char language[4]; + hb_tag_t tags[HB_OT_MAX_TAGS_PER_LANGUAGE]; +} LangTag; + +#include "hb-ot-tag-table.hh" + +/* The corresponding languages IDs for the following IDs are unclear, + * overlap, or are architecturally weird. Needs more research. 
*/ + +/*{"??", {HB_TAG('B','C','R',' ')}},*/ /* Bible Cree */ +/*{"zh?", {HB_TAG('C','H','N',' ')}},*/ /* Chinese (seen in Microsoft fonts) */ +/*{"ar-Syrc?", {HB_TAG('G','A','R',' ')}},*/ /* Garshuni */ +/*{"??", {HB_TAG('N','G','R',' ')}},*/ /* Nagari */ +/*{"??", {HB_TAG('Y','I','C',' ')}},*/ /* Yi Classic */ +/*{"zh?", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */ + hb_tag_t hb_ot_tag_from_language (hb_language_t language) { @@ -960,36 +244,11 @@ hb_ot_tags_from_language (const char *lang_str, { const char *s; - if (0 - /* - * "fonipa" is a variant tag in BCP-47, meaning the International Phonetic Alphabet. - * It can be applied to any language. - */ - || match_subtag (lang_str, limit, count, tags, "-fonipa", HB_TAG('I','P','P','H')) - /* - * "fonnapa" is a variant tag in BCP-47, meaning the North American Phonetic Alphabet - * also known as Americanist Phonetic Notation. It can be applied to any language. - */ - || match_subtag (lang_str, limit, count, tags, "-fonnapa", HB_TAG('A','P','P','H')) - /* - * "Syre" is a BCP-47 script tag, meaning the Estrangela variant of the Syriac script. - * It can be applied to any language. - */ - || match_subtag (lang_str, limit, count, tags, "-syre", HB_TAG('S','Y','R','E')) - /* - * "Syrj" is a BCP-47 script tag, meaning the Western variant of the Syriac script. - * It can be applied to any language. - */ - || match_subtag (lang_str, limit, count, tags, "-syrj", HB_TAG('S','Y','R','J')) - /* - * "Syrn" is a BCP-47 script tag, meaning the Eastern variant of the Syriac script. - * It can be applied to any language. - */ - || match_subtag (lang_str, limit, count, tags, "-syrn", HB_TAG('S','Y','R','N')) - ) - return; + /* Check for matches of multiple subtags. */ + if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags)) + return; - /* Find a language matching in the first component */ + /* Find a language matching in the first component. 
*/ { const LangTag *lang_tag; lang_tag = (LangTag *) bsearch (lang_str, ot_languages, @@ -1005,29 +264,6 @@ hb_ot_tags_from_language (const char *lang_str, } } - /* Otherwise, check the Chinese ones */ - if (0 == lang_compare_first_component (lang_str, "zh")) - { - unsigned int i; - - for (i = 0; i < ARRAY_LENGTH (ot_languages_zh); i++) - { - const LangTagLong *lang_tag; - lang_tag = &ot_languages_zh[i]; - if (lang_matches (lang_str, lang_tag->language)) - { - tags[0] = lang_tag->tag; - *count = 1; - return; - } - } - - /* Otherwise just return 'ZHS ' */ - tags[0] = HB_TAG('Z','H','S',' '); - *count = 1; - return; - } - s = strchr (lang_str, '-'); if (!s) s = lang_str + strlen (lang_str); @@ -1160,36 +396,16 @@ hb_ot_tag_to_language (hb_tag_t tag) if (tag == HB_OT_TAG_DEFAULT_LANGUAGE) return nullptr; - /* struct LangTag has only room for 3-letter language tags. */ - switch (tag) { - case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */ - return hb_language_from_string ("und-fonnapa", -1); - case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */ - return hb_language_from_string ("und-fonipa", -1); - case HB_TAG('S','Y','R',' '): /* Syriac [macrolanguage] */ - return hb_language_from_string ("syr", -1); - case HB_TAG('S','Y','R','E'): /* Estrangela Syriac */ - return hb_language_from_string ("und-Syre", -1); - case HB_TAG('S','Y','R','J'): /* Western Syriac */ - return hb_language_from_string ("und-Syrj", -1); - case HB_TAG('S','Y','R','N'): /* Eastern Syriac */ - return hb_language_from_string ("und-Syrn", -1); + { + hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag); + if (disambiguated_tag != HB_LANGUAGE_INVALID) + return disambiguated_tag; } for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) if (ot_languages[i].tags[0] == tag) return hb_language_from_string (ot_languages[i].language, -1); - /* If tag starts with ZH, it's Chinese */ - if ((tag & 0xFFFF0000u) == 0x5A480000u) { - switch (tag) { - 
case HB_TAG('Z','H','H',' '): return hb_language_from_string ("zh-hk", -1); /* Hong Kong */ - case HB_TAG('Z','H','S',' '): return hb_language_from_string ("zh-Hans", -1); /* Simplified */ - case HB_TAG('Z','H','T',' '): return hb_language_from_string ("zh-Hant", -1); /* Traditional */ - default: break; /* Fall through */ - } - } - /* Else return a custom language in the form of "x-hbotABCD" */ { unsigned char buf[11] = "x-hbot"; diff --git a/src/hb-ot-tag.h b/src/hb-ot-tag.h index 44f66c3f3..ef7014c4e 100644 --- a/src/hb-ot-tag.h +++ b/src/hb-ot-tag.h @@ -40,7 +40,7 @@ HB_BEGIN_DECLS #define HB_OT_TAG_DEFAULT_LANGUAGE HB_TAG ('d', 'f', 'l', 't') #define HB_OT_MAX_TAGS_PER_SCRIPT 2u -#define HB_OT_MAX_TAGS_PER_LANGUAGE 1u +#define HB_OT_MAX_TAGS_PER_LANGUAGE 3u HB_EXTERN void hb_ot_tags (hb_script_t script, diff --git a/test/api/test-ot-tag.c b/test/api/test-ot-tag.c index 0618877ea..6d64d131b 100644 --- a/test/api/test-ot-tag.c +++ b/test/api/test-ot-tag.c @@ -269,27 +269,27 @@ test_ot_tag_language (void) test_language_two_way ("TUA", "tru"); /* Turoyo Aramaic */ - test_language_two_way ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */ - test_tag_from_language ("ZHS", "zh"); /* Chinese */ test_tag_from_language ("ZHS", "zh-cn"); /* Chinese (China) */ test_tag_from_language ("ZHS", "zh-sg"); /* Chinese (Singapore) */ test_tag_from_language ("ZHH", "zh-mo"); /* Chinese (Macao) */ test_tag_from_language ("ZHH", "zh-hant-mo"); /* Chinese (Macao) */ - test_tag_from_language ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */ + test_language_two_way ("ZHH", "zh-HK"); /* Chinese (Hong Kong) */ test_tag_from_language ("ZHH", "zH-HanT-hK"); /* Chinese (Hong Kong) */ test_tag_from_language ("ZHT", "zh-tw"); /* Chinese (Taiwan) */ - test_tag_from_language ("ZHS", "zh-Hans"); /* Chinese (Simplified) */ - test_tag_from_language ("ZHT", "zh-Hant"); /* Chinese (Traditional) */ + test_language_two_way ("ZHS", "zh-Hans"); /* Chinese (Simplified) */ + test_language_two_way ("ZHT", "zh-Hant"); /* 
Chinese (Traditional) */ test_tag_from_language ("ZHS", "zh-xx"); /* Chinese (Other) */ + test_tag_from_language ("ZHS", "zh-Hans-TW"); + + test_tag_from_language ("ZHH", "yue"); + test_tag_from_language ("ZHH", "yue-Hant"); + test_tag_from_language ("ZHS", "yue-Hans"); + test_tag_from_language ("ZHS", "zh"); /* Chinese */ test_tag_from_language ("ZHS", "zh-xx"); - test_tag_to_language ("ZHS", "zh-Hans"); - test_tag_to_language ("ZHT", "zh-Hant"); - test_tag_to_language ("ZHP", "x-hbotzhp"); - test_language_two_way ("ABC", "x-hbotabc"); test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc"); test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc"); @@ -305,7 +305,7 @@ test_ot_tag_language (void) * Invalid input. The precise answer does not matter, as long as it * does not crash or get into an infinite loop. */ - test_tag_from_language ("dflt", "-fonipa"); + test_tag_from_language ("IPPH", "-fonipa"); /* * Tags that contain "-fonipa" as a substring but which do not contain @@ -320,36 +320,45 @@ test_ot_tag_language (void) test_tag_from_language ("IPPH", "en-fonipa"); test_tag_from_language ("IPPH", "en-fonipax-fonipa"); test_tag_from_language ("IPPH", "rm-CH-fonipa-sursilv-x-foobar"); - test_tag_from_language ("IPPH", "und-fonipa"); + test_language_two_way ("IPPH", "und-fonipa"); test_tag_from_language ("IPPH", "zh-fonipa"); - test_tag_to_language ("IPPH", "und-fonipa"); /* North American Phonetic Alphabet (Americanist Phonetic Notation) */ test_tag_from_language ("APPH", "en-fonnapa"); test_tag_from_language ("APPH", "chr-fonnapa"); - test_tag_from_language ("APPH", "und-fonnapa"); - test_tag_to_language ("APPH", "und-fonnapa"); + test_language_two_way ("APPH", "und-fonnapa"); + + /* Khutsuri Georgian */ + test_tag_from_language ("KGE", "ka-Geok"); + test_language_two_way ("KGE", "und-Geok"); + + /* Irish Traditional */ + test_language_two_way ("IRT", "ga-Latg"); + + /* Moldavian */ + test_language_two_way ("MOL", "ro-MD"); + + /* Polytonic Greek */ + 
test_language_two_way ("PGR", "el-polyton"); + test_tag_from_language ("PGR", "el-CY-polyton"); /* Estrangela Syriac */ test_tag_from_language ("SYRE", "aii-Syre"); test_tag_from_language ("SYRE", "de-Syre"); test_tag_from_language ("SYRE", "syr-Syre"); - test_tag_from_language ("SYRE", "und-Syre"); - test_tag_to_language ("SYRE", "und-Syre"); + test_language_two_way ("SYRE", "und-Syre"); /* Western Syriac */ test_tag_from_language ("SYRJ", "aii-Syrj"); test_tag_from_language ("SYRJ", "de-Syrj"); test_tag_from_language ("SYRJ", "syr-Syrj"); - test_tag_from_language ("SYRJ", "und-Syrj"); - test_tag_to_language ("SYRJ", "und-Syrj"); + test_language_two_way ("SYRJ", "und-Syrj"); /* Eastern Syriac */ test_tag_from_language ("SYRN", "aii-Syrn"); test_tag_from_language ("SYRN", "de-Syrn"); test_tag_from_language ("SYRN", "syr-Syrn"); - test_tag_from_language ("SYRN", "und-Syrn"); - test_tag_to_language ("SYRN", "und-Syrn"); + test_language_two_way ("SYRN", "und-Syrn"); /* Test that x-hbot overrides the base language */ test_tag_from_language ("ABC", "fa-x-hbotabc-zxc"); @@ -358,6 +367,18 @@ test_ot_tag_language (void) test_tag_from_language ("ABC", "zh-cn-x-hbotabc-zxc"); test_tag_from_language ("ABC", "zh-xy-x-hbotabc-zxc"); test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc"); + + /* Unnormalized BCP 47 tags */ + test_tag_from_language ("JBO", "art-lojban"); + test_tag_from_language ("LTZ", "i-lux"); + test_tag_from_language ("MNG", "drh"); + test_tag_from_language ("NOR", "no-bok"); + test_tag_from_language ("NYN", "no-nyn"); + test_tag_from_language ("ZHS", "i-hak"); + test_tag_from_language ("ZHS", "zh-guoyu"); + test_tag_from_language ("ZHS", "zh-min"); + test_tag_from_language ("ZHS", "zh-min-nan"); + test_tag_from_language ("ZHS", "zh-xiang"); } static void @@ -401,10 +422,11 @@ test_ot_tag_full (void) test_tags (HB_SCRIPT_INVALID, "en", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "ENG"); test_tags (HB_SCRIPT_LATIN, "en", 
HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "latn", "ENG"); test_tags (HB_SCRIPT_LATIN, "en", 0, 0, 0, 0); + test_tags (HB_SCRIPT_INVALID, "und-fonnapa", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "APPH"); test_tags (HB_SCRIPT_INVALID, "en-fonnapa", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "APPH"); test_tags (HB_SCRIPT_INVALID, "x-hbot1234-hbsc5678", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "5678", "1234"); - test_tags (HB_SCRIPT_MALAYALAM, "ml", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 2, 1, "mlm2", "mlym", "MLR"); - test_tags (HB_SCRIPT_MALAYALAM, "ml", 1, 1, 1, 1, "mlm2", "MLR"); + test_tags (HB_SCRIPT_MALAYALAM, "ml", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 2, 2, "mlm2", "mlym", "MAL", "MLR"); + test_tags (HB_SCRIPT_MALAYALAM, "ml", 1, 1, 1, 1, "mlm2", "MAL"); test_tags (HB_SCRIPT_INVALID, "xyz", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "XYZ"); test_tags (HB_SCRIPT_INVALID, "xy", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 0); }